计算文件中的字符、单词和行数

Counting chars, words and lines in a file

我尝试计算文件中的字符数、单词数和行数。 txt 文件是:

The snail moves like a
Hovercraft, held up by a
Rubber cushion of itself,
Sharing its secret

这是代码,

/*
 * Opens the file named `filename`, tallies its contents and stores the
 * totals in `properties`.
 *
 *   fileptr    - the value passed in is ignored; the parameter is only used
 *                as a local handle for the stream opened here.
 *   filename   - path of the file to open (binary mode).
 *   properties - receives char_count, word_count and line_count.
 *
 * Counting rules:
 *   chars - every byte except ' ' (tabs and newlines are included)
 *   lines - every '\n'
 *   words - every ' ', '\t', '\n' or '\0' byte, i.e. separators are counted,
 *           not the words between them
 *
 * If the file cannot be opened, all three totals are stored as 0.
 */
void count_elements(FILE* fileptr, char* filename, struct fileProps* properties) // counts chars, words and lines 
{
    int chars = 0, words = 0, lines = 0;
    int ch; /* int, not char: fgetc() returns an int so EOF stays distinct from every byte value */

    fileptr = fopen(filename, "rb");
    if (fileptr != NULL)
    {
        while ((ch = fgetc(fileptr)) != EOF)
        {
            if (ch != ' ') chars++;
            if (ch == '\n') // check lines
                lines++;
            if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\0') // check words
                words++;
        }
        fclose(fileptr);
    }

    properties->char_count = chars;
    properties->line_count = lines;
    properties->word_count = words;

}

但是当我打印字符数、单词数和行数时,输出分别为 81、18、5。我错过了什么?(文件的打开模式不会改变结果,我也试过 "r"。)

我提出的解决方案给出了与 gedit 文档统计信息相同的结果:

#include <stdio.h>

/*
 * Counts the printable characters, words and lines of the file `filename`
 * and prints the three totals to stdout.
 *
 *   chars - bytes in the printable ASCII range 33..126 (space excluded)
 *   words - runs of printable bytes that are ended by ' ', '\t', '\n', '\0'
 *           or by the end of the file
 *   lines - number of '\n' bytes
 *
 * Any other byte (for example the '\r' of a CRLF line ending) is ignored: it
 * is not counted and does not start or end a word.
 *
 * If the file cannot be opened, an error message is written to stderr via
 * perror() and nothing is printed to stdout.
 */
void count_elements(char* filename)
{
    // This can be a local variable as it is not used externally.
    FILE *fileptr = fopen(filename, "rb");
    int chars = 0, words = 0, lines = 0;
    int read;        // int because fgetc returns an int: an unsigned char value or EOF
    int in_word = 0; // non-zero while printable bytes have been seen since the last separator

    if (fileptr == NULL)
    {
        perror(filename);
        return;
    }

    while ((read = fgetc(fileptr)) != EOF)
    {
        if (read >= 33 && read <= 126) // only printable chars, without space
        {
            ++chars;
            in_word = 1;
        }
        else if (read == '\n' || read == '\t' || read == '\0' || read == ' ')
        {
            // A separator only completes a word if one was in progress, so
            // repeated separators are not counted as extra words.
            if (in_word)
            {
                ++words;
                in_word = 0;
            }
            if (read == '\n')
            {
                ++lines;
            }
        }
        // Every other byte leaves in_word untouched, so "word\r\n" still
        // counts as one word.
    }
    fclose(fileptr);

    // A file that does not end in a separator still ends its last word.
    if (in_word)
    {
        ++words;
    }

    printf("Chars: %d\n", chars);
    printf("Lines: %d\n", lines);
    printf("Words: %d\n", words);

}

/* Entry point: prints the statistics of the file named "test". */
int main(void)
{
    count_elements("test");
    return 0;
}

注释和解释请看代码中的注释。该代码还会忽略其他特殊控制字符(例如 Windows CRLF 中的 CR),只把 LF 当作换行。

您的函数同时将 FILE* 和 filename 作为参数,其中一个应该被删除。我删除了 filename,以便该函数可以与任何 FILE*(例如 stdin)一起使用。

#include <ctype.h>
#include <stdint.h>
#include <stdio.h>

/* Totals written by count_elements() and printed by fileProps_print(). */
typedef struct { /* typedef'd so the type can be used as `fileProps` without the `struct` keyword */
    uintmax_t char_count; /* number of characters counted */
    uintmax_t word_count; /* number of words counted */
    uintmax_t line_count; /* number of lines counted */
} fileProps;

/* a helper function to print the content of a fileProps */
/*
 * Writes the three totals stored in `p` to the stream `fp`, one per line, in
 * the order chars, words, lines.
 *
 * Returns `fp` unchanged so the call can be chained.
 */
FILE* fileProps_print(FILE *fp, const fileProps *p) {
    const uintmax_t chars = p->char_count;
    const uintmax_t words = p->word_count;
    const uintmax_t lines = p->line_count;

    fprintf(fp, "chars %ju\nwords %ju\nlines %ju\n", chars, words, lines);
    return fp;
}

/*
 * Reads `fileptr` until end-of-file and stores the totals in `properties`.
 *
 *   char_count - every character read, whitespace included
 *   word_count - maximal runs of non-whitespace characters, as classified by
 *                isspace(); a run that is ended by end-of-file is counted too
 *   line_count - number of '\n' characters
 *
 * The totals are reset to 0 before the stream is checked, so `properties`
 * holds defined values even when `fileptr` is NULL. Nothing is done when
 * `properties` is NULL. The stream is not closed here.
 */
void count_elements(FILE *fileptr, fileProps *properties) {
    if(!properties) return;

    properties->char_count = 0;
    properties->line_count = 0;
    properties->word_count = 0;

    if(!fileptr) return;

    /* int, not char: fgetc() returns an unsigned char value or EOF, and a
       char cannot represent both without ambiguity */
    int ch;
    int in_word = 0; /* non-zero while inside a run of non-whitespace */

    while((ch = fgetc(fileptr)) != EOF) {
        ++properties->char_count; /* count all characters */

        /* ch is in the unsigned char range here, which is what isspace() requires */
        if(isspace(ch)) {
            /* whitespace ends a word only if one was in progress, so
               consecutive whitespace is not counted as extra words */
            if(in_word) {
                ++properties->word_count;
                in_word = 0;
            }
            if(ch == '\n') ++properties->line_count;
        } else {
            in_word = 1;
        }
    }

    /* input that does not end in whitespace still ends its last word */
    if(in_word) ++properties->word_count;
}

/*
 * Counts the contents of "the_file.txt" and prints the totals to stdout.
 *
 * Returns 0 on success. If the file cannot be opened, the reason is reported
 * on stderr and 1 is returned instead of exiting silently.
 */
int main(void) {
    FILE *fp = fopen("the_file.txt", "r");
    if(!fp) {
        perror("the_file.txt");
        return 1;
    }

    fileProps p;
    count_elements(fp, &p);
    fclose(fp);

    fileProps_print(stdout, &p);
    return 0;
}

您在问题中显示的文件的输出:

chars 93
words 17
lines 4

编辑: 我刚刚注意到您的评论“试图仅将字母计算为 char”。为此,您可以使用 isalpha 并将 while 循环替换为:

    while((ch = fgetc(fileptr)) != EOF) {
        if(isalpha((unsigned char)ch)) ++properties->char_count;
        else if(isspace((unsigned char)ch)) {
            ++properties->word_count;
            if(ch == '\n') ++properties->line_count;
        }
    }

修改版本的输出:

chars 74
words 17
lines 4

能够读取“宽”字符(多字节)的版本:

#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <wchar.h>
#include <wctype.h>

/* Totals written by count_elements() and printed by fileProps_print(). */
typedef struct {
    uintmax_t char_count; /* number of characters counted */
    uintmax_t word_count; /* number of words counted */
    uintmax_t line_count; /* number of lines counted */
} fileProps;

/*
 * Writes the three totals stored in `p` to the stream `fp`, one per line, in
 * the order chars, words, lines.
 *
 * Returns `fp` unchanged so the call can be chained.
 */
FILE* fileProps_print(FILE *fp, const fileProps *p) {
    const uintmax_t chars = p->char_count;
    const uintmax_t words = p->word_count;
    const uintmax_t lines = p->line_count;

    fprintf(fp, "chars %ju\nwords %ju\nlines %ju\n", chars, words, lines);
    return fp;
}

/*
 * Reads wide characters from `fileptr` until fgetwc() returns WEOF and
 * stores the totals in `properties`.
 *
 *   char_count - characters accepted by iswalpha()
 *   word_count - maximal runs of non-whitespace characters, as classified by
 *                iswspace(); a run that is ended by the end of input is
 *                counted too
 *   line_count - number of newline characters
 *
 * The totals are reset to 0 before the stream is checked, so `properties`
 * holds defined values even when `fileptr` is NULL. Nothing is done when
 * `properties` is NULL. The stream is not closed here.
 *
 * fgetwc() also returns WEOF on a read or encoding error; in that case the
 * totals cover only the characters read before the error.
 */
void count_elements(FILE *fileptr, fileProps *properties) {
    if(!properties) return;

    properties->char_count = 0;
    properties->line_count = 0;
    properties->word_count = 0;

    if(!fileptr) return;

    wint_t ch;
    int in_word = 0; /* non-zero while inside a run of non-whitespace */

    while((ch = fgetwc(fileptr)) != WEOF) {
        if(iswspace(ch)) {
            /* whitespace ends a word only if one was in progress, so
               consecutive whitespace is not counted as extra words */
            if(in_word) {
                ++properties->word_count;
                in_word = 0;
            }
            if(ch == L'\n') ++properties->line_count;
        } else {
            in_word = 1;
            if(iswalpha(ch)) ++properties->char_count;
        }
    }

    /* input that does not end in whitespace still ends its last word */
    if(in_word) ++properties->word_count;
}

/*
 * Selects a locale for multibyte decoding, counts the contents of
 * "the_file.txt" and prints the totals to stdout.
 *
 * Returns 0 on success. If the file cannot be opened, the reason is reported
 * on stderr and 1 is returned instead of exiting silently.
 */
int main(void) {
    /* Set your locale here. If the hard-coded one is not installed, fall back
       to the locale named by the environment. If neither can be selected the
       program keeps the default "C" locale, so say so rather than silently
       miscounting non-ASCII input. */
    if(!setlocale(LC_ALL, "sv_SE.UTF-8") && !setlocale(LC_ALL, "")) {
        fputs("warning: no usable locale could be set; "
              "non-ASCII input may not be decoded\n", stderr);
    }

    FILE *fp = fopen("the_file.txt", "r");
    if(!fp) {
        perror("the_file.txt");
        return 1;
    }

    fileProps p;
    count_elements(fp, &p);
    fclose(fp);
    fileProps_print(stdout, &p);
    return 0;
}

如果 the_file.txt 包含一行 öäü 它将报告

chars 3
words 1
lines 1

对于您的原始文件,它的报告与上面相同。