C 中的唯一单词计数器

Question

我正在用 C 编写一个小程序，它应该统计一段文本中不重复的单词。为此，我用一个“单词本”来存储所有已找到的单词。正常情况下，它应该只存入单词本里还没有的单词，但实际上它把输入的每一个单词都存了进去。我该如何解决这个问题？另外，如何去掉单词本 "woerterbuch" 中所有空白的条目？

#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/* Splits Text into words and passes each one to suche_wort(). */
char lies_wort(char *Text);
/* Searches woerterbuch for wort and calls neues_wort() for words it does not find. */
char suche_wort(char *wort);
/* Stores wort in the first empty (null) slot of woerterbuch. */
char neues_wort(char *wort);
/* Word book: room for 1000 word pointers. As a file-scope object it starts
 * out zero-initialized, so unused slots are null pointers. */
char *woerterbuch[1000];

/* Reads one line from stdin, splits it into words via lies_wort(), then
 * prints every slot of woerterbuch so the result can be inspected. */
int main(void)
{
    char Text[1000];
    printf("Bitte Text eingeben : \n") ;
    /* NOTE(review): the return value of fgets() is not checked, and the
     * '\n' it keeps is not among the delimiters used in lies_wort(). */
    fgets (Text, 1000, stdin);
    lies_wort(Text);
    int i;
    /* NOTE(review): walks all 1000 slots, including the ones that were never
     * filled and are still null; passing a null pointer to %s is undefined
     * behavior. */
    for(i=0;i<1000;i++){
        printf("woerterbuch :%s\n",woerterbuch[i]);}
}
/* Tokenizes Text in place with strtok() on the delimiters " ,.!?;:" and
 * passes each token to suche_wort(), printing it with a running number. */
char lies_wort(char *Text){
    char *wort;
    int i=1;
    wort = strtok(Text, " ,.!?;:");
    while(wort != NULL) {
        suche_wort(wort);
        printf("gefunden %d: %s\n", i++, wort);
        wort = strtok(NULL, " ,.!?;:");}
    /* NOTE(review): declared to return char but has no return statement. */
}
/* Compares wort against the entries of woerterbuch and returns 0 on a match;
 * otherwise hands wort to neues_wort() and returns 0. */
char suche_wort(char *wort)
{
    int i;
    /* NOTE(review): i starts at 0, so the condition i>1000 is false on the
     * very first check and the comparison below never runs; every word
     * therefore reaches neues_wort(). If the loop did run over all slots,
     * strcmp() would also be handed the null entries of unused slots. */
    for (i = 0; i>1000; i++){
        if (!strcmp(woerterbuch[i],wort)){return 0;}}
    neues_wort(wort);
    return 0;
}
/* Stores the pointer wort in the first slot of woerterbuch that is still 0
 * and returns 0. Only the pointer is stored; the characters are not copied. */
char neues_wort(char *wort)
{
    int i;
    for (i=0; i<1000; i++){
        if(woerterbuch[i]==0){
            woerterbuch[i]=wort;
            return 0;}}
    /* NOTE(review): when no slot is free, control reaches the end of this
     * non-void function without a return value. */
}

为了测试，这个程序只是打印 "woerterbuch" 中的所有单词，这样我就可以检查它是否正常工作。

Answer 1

在suche_wort

for (i = 0; i>1000; i++)

应该是

for (i = 0; i<1000; i++)

你的循环每次都会立即终止。

Answer 2

我认为您的代码中存在一些问题：

首先，在这一行：

woerterbuch[i]=wort;

只会覆盖 woerterbuch[i] 中保存的地址，这会导致错误的结果。相反，您需要通过 malloc 或 strdup 为 woerterbuch[i] 分配空间。

您可以像这样为单个指针分配空间：

worterbuch[i] = malloc(strlen(wort)+1);

注意：最好始终检查 malloc() 的返回值，并在用完后用 free() 释放这些指针。

现在，由于指针指向某处，您可以将内容复制到其中。您可以使用 strcpy 来执行此操作。如果你想跳过这个复制步骤，你可以使用 strdup() 代替。

其次，您可以把这个指针数组放在一个 struct 中管理，而不是定义全局的 char *woerterbuch[1000];，例如：

typedef struct {
    char *woerterbuch[1000];
    size_t n;
} worterbuch;

这样可以更轻松地管理您的数组。

第三，您没有检查 fgets() 的返回值。如果读取失败，它会返回 NULL。您还应该在此处检查输入是否超出了缓冲区的长度。

最后，如果您的单词本中有很多单词，使用线性搜索检查重复项可能效率不高：每次查找平均需要 O(N) 时间。相反，如果让数组保持有序，就可以使用二分查找，每次查找为 O(log N)，因此当 N 变得非常大时效率会更高。

这是我不久前写的一些代码，它做了类似的事情：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Size of the input text buffer; also the capacity of the dictionary's pointer array. */
#define TEXTSIZE 1000

/* Collection of distinct words. */
typedef struct {
    /* word strings; read_words() fills the first numwords slots with malloc'd copies */
    char *dictionary[TEXTSIZE];
    /* number of slots currently in use */
    size_t numwords;
} dictionary_t;

/* Reads one line from stdin into text; exits on a read error, a line without a newline, or empty input. */
void read_text(char *text);
/* Splits text into words and stores a copy of each not-yet-seen word in dict. */
void read_words(char *text, dictionary_t *dict);
/* Returns 0 if word is already in dict, 1 otherwise. */
int search_word(dictionary_t *dict, char *word);
/* Sorts, prints, and frees the words stored in dict. */
void print_words(dictionary_t *dict);
/* qsort() comparator for an array of char * strings. */
int str_cmp(const void *a, const void *b);

/* Entry point: read one line of text, collect its distinct words, print them. */
int main(void) {
    char text[TEXTSIZE];
    dictionary_t dict;

    /* dict needs no initializer here: read_words() sets dict.numwords
       before any slot is read. */
    read_text(text);
    read_words(text, &dict);
    print_words(&dict);

    return 0;
}

/*
 * Prompts for and reads one line of text from stdin into text.
 *
 * text must point to a buffer of at least TEXTSIZE bytes. On return it holds
 * the line without its trailing newline.
 *
 * The process exits with EXIT_FAILURE when fgets() fails, when the line read
 * does not end in '\n' (it did not fit in the buffer, or input ended without
 * a newline), or when the line is empty.
 */
void read_text(char *text) {
    size_t slen;

    printf("Please enter text: \n");
    if (fgets(text, TEXTSIZE, stdin) == NULL) {
        fprintf(stderr, "Error reading text\n");
        exit(EXIT_FAILURE);
    }

    /* fgets() keeps the newline: strip it, or reject a line that has none */
    slen = strlen(text);
    if (slen > 0) {
        if (text[slen-1] == '\n') {
            /* overwrite the newline with the string terminator */
            text[slen-1] = '\0';
        } else {
            printf("Buffer overflow detected.\n");
            exit(EXIT_FAILURE);
        }
    }

    if (!*text) {
        printf("No text entered.\n");
        exit(EXIT_FAILURE);
    }
}

/*
 * Tokenizes text in place on the characters " ,.!?;:" and appends a
 * heap-allocated copy of every word not yet present in dict.
 *
 * dict->numwords is reset to 0 first, so any previous contents of dict are
 * ignored. Exits with EXIT_FAILURE if an allocation fails.
 */
void read_words(char *text, dictionary_t *dict) {
    static const char separators[] = " ,.!?;:";
    char *token;

    dict->numwords = 0;

    for (token = strtok(text, separators); token != NULL;
         token = strtok(NULL, separators)) {
        char *copy;

        /* search_word() returns 0 for a word that is already stored */
        if (!search_word(dict, token)) {
            continue;
        }

        copy = malloc(strlen(token) + 1);
        if (copy == NULL) {
            printf("Cannot allocate word.\n");
            exit(EXIT_FAILURE);
        }
        strcpy(copy, token);

        /* claim the next free slot and advance the count */
        dict->dictionary[dict->numwords++] = copy;
    }
}

/* linear searching the word */
/*
 * Linear scan of the first dict->numwords entries for a string equal to word.
 *
 * Returns 0 when word is already present and 1 when it is not (note that
 * "found" is the zero result).
 */
int search_word(dictionary_t *dict, char *word) {
    size_t remaining = dict->numwords;
    char **entry = dict->dictionary;

    while (remaining-- > 0) {
        if (!strcmp(*entry++, word)) {
            return 0;
        }
    }
    return 1;
}

/* cmp function for sorting dictionary */
/*
 * qsort() comparator for an array of char * strings.
 *
 * a and b each point to an array element, i.e. to a pointer to a string.
 * Returns the strcmp() ordering of the two strings.
 */
int str_cmp(const void *a, const void *b) {
    const char *lhs = *(const char *const *)a;
    const char *rhs = *(const char *const *)b;

    return strcmp(lhs, rhs);
}

/*
 * Sorts the stored words with qsort() using str_cmp, prints a "Dictionary:"
 * header followed by one word per line, and releases each word right after
 * it is printed.
 *
 * Every printed slot is freed and set to NULL.
 * NOTE(review): dict->numwords is left unchanged, so searching or printing
 * dict again without refilling it would pass null entries to strcmp().
 */
void print_words(dictionary_t *dict) {
    char **slot;
    char **end;

    qsort(dict->dictionary, dict->numwords, sizeof dict->dictionary[0], str_cmp);

    printf("\nDictionary:\n");
    end = dict->dictionary + dict->numwords;
    for (slot = dict->dictionary; slot != end; slot++) {
        puts(*slot);

        /* the word was allocated with malloc() when it was stored */
        free(*slot);
        *slot = NULL;
    }
}

C 中的唯一单词计数器

Unique Words Counter in C

c

words

unique

counting

这是我不久前写的一些代码，它做了类似的事情：