C中的拼写检查器
Spell-checker in C
我一直在尝试使用一个大型词典，对一些包含大约 2000 个单词的文本文件进行拼写检查。但是，我的拼写检查器把所有单词都报告为拼写错误。老实说，我不知道为什么——有人可以帮助我吗？
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
/* Word buffers in this file are declared with lenght + 1 bytes
 * ("lenght" is the spelling used throughout the file). */
#define lenght 45
/* Number of buckets in the hash table; hash_it() reduces its result modulo this value. */
#define hashtable_size 65536
/* File-scope word buffer. load() declares its own local `word` and check()
 * takes a parameter named `word`, so both shadow this object. */
char word[lenght+1];
/* Counter incremented by load() and reported by size(). */
int count = 0;
/*
 * Hash function. Original version credited to Brenda from cs50 reddit.
 *
 * Maps the NUL-terminated string `needs_hashing` to a bucket index in the
 * range [0, hashtable_size).
 *
 * Uses the multiplicative scheme hash = hash * 31 + c. Because 31 is odd,
 * every character of the word influences the low bits that survive the
 * final modulo; a shift-by-2/XOR scheme reduced modulo 65536 would let only
 * the last 8 characters affect the result.
 */
int hash_it(const char* needs_hashing)
{
    unsigned int hash = 0;

    /* Read bytes as unsigned char so values above 127 are not sign-extended
     * on platforms where plain char is signed. */
    for (const unsigned char* p = (const unsigned char*)needs_hashing; *p != '\0'; p++)
    {
        hash = hash * 31u + *p;
    }
    return (int)(hash % hashtable_size);
}
/* One entry of a hash-bucket chain (singly linked list). */
struct node
{
    char *word;         /* string that check() compares against */
    struct node *next;  /* following entry in the same chain, NULL at the end */
};
typedef struct node node;
/* NOTE(review): not referenced by any function visible in this file. */
node* previous;
/* Bucket array indexed by hash_it(); each element is the head of a chain.
 * It has static storage duration, so every head starts out as NULL. */
node* hashtable[hashtable_size];
/*
*
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
char word[lenght+1];
FILE* dict = fopen(dictionary,"r");
for(int i = 0; i < 26;i++)
{
hashtable[i] = NULL;
for(int a = fgetc(dict); a != EOF; a = fgetc(dict))
{
count++;
int hashvalue = hash_it(word);
node* new = malloc(sizeof(node));
if(hashtable[hashvalue] == NULL)
{
hashtable[hashvalue] = new;
new -> next = NULL;
}
else
{
new -> next = hashtable[hashvalue];
hashtable[hashvalue] = new;
}
}
}
fclose(dict);
return true;
}
/*
*
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
char tmp[lenght + 1];
int lenghtw = strlen(word);
for (int i = 0; i < lenghtw; i++)
{
tmp[i] = tolower(word[i]);
}
int index = hash_it(tmp);
if (hashtable[index] == NULL)
{
return false;
}
node* cursor = hashtable[index];
while(cursor != NULL)
{
if(strcmp(tmp, cursor -> word) == 0)
{
return true;
}
cursor = cursor -> next;
}
return false;
}
/*
 * Reports the current value of the global word counter `count`, which is 0
 * until load() has incremented it.
 */
unsigned int size(void)
{
    unsigned int loaded = count;

    return loaded;
}
/*
 * Unloads dictionary from memory. Always returns true.
 *
 * Visits every bucket, frees each node of its chain, and leaves the bucket
 * head set to NULL.
 */
bool unload(void)
{
    for (int bucket = 0; bucket < hashtable_size; bucket++)
    {
        node* head = hashtable[bucket];

        while (head != NULL)
        {
            /* Remember the successor before the node is released. */
            node* following = head->next;
            free(head);
            head = following;
        }
        hashtable[bucket] = NULL;
    }
    return true;
}
/*
 * Command-line driver: loads the file named "dictionary" and reports whether
 * the word given as argv[1] is found in it.
 *
 * Exit status: 3 when the program is not given exactly one argument,
 * 1 when load() reports failure, 0 otherwise.
 */
int main(int argc, char **argv)
{
    if (argc != 2)
    {
        return 3;
    }
    if (!load("dictionary"))
    {
        return 1;
    }
    /* size() returns unsigned int, so the matching conversion is %u. */
    printf("loaded %u words\n", size());
    printf("word '%s'%s found\n", argv[1], check(argv[1]) ? "" : " not");
    unload();
    return 0;
}
你的代码有很多问题:
在 load 函数中，您并没有把词典中的单词加载到散列表中。您使用 fgetc() 一次读取一个字符，并根据未初始化的本地缓冲区 word 创建一个节点。
hash_it 函数只对单词的最后 16 个字符进行散列。此外，hashtable_size 是 2 的幂，这不是个好主意：实际上只有最后 8 个字符会影响哈希值。这不算错误，只是一种低效的散列方法。
在 check 函数中，您复制了单词并将其转换为小写，但忘记在 tmp 数组中的字符串末尾写入终止符 '\0'。
下面是 load 的更正版本，它从词典的每一行读取一个单词：
/*
 * Loads the dictionary file into the hash table, reading one word per line.
 *
 * Leading spaces/tabs are skipped and the word ends at the first blank or
 * line terminator. Empty lines and lines starting with '#' or ';' are
 * ignored. `count` is incremented once per stored word.
 *
 * Returns false if the file cannot be opened or an allocation fails; nodes
 * inserted before the failure remain in the table.
 */
bool load(const char *dictionary) {
    char line[256];
    FILE *dict = fopen(dictionary, "r");
    if (!dict)
        return false;
    while (fgets(line, sizeof line, dict) != NULL) {
        char *p = line + strspn(line, " \t");   // skip leading blanks
        p[strcspn(p, " \t\r\n")] = '\0';        // cut at first blank or line ending
        if (*p == '\0' || *p == '#' || *p == ';')
            continue;                           // ignore blank lines and comments
        size_t len = strlen(p);
        // The text is stored in the node's own allocation, right after the
        // struct, so freeing the node (as unload() does) frees the word too.
        node *np = malloc(sizeof *np + len + 1);
        if (np == NULL) {
            fclose(dict);
            return false;
        }
        np->word = (char *)(np + 1);
        memcpy(np->word, p, len + 1);
        int hashvalue = hash_it(p);
        np->next = hashtable[hashvalue];
        hashtable[hashvalue] = np;
        count++;
    }
    fclose(dict);
    return true;
}
我一直在尝试使用一个大型词典，对一些包含大约 2000 个单词的文本文件进行拼写检查。但是，我的拼写检查器把所有单词都报告为拼写错误。老实说，我不知道为什么——有人可以帮助我吗？
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include "dictionary.h"
/* Word buffers in this file are declared with lenght + 1 bytes
 * ("lenght" is the spelling used throughout the file). */
#define lenght 45
/* Number of buckets in the hash table; hash_it() reduces its result modulo this value. */
#define hashtable_size 65536
/* File-scope word buffer. load() declares its own local `word` and check()
 * takes a parameter named `word`, so both shadow this object. */
char word[lenght+1];
/* Counter incremented by load() and reported by size(). */
int count = 0;
/*
 * Hash function. Original version credited to Brenda from cs50 reddit.
 *
 * Maps the NUL-terminated string `needs_hashing` to a bucket index in the
 * range [0, hashtable_size).
 *
 * Uses the multiplicative scheme hash = hash * 31 + c. Because 31 is odd,
 * every character of the word influences the low bits that survive the
 * final modulo; a shift-by-2/XOR scheme reduced modulo 65536 would let only
 * the last 8 characters affect the result.
 */
int hash_it(const char* needs_hashing)
{
    unsigned int hash = 0;

    /* Read bytes as unsigned char so values above 127 are not sign-extended
     * on platforms where plain char is signed. */
    for (const unsigned char* p = (const unsigned char*)needs_hashing; *p != '\0'; p++)
    {
        hash = hash * 31u + *p;
    }
    return (int)(hash % hashtable_size);
}
/* One entry of a hash-bucket chain (singly linked list). */
struct node
{
    char *word;         /* string that check() compares against */
    struct node *next;  /* following entry in the same chain, NULL at the end */
};
typedef struct node node;
/* NOTE(review): not referenced by any function visible in this file. */
node* previous;
/* Bucket array indexed by hash_it(); each element is the head of a chain.
 * It has static storage duration, so every head starts out as NULL. */
node* hashtable[hashtable_size];
/*
*
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
char word[lenght+1];
FILE* dict = fopen(dictionary,"r");
for(int i = 0; i < 26;i++)
{
hashtable[i] = NULL;
for(int a = fgetc(dict); a != EOF; a = fgetc(dict))
{
count++;
int hashvalue = hash_it(word);
node* new = malloc(sizeof(node));
if(hashtable[hashvalue] == NULL)
{
hashtable[hashvalue] = new;
new -> next = NULL;
}
else
{
new -> next = hashtable[hashvalue];
hashtable[hashvalue] = new;
}
}
}
fclose(dict);
return true;
}
/*
*
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
char tmp[lenght + 1];
int lenghtw = strlen(word);
for (int i = 0; i < lenghtw; i++)
{
tmp[i] = tolower(word[i]);
}
int index = hash_it(tmp);
if (hashtable[index] == NULL)
{
return false;
}
node* cursor = hashtable[index];
while(cursor != NULL)
{
if(strcmp(tmp, cursor -> word) == 0)
{
return true;
}
cursor = cursor -> next;
}
return false;
}
/*
 * Reports the current value of the global word counter `count`, which is 0
 * until load() has incremented it.
 */
unsigned int size(void)
{
    unsigned int loaded = count;

    return loaded;
}
/*
 * Unloads dictionary from memory. Always returns true.
 *
 * Visits every bucket, frees each node of its chain, and leaves the bucket
 * head set to NULL.
 */
bool unload(void)
{
    for (int bucket = 0; bucket < hashtable_size; bucket++)
    {
        node* head = hashtable[bucket];

        while (head != NULL)
        {
            /* Remember the successor before the node is released. */
            node* following = head->next;
            free(head);
            head = following;
        }
        hashtable[bucket] = NULL;
    }
    return true;
}
/*
 * Command-line driver: loads the file named "dictionary" and reports whether
 * the word given as argv[1] is found in it.
 *
 * Exit status: 3 when the program is not given exactly one argument,
 * 1 when load() reports failure, 0 otherwise.
 */
int main(int argc, char **argv)
{
    if (argc != 2)
    {
        return 3;
    }
    if (!load("dictionary"))
    {
        return 1;
    }
    /* size() returns unsigned int, so the matching conversion is %u. */
    printf("loaded %u words\n", size());
    printf("word '%s'%s found\n", argv[1], check(argv[1]) ? "" : " not");
    unload();
    return 0;
}
你的代码有很多问题:
在 load 函数中，您并没有把词典中的单词加载到散列表中。您使用 fgetc() 一次读取一个字符，并根据未初始化的本地缓冲区 word 创建一个节点。
hash_it 函数只对单词的最后 16 个字符进行散列。此外，hashtable_size 是 2 的幂，这不是个好主意：实际上只有最后 8 个字符会影响哈希值。这不算错误，只是一种低效的散列方法。
在 check 函数中，您复制了单词并将其转换为小写，但忘记在 tmp 数组中的字符串末尾写入终止符 '\0'。
下面是 load 的更正版本，它从词典的每一行读取一个单词：
/*
 * Loads the dictionary file into the hash table, reading one word per line.
 *
 * Leading spaces/tabs are skipped and the word ends at the first blank or
 * line terminator. Empty lines and lines starting with '#' or ';' are
 * ignored. `count` is incremented once per stored word.
 *
 * Returns false if the file cannot be opened or an allocation fails; nodes
 * inserted before the failure remain in the table.
 */
bool load(const char *dictionary) {
    char line[256];
    FILE *dict = fopen(dictionary, "r");
    if (!dict)
        return false;
    while (fgets(line, sizeof line, dict) != NULL) {
        char *p = line + strspn(line, " \t");   // skip leading blanks
        p[strcspn(p, " \t\r\n")] = '\0';        // cut at first blank or line ending
        if (*p == '\0' || *p == '#' || *p == ';')
            continue;                           // ignore blank lines and comments
        size_t len = strlen(p);
        // The text is stored in the node's own allocation, right after the
        // struct, so freeing the node (as unload() does) frees the word too.
        node *np = malloc(sizeof *np + len + 1);
        if (np == NULL) {
            fclose(dict);
            return false;
        }
        np->word = (char *)(np + 1);
        memcpy(np->word, p, len + 1);
        int hashvalue = hash_it(p);
        np->next = hashtable[hashvalue];
        hashtable[hashvalue] = np;
        count++;
    }
    fclose(dict);
    return true;
}