有人会关心确定什么可能导致我的代码中出现段错误吗?
Would someone care to identify what might cause a segfault in my code?
这是我的 CS50 作业。我必须制作一个拼写检查器,将字典加载到 trie 数据结构中。在文件大小增加之前不会发生段错误。
//Trie structure
typedef struct node {
bool is_word;
struct node* children[27];
}node;
node* root;
// maximum length for a word
#define LENGTH 45
//global counter for total words in dic
int SIZE = 0;
将词典加载到内存中。
bool load(const char* dictionary)
{
//Open dictionary file && safecheck
FILE* d_file = fopen(dictionary,"r");
if(d_file == NULL) { return false; }
//allocate space for the first node
root = calloc(1,sizeof(node));
//string buffer
char word[SIZE+1];
//loop through each word of the dictionary and store it to word
while(fscanf(d_file,"%s\n",word)!=EOF){
//navigation node steping further into trie
node* nav = root;
int word_length = strlen(word);
//Iterate through each letter in a word
for(int i=0;i<word_length;i++){
int letter_pos = tolower(word[i]) - 'a';
//check if letter node exists
if(nav->children[letter_pos]==NULL){
//create node for a letter
node* l_node = calloc(1,sizeof(node));
if(l_node == NULL) { return false; }
nav->children[letter_pos] = l_node;
}
//proceed to the next node
if(i < word_length-1)
nav = nav->children[letter_pos];
}
//set word to true;
if(nav->is_word != true){
nav->is_word = true;
// counter for total words in the dictionary
SIZE++;
}
}
fclose(d_file);
return true;
}
我不确定这是否是主要问题,但这部分代码严重缺乏健壮性:
int letter_pos = tolower(word[i]) - 'a';
如果此时 letter_pos
无效(例如,因为 word[i]
不是字母),那么所有后续操作都将导致 UB。
我至少会在这里添加一个 assert
:
int letter_pos = tolower(word[i]) - 'a';
assert(letter_pos >= 0 && letter_pos < sizeof(nav->children) / sizeof(nav->children[0]));
由于您的数组大小为 27,我猜您希望将最后一个元素用于非字母字符,因此您可能希望使代码更像这样:
int letter_pos;
if (isalpha(word[i])) // if 'A'..'Z' or 'a'..'z'
letter_pos = tolower(word[i]) - 'a'; // index = 0..25
else
letter_pos = 'z' - 'a' + 1; // index = 26
assert(letter_pos >= 0 && letter_pos < sizeof(nav->children) / sizeof(nav->children[0]));
这是我的 CS50 作业。我必须制作一个拼写检查器,将字典加载到 trie 数据结构中。在文件大小增加之前不会发生段错误。
//Trie structure
typedef struct node {
bool is_word;
struct node* children[27];
}node;
node* root;
// maximum length for a word
#define LENGTH 45
//global counter for total words in dic
int SIZE = 0;
将词典加载到内存中。
bool load(const char* dictionary)
{
//Open dictionary file && safecheck
FILE* d_file = fopen(dictionary,"r");
if(d_file == NULL) { return false; }
//allocate space for the first node
root = calloc(1,sizeof(node));
//string buffer
char word[SIZE+1];
//loop through each word of the dictionary and store it to word
while(fscanf(d_file,"%s\n",word)!=EOF){
//navigation node steping further into trie
node* nav = root;
int word_length = strlen(word);
//Iterate through each letter in a word
for(int i=0;i<word_length;i++){
int letter_pos = tolower(word[i]) - 'a';
//check if letter node exists
if(nav->children[letter_pos]==NULL){
//create node for a letter
node* l_node = calloc(1,sizeof(node));
if(l_node == NULL) { return false; }
nav->children[letter_pos] = l_node;
}
//proceed to the next node
if(i < word_length-1)
nav = nav->children[letter_pos];
}
//set word to true;
if(nav->is_word != true){
nav->is_word = true;
// counter for total words in the dictionary
SIZE++;
}
}
fclose(d_file);
return true;
}
我不确定这是否是主要问题,但这部分代码严重缺乏健壮性:
int letter_pos = tolower(word[i]) - 'a';
如果此时 letter_pos
无效(例如,因为 word[i]
不是字母),那么所有后续操作都将导致 UB。
我至少会在这里添加一个 assert
:
int letter_pos = tolower(word[i]) - 'a';
assert(letter_pos >= 0 && letter_pos < sizeof(nav->children) / sizeof(nav->children[0]));
由于您的数组大小为 27,我猜您希望将最后一个元素用于非字母字符,因此您可能希望使代码更像这样:
int letter_pos;
if (isalpha(word[i])) // if 'A'..'Z' or 'a'..'z'
letter_pos = tolower(word[i]) - 'a'; // index = 0..25
else
letter_pos = 'z' - 'a' + 1; // index = 26
assert(letter_pos >= 0 && letter_pos < sizeof(nav->children) / sizeof(nav->children[0]));