有人会关心确定什么可能导致我的代码中出现段错误吗？

Question

这是我的 CS50 作业。我必须制作一个拼写检查器，将字典加载到 trie 数据结构中。在文件大小增加之前不会发生段错误。

//Trie structure
typedef struct node {

    bool is_word;
    struct node* children[27];
}node;

node* root;

// maximum length for a word
#define LENGTH 45
//global counter for total words in dic
int SIZE = 0;

将词典加载到内存中。

bool load(const char* dictionary)
{
    //Open dictionary file && safecheck
    FILE* d_file = fopen(dictionary,"r");
    if(d_file == NULL) { return false; }

    //allocate space for the first node
    root = calloc(1,sizeof(node));

    //string buffer
    char word[SIZE+1];
    //loop through each word of the dictionary and store it to word
    while(fscanf(d_file,"%s\n",word)!=EOF){
        //navigation node steping further into trie
        node* nav = root;
        int word_length = strlen(word);
        //Iterate through each letter in a word
        for(int i=0;i<word_length;i++){
            int letter_pos = tolower(word[i]) - 'a';
            //check if letter node exists
            if(nav->children[letter_pos]==NULL){
                //create node for a letter
                node* l_node = calloc(1,sizeof(node));
                if(l_node == NULL) { return false; }
                nav->children[letter_pos] = l_node;
            }
            //proceed to the next node
            if(i < word_length-1)
                nav = nav->children[letter_pos];
        }
        //set word to true;
        if(nav->is_word != true){
            nav->is_word = true;
            // counter for total words in the dictionary
            SIZE++;
        }
    }
    fclose(d_file);
    return true;
}

Answer 1

我不确定这是否是主要问题，但这部分代码严重缺乏健壮性：

        int letter_pos = tolower(word[i]) - 'a';

如果此时 letter_pos 无效（例如，因为 word[i] 不是字母），那么所有后续操作都将导致 UB。

我至少会在这里添加一个 assert：

        int letter_pos = tolower(word[i]) - 'a';
        assert(letter_pos >= 0 && letter_pos < sizeof(nav->children) / sizeof(nav->children[0]));

由于您的数组大小为 27，我猜您希望将最后一个元素用于非字母字符，因此您可能希望使代码更像这样：

        int letter_pos;
        if (isalpha(word[i]))                    // if 'A'..'Z' or 'a'..'z'
            letter_pos = tolower(word[i]) - 'a'; // index = 0..25
        else
            letter_pos = 'z' - 'a' + 1;          // index = 26
        assert(letter_pos >= 0 && letter_pos < sizeof(nav->children) / sizeof(nav->children[0]));

有人会关心确定什么可能导致我的代码中出现段错误吗？

Would someone care to identify what might cause a segfault in my code?

c

trie

cs50