Valgrind error: Conditional jump or move depends on uninitialised value

Question

这是我第一次 post 在这里，对于任何违反规范的行为，我们深表歉意... 我正在完成哈佛 Cs50 的拼写问题。任务是将字典文件加载到哈希映射中，然后检查文本文件中是否存在拼写错误的单词。我的程序运行良好并通过了除 valgrind 测试之外的所有测试。这是我的问题代码，还有另一个哈佛编写的文件，其中包含程序的主要部分并调用我编写的所有函数：

// Implements a dictionary's functionality

#include <ctype.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <strings.h>
#include <stdio.h>
#include "dictionary.h"

// Represents a node in a hash table

typedef struct node
{
    char word[LENGTH + 1];
    struct node* next;
}
node;

void deleteList(node* n);
bool checkList(node* n, const char* checkword);


// TODO: Choose number of buckets in hash table
const unsigned int N = 21952;

// Hash table
node* table[N];

// Returns true if word is in dictionary, else false
bool check(const char* word)
{
    int code = hash(word);
    return checkList(table[code], word);
}

int dictSize = 0;
bool dictLoaded = false;

// Hashes word to a number
unsigned int hash(const char* word)
{
    int index = 0;
    //char* wordCopy = malloc((sizeof(char) * strlen(word))); //VALGRIND ERROR Conditional jump or move depends on uninitialised value
    char* wordCopy = calloc(strlen(word), sizeof(char));
    strncpy(wordCopy, word, strlen(word)); //this line


    int sum = 0;
    //iterate thru the word until it ends or we get to our requisite 3 characters
    //must accomodate any capitalization or apostraphes
    while (word[index] != '[=10=]' && index < 3) // this line is still an an error, just like when word was wordcopy
    {
        //change chars to int vals so that null is 0, a is 1, z is 26, and ' is 27
        int ascii = 0; // this remains unchanged if the current char is null
        if (isalpha(wordCopy[index]) != 0)
        {
            wordCopy[index] = tolower(wordCopy[index]);
            ascii = wordCopy[index] - 96;
        }
        else if (wordCopy[index] == '\'')
        {
            ascii = 27;
        }

// add the current chars val to the sum of the word's first three vals
// the math here ensures that "null null null" will be 0, and ''' will be 21,951 or the last index of our hash map
    if (index == 0)
        sum += ascii * 784;

    if (index == 1)
        sum += ascii * 28;

    if (index == 2)
        sum += ascii;

    index++;
    }


    free(wordCopy);

    return sum;
}

// Loads dictionary into memory, returning true if successful, else false
bool load(const char* dictionary)
{

    FILE* dict = fopen(dictionary, "r");



   int wordLen = 0;
   char* tmpWord = malloc((sizeof(char) * 45) + 1);

   //hard code the first word

   // this first word would screw our loop bc the loop compares the null at the end of a word to a previous null
   //this first word has no previous null to compare to
    int index = 0;

    char c;
    while (fread(&c, sizeof(char), 1, dict))
    {

        if (c != '\n')
        {
            tmpWord[index] = c;
            index++;
        }
        else
        {
            break;
        }
    }


    if (index < 1)
    {
        printf("first word broke load function\n");
        return false;
    }
//find some memory for our word
    node* firstNode = malloc(sizeof(node));

    //copy over the word into the node
    strncpy(firstNode->word, tmpWord, index);

    int code = hash(firstNode->word);

    firstNode->next = table[code];
    table[code] = firstNode;
    dictSize++;


    int lastNull = index - 1;
    int tmpWordIndex = 0;

    //now we can loop thru that ish!
    while (fread(&c, sizeof(char), 1, dict))
    {


        if (c != '\n' && c != '[=10=]')
        {
            tmpWord[tmpWordIndex] = c; //this starts copying into the tmp word at 0
            tmpWordIndex++;
        }
        else // we have reached the end of a string, dictionary[i] == [=10=]
        {
            wordLen = index - lastNull - 1; // -1 for the new line characters

            //create a new node to store this new word
            node* newNode = malloc(sizeof(node));

            // then we actually copy the word over from tmpWord
            strncpy(newNode->word, tmpWord, wordLen);

            code = hash(newNode->word);

            //insert node at the beginning of our list by changing pointer to current first item
            //then change the head node at table[code] to point to our new node
            newNode->next = table[code];
            table[code] = newNode;
            dictSize++;

            //reset the word index so that the next word will copy into the start of tmp word
            //reset the last null encountered to our current char, a null
            tmpWordIndex = 0;
            lastNull = index;

        }


        index++;
    }
        //do it all once more outside loop to grab the last character
            wordLen = index - lastNull - 1; 
            node* newNode = malloc(sizeof(node));
            strncpy(newNode->word, tmpWord, wordLen);
            code = hash(newNode->word);
            newNode->next = table[code];
            table[code] = newNode;


    
    free(tmpWord);
    dictLoaded = true;


    fclose(dict);
    return true;

}

// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
    if (dictLoaded)
    {
        return dictSize;
    }
    else
    {
        return 0;
    }
}

// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
    bool unloaded = false;
    for (int n = 0; n < N; n++)
    {
        node* tmp;
        while(table[n] != NULL)
        {
            tmp = table[n];
            table[n] = table[n]->next;
            free(tmp);
        }
        if(n == N-1)
        {
            unloaded = true;
        }
    }

    return unloaded;
}

void deleteList(node* n) 
{
    // base case is if next pointer is null
    if (n->next == NULL)
    {
        free(n);
        return;
    }

    deleteList(n->next);
}

bool checkList(node* n, const char* checkword)//recursion brah
{
    if (n == NULL)
    {
        return false;
    }
    if (strcasecmp(n->word, checkword) == 0) //error here, see below


// VALGRIND ERROR Conditional jump or move depends on uninitialised value(s): (file: dictionary.c, line: 258)
//Use of uninitialised value of size 8: (file: dictionary.c, line: 258)


    {
        return true;
    }
    else
    {
        return checkList(n->next, checkword); 
    }

}

哈佛大学的检查测试特别针对这些行给我一个错误：在 hash(const char* word) 函数中

char* wordCopy = calloc(strlen(word), sizeof(char));
strncpy(wordCopy, word, strlen(word));
...
while (word[index] != '[=11=]' && index < 3)

我试过将 malloc 更改为 calloc。我已经尝试将我要求 malloc/calloc 的内存大小增加和减少 1，即 malloc((strlen(word) +/- 1) * sizeof(char)). 原代码在while循环中对比了wordCopy[index]，但是word[index]还是报同样的错误。我在 valgrind 上尝试了运行 --track-origins=yes arg（见下文），但这并没有给我任何东西，只有哈佛的测试实际上告诉我错误。

speller/ $ valgrind -s --track-origins=yes ./speller
==7117== Memcheck, a memory error detector
==7117== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==7117== Using Valgrind-3.15.0 and LibVEX; rerun with -h for copyright info
==7117== Command: ./speller
==7117== 
Usage: ./speller [DICTIONARY] text
==7117== 
==7117== HEAP SUMMARY:
==7117==     in use at exit: 0 bytes in 0 blocks
==7117==   total heap usage: 1 allocs, 1 frees, 1,024 bytes allocated
==7117== 
==7117== All heap blocks were freed -- no leaks are possible
==7117== 
==7117== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

哈佛的测试结果如下：

running valgrind --show-leak-kinds=all --xml=yes --xml-file=/tmp/tmpqjc7357q -- ./speller substring/dict substring/text...
checking for output "MISSPELLED WORDS\n\nca\ncats\ncaterpill\ncaterpillars\n\nWORDS MISSPELLED: 4\nWORDS IN DICTIONARY: 2\nWORDS IN TEXT: 6\n"...
checking that program exited with status 0...
checking for valgrind errors...
Conditional jump or move depends on uninitialised value(s): (file: dictionary.c, line: 45)
Conditional jump or move depends on uninitialised value(s): (file: dictionary.c, line: 46)
Conditional jump or move depends on uninitialised value(s): (file: dictionary.c, line: 57)
Conditional jump or move depends on uninitialised value(s): (file: dictionary.c, line: 267)
Use of uninitialised value of size 8: (file: dictionary.c, line: 267)

45、46 和 57 是上面提到的散列函数中的行。第267行如下，在代码底部的checkList函数中找到。

if (strcasecmp(n->word, checkword) == 0)

这是我的第一个编码 class，这个特殊问题完全阻碍了我的进步。如果代码不太好看，我深表歉意。感谢阅读我的 post!

编辑：下面是 dictionary.h 和 speller.c，它们都是我预先编写的...

// Declares a dictionary's functionality

#ifndef DICTIONARY_H
#define DICTIONARY_H

#include <stdbool.h>

// Maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45

// Prototypes
bool check(const char* word);
unsigned int hash(const char* word);
bool load(const char* dictionary);
unsigned int size(void);
bool unload(void);

#endif // DICTIONARY_H

// Implements a spell-checker

#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>

#include "dictionary.h"

// Undefine any definitions
#undef calculate
#undef getrusage

// Default dictionary
#define DICTIONARY "dictionaries/large"

// Prototype
double calculate(const struct rusage *b, const struct rusage *a);

int main(int argc, char *argv[])
{
    // Check for correct number of args
    if (argc != 2 && argc != 3)
    {
        printf("Usage: ./speller [DICTIONARY] text\n");
        return 1;
    }

    // Structures for timing data
    struct rusage before, after;

    // Benchmarks
    double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;

    // Determine dictionary to use
    char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;

    // Load dictionary
    getrusage(RUSAGE_SELF, &before);
    bool loaded = load(dictionary);
    getrusage(RUSAGE_SELF, &after);

    // Exit if dictionary not loaded
    if (!loaded)
    {
        printf("Could not load %s.\n", dictionary);
        return 1;
    }

    // Calculate time to load dictionary
    time_load = calculate(&before, &after);

    // Try to open text
    char *text = (argc == 3) ? argv[2] : argv[1];
    FILE *file = fopen(text, "r");
    if (file == NULL)
    {
        printf("Could not open %s.\n", text);
        unload();
        return 1;
    }

    // Prepare to report misspellings
    printf("\nMISSPELLED WORDS\n\n");

    // Prepare to spell-check
    int index = 0, misspellings = 0, words = 0;
    char word[LENGTH + 1];

    // Spell-check each word in text
    char c;
    while (fread(&c, sizeof(char), 1, file))
    {
        // Allow only alphabetical characters and apostrophes
        if (isalpha(c) || (c == '\'' && index > 0))
        {
            // Append character to word
            word[index] = c;
            index++;

            // Ignore alphabetical strings too long to be words
            if (index > LENGTH)
            {
                // Consume remainder of alphabetical string
                while (fread(&c, sizeof(char), 1, file) && isalpha(c));

                // Prepare for new word
                index = 0;
            }
        }

        // Ignore words with numbers (like MS Word can)
        else if (isdigit(c))
        {
            // Consume remainder of alphanumeric string
            while (fread(&c, sizeof(char), 1, file) && isalnum(c));

            // Prepare for new word
            index = 0;
        }

        // We must have found a whole word
        else if (index > 0)
        {
            // Terminate current word
            word[index] = '[=16=]';

            // Update counter
            words++;

            // Check word's spelling
            getrusage(RUSAGE_SELF, &before);
            bool misspelled = !check(word);
            getrusage(RUSAGE_SELF, &after);

            // Update benchmark
            time_check += calculate(&before, &after);

            // Print word if misspelled
            if (misspelled)
            {
                printf("%s\n", word);
                misspellings++;
            }

            // Prepare for next word
            index = 0;
        }
    }

    // Check whether there was an error
    if (ferror(file))
    {
        fclose(file);
        printf("Error reading %s.\n", text);
        unload();
        return 1;
    }

    // Close text
    fclose(file);


    // Determine dictionary's size
    getrusage(RUSAGE_SELF, &before);
    unsigned int n = size();
    getrusage(RUSAGE_SELF, &after);

    // Calculate time to determine dictionary's size
    time_size = calculate(&before, &after);

    // Unload dictionary
    // causing core dump!!
    getrusage(RUSAGE_SELF, &before);
    bool unloaded = unload();
    getrusage(RUSAGE_SELF, &after);

    // Abort if dictionary not unloaded
    if (!unloaded)
    {
        printf("Could not unload %s.\n", dictionary);
        return 1;
    }

    // Calculate time to unload dictionary
    time_unload = calculate(&before, &after);

    // Report benchmarks
    printf("\nWORDS MISSPELLED:     %d\n", misspellings);
    printf("WORDS IN DICTIONARY:  %d\n", n);
    printf("WORDS IN TEXT:        %d\n", words);
    printf("TIME IN load:         %.2f\n", time_load);
    printf("TIME IN check:        %.2f\n", time_check);
    printf("TIME IN size:         %.2f\n", time_size);
    printf("TIME IN unload:       %.2f\n", time_unload);
    printf("TIME IN TOTAL:        %.2f\n\n",
           time_load + time_check + time_size + time_unload);

    // Success
    return 0;
}

// Returns number of seconds between b and a
double calculate(const struct rusage *b, const struct rusage *a)
{
    if (b == NULL || a == NULL)
    {
        return 0.0;
    }
    else
    {
        return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
                  (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
                 ((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
                  (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
                / 1000000.0);
    }
}

编辑： pm100 的回答帮助缓解了一切，除了我的哈希函数中的 while 循环检查。我认为我传递给散列的所有单词都应该在适当的索引中以 null 结尾。我将留下我的旧代码作为参考。以下是我当前相关功能的代码。我在加载函数中添加了空值，因为此函数将乱码字符串传递给给出错误的散列函数。为了消除哈希中的 malloc 错误，我想我可以只复制当前索引处的字符，而不是引用传递给函数的整个单词的副本。那有帮助。但是出于某种原因，在 while 循环条件中将 d char 与 null 进行比较并不好。


// Hashes word to a number
unsigned int hash(const char* word)
{
    int index = 0;

    int sum = 0;
    //iterate thru the word until it ends or we get to our requisite 3 characters
    //must accomodate any capitalization or apostraphes
    char d = word[0];
    while (d != '[=17=]' && index < 3) // this line is still an an error, just like when it word was wordcopy
    {

        //change chars to int vals so that null is 0, a is 1, z is 26, and ' is 27
        int ascii = 0; // this remains unchanged if the current char is null
        if (isalpha(d) != 0)
        {
            d = tolower(d);
            ascii = d - 96;
        }
        else if (d == '\'')
        {
            ascii = 27;
        }

// add the current chars val to the sum of the word's first three vals
// the math here ensures that "null null null" will be 0, and ''' will be 21,951 or the last index of our hash map
    if (index == 0)
        sum += ascii * 784;

    if (index == 1)
        sum += ascii * 28;

    if (index == 2)
        sum += ascii;

    index++;
    d = word[index];

    }



    return sum;
}

// Loads dictionary into memory, returning true if successful, else false
bool load(const char* dictionary)
{
    FILE* dict = fopen(dictionary, "r");



   int wordLen = 0;
   char* tmpWord = malloc((sizeof(char) * 45) + 1);

   //hard code the first word

   // this first word would screw our loop bc the loop compares the null at the end of a word to a previous null
   //this first word has no previous null to compare to
    int index = 0;

    char c;
    while (fread(&c, sizeof(char), 1, dict))
    {
        //printf("%c\n", c);
        if (c != '\n')
        {
            tmpWord[index] = c;
            index++;
        }
        else
        {
            break;
        }
    }
    tmpWord[index] = '[=17=]';

    if (index < 1)
    {
        printf("first word broke load function\n");
        return false;
    }
//find some memory for our word
    node* firstNode = malloc(sizeof(node));

    //copy over the word into the node
    strncpy(firstNode->word, tmpWord, index + 1);

    int code = hash(tmpWord);


    firstNode->next = table[code];
    table[code] = firstNode;
    dictSize++;


    int lastNull = index - 1;
    int tmpWordIndex = 0;

    //now we can loop thru that!
    while (fread(&c, sizeof(char), 1, dict))
    {

        if (c != '\n' && c != '[=17=]')
        {
            tmpWord[tmpWordIndex] = c; //this starts copying into the tmp word at 0
            tmpWordIndex++;
        }
        else // we have reached the end of a string, dictionary[i] == \n
        {
            wordLen = index - lastNull - 1; // -2 for the null and new line characters

            //create a new node to store this new word
            node* newNode = malloc(sizeof(node));
            tmpWord[tmpWordIndex] = '[=17=]';
            // then we actually copy the word over from tmpWord
            strncpy(newNode->word, tmpWord, wordLen + 1);
            //insert node at the beginning of our list by changing pointer to current first item
            //then change the head node at table[code] to point to our new node
            newNode->next = table[code];
            table[code] = newNode;
            dictSize++;

            //reset the word index so that the next word will copy into the start of tmp word
            //reset the last null encountered to our current char, a null
            tmpWordIndex = 0;
            lastNull = index;

        }


        index++;
    }

            wordLen = index - lastNull - 1;
            node* newNode = malloc(sizeof(node));
            tmpWord[tmpWordIndex] = '[=17=]';
            strncpy(newNode->word, tmpWord, wordLen);
            code = hash(newNode->word);
            newNode->next = table[code];
            table[code] = newNode;



    free(tmpWord);
    dictLoaded = true;

    fclose(dict);
    return true;

}

哈希函数里面的这一行while (d != '[=19=]' && index < 3)还在返回条件跳转valgrind错误

Answer 1

好的，你的主要问题是空终止符，我不太理解你们的逻辑，我只是用一个简单的 4 字文件调用了加载。

您必须确保您为 null 赚了足够的 space 并且您必须确保您有一个 null。

所以

我找到的，你必须修复其他的

char c;
while (fread(&c, sizeof(char), 1, dict))
{

    if (c != '\n')
    {
        tmpWord[index] = c;
        index++;
    }
    else
    {
        break;
    }
}
tmpWord[index] = 0; <<<<===== add the null at the end

和

   //copy over the word into the node
   // +1 for the null
    strncpy(firstNode->word, tmpWord, index + 1);

现在在哈希中

char* wordCopy = calloc(strlen(word)+1, sizeof(char));
strncpy(wordCopy, word, strlen(word)+1);

+1, +1

我确定还有其他人。

valgrind 抱怨的原因是因为没有像 strlen 这样的尾随 null 函数会一直读入内存，直到他们找到一个随机的 null，valgrind 看到你从复制的末尾读取字符

为确保您拥有空值，请输入 printf 或在调试器中暂停并查看字符串。如果你看到

 "cat"

一切都很好，但如果你看到

 "cat!!@*foodle!....ll"

那么你知道你错过了 null

Valgrind error: Conditional jump or move depends on uninitialised value

Valgrind error: Conditional jump or move depends on uninitialised value

c

malloc

valgrind

initialization

cs50