C - 链表排序无限循环

C - LinkedList Sorting Infinite Loop

我正在创建一个程序,它将从 main.c 中的文本文件中读取一个单词,并将其发送到 list.c 文件以创建一个新节点来存储该单词。该节点还将存储三个整数:first(该词在 txt 文件 1 中出现的次数)、second(该词在 txt 文件 2 中出现的次数)和 dif (abs(first-second))。在将所有新单词添加到文件并计算每个单词在每个 txt 文件中出现的次数后,main.c 将调用一个方法来计算每个节点的第一个和第二个之间的差异。这是差异(存储在每个节点的 dif 中)将用于按降序对链接节点进行排序。

EX。单词:the,第一:2888,第二:2466,dif:422。 红色, 39 12 27 .....

但是,当main调用sort方法时,出现死循环。这个无限循环来自于排序算法的内层循环,当前节点被赋值给curr->next指针的节点。在排序方法的某处,当前节点的下一个指针指向当前节点,而不是链表中实际的下一个节点。如果 sort 方法被禁用,那么所有其他函数都可以正常工作,包括遍历整个列表并打印每个节点中的数据的 printAll(参见我上面的示例)。

我的问题是我无法在我的排序方法中找到 current->next 是如何开始指向当前节点的。感谢您的帮助!

/*
 * list.h
 */

#ifndef LIST_H_
#define LIST_H_

typedef struct node Node;

void findWord(char *word, int book);
void addWord(char *word, int book);
void editWord(Node **endPtr, int book);
void sort();
void swap(Node **a, Node **b);
void calculateDiff();
void printAll();


#endif /* LIST_H_ */




/*
 * list.c
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "list.h"

typedef struct node{
    int first;
    int second;
    int dif;
    char name[20];
    struct node *next;
}Node;

Node *front = NULL;

/*
 * Sees if the current word exists in the
 * linkedlist.
 */
void findWord(char *word, int book) {
    Node *curr = front;
    int boolean = 0;

    while (curr != NULL) {
        if(strcmp(curr->name, word) == 0) {
            boolean = 1;
            editWord(&curr, book);
            break;
        }
        curr = curr->next;

    }

    if(!boolean) { //Add word if it does not exist.
        addWord(word, book);
    }
}

/*
 * Creates a new node for the added word. Adds to front.
 */
void addWord(char *word, int book) {
    Node *newNode = malloc (sizeof(Node));

    /*
     * Since this word is being added
     * to the linkedlist with a newly
     * created node, either the
     * first or second int must be to 1
     * while the other is set to 0. Based
     * off of book int.
     */
    if(book == 1) {
        newNode->first = 1;
        newNode->second = 0;
    } else {
        newNode->first = 0;
        newNode->second = 1;
    }

    newNode->dif = 0;
    strcpy(newNode->name, word);

    newNode->next = front;
    front = newNode;

}

/*
 * Edits the data for an existing word.
 * Only called if current word exists in
 * the linkedlist.
 */
void editWord(Node **endPtr, int book) {
    if (book == 1) {
        (*endPtr)->first++;
    } else {
        (*endPtr)->second++;
    }
}

/*
 * Sorts the list in descending order based on
 * difference value.
 */
void sort() {
    Node *curr, *last = NULL;
    curr = front;

    while (curr != last) {
        while (curr->next != last) {
            if(curr->dif < curr->next->dif ) {
                swap(&curr, &curr->next);
            }
            curr = curr->next;
        }
        last = curr;
        curr = front;
    }
}

/*
 * Swaps the data in the current and next node in the list.
 */
void swap(Node **a, Node **b) {
    int temp;
    char nameTemp[20];


    //Swap first
    temp = (*a)->first;
    (*a)->first = (*b)->first;
    (*b)->first = temp;

    //Swap second
    temp = (*a)->second;
    (*a)->second = (*b)->second;
    (*b)->second = temp;

    //Swap dif
    temp = (*a)->dif;
    (*a)->dif = (*b)->dif;
    (*b)->dif = temp;

    //Swap name
    strcpy(nameTemp, (*a)->name);
    strcpy((*a)->name, (*b)->name);
    strcpy((*b)->name, nameTemp);
}

/*
 * Calculates the difference between first and second
 */
void calculateDiff() {
    Node *curr = front;
    while(curr != NULL) {
        curr->dif = abs((curr->first - curr->second));
        curr = curr->next;
    }
}

/*
 * Prints all the data from the nodes.
 */
void printAll() {
    printf("|| Word || RedBadge || LittleRegiment ||  Diff\n");
    Node *curr = front;
    while ( curr != NULL  ) {
        printf("%s, %d, %d, %d\n", curr->name, curr->first, curr->second, curr->dif);
        curr = curr->next;
    }
}

/*
 * main.c
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "list.h"

void readBook(int book, FILE *infile);
void readLine(char *line, int book);

int main (void) {
    setvbuf(stdout, NULL, _IONBF,0);
    FILE *infile = fopen("RedBadge.txt", "r");
    FILE *infile2 = fopen("LittleRegiment.txt", "r");
    readBook(1, infile);
    readBook(2, infile2);
    fclose(infile);
    fclose(infile2);
    calculateDiff(); 
    sort();
    printAll();
    return 0;
}

void readBook(int book, FILE *infile) {
    char line[70];

    //Read in each line
    while (!feof(infile)) {
        fgets(line, 70, infile);
        readLine(line, book);
    }
}

void readLine(char *line, int book) {
    int i = 0, j = 0;
    char word[20];


    while (line[i]) {
        line[i] = tolower(line[i]); //Convert line to lowercase
        if((line[i] <= 'z' && line[i] >= 'a') || line[i] == 39 || line[i] == '-') {
            word[j] = line[i];
            j++;
        } else if (j != 0) {
            word[j] = '[=10=]';
            findWord(word, book);
            j = 0;
        }
        i++;
    }
}

我认为您的错误实际上是缓冲区溢出。这些书中有些单词超过 19 个字符(适合您的 word 变量的最大值)。当您的 readline 函数尝试读取这些单词时,它将写入 word 数组的边界之外,这是未定义的行为。然后它还会使用 strcpy 将单词复制到节点中,这也会溢出节点的 word 数组。

一个快速的解决方法是丢弃 19 之后不适合您的单词数组的额外字符。在 readline 中添加一个测试 j 有多大:

        if (j < sizeof word - 1) {
            word[j] = line[i];
            j++;
        }

其中一个有问题的词是 "ain't--plundering----"(至少在我下载的文本副本中是这样),这让我觉得也许你也应该在标点符号上拆分词。