cs50 pset5 拼写优化

Question

我已经完成拼写并通过了所有检查。但我仍然对性能感到困惑。我已尽最大努力进行研究和运行测试，但与员工的实施相比，我的实施速度慢了 10-20%。我怎样才能改进我的代码？

    // Implements a dictionary's functionality

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include "dictionary.h"

// Represents a node in a hash table
typedef struct node
{
    char word[LENGTH + 1];
    struct node *next;
}
node;
//Prototypes
unsigned int hash(const char *word);

// TODO: Choose number of buckets in hash table
const unsigned int N = 187751; //odd prime number bigger than word count, because math

// Hash table
node *table[N]; //may need to use calloc

//word count
int word_count = 0;

// Returns true if word is in dictionary, else false
bool check(const char *word)
{
    // TODO
    //debug
    //int hashcode = hash(word);
    //printf("bucket: %i, word: %s\n", hash(word), table[hash(word)]->word);
    for (node *tmp = table[hash(word)]; tmp != NULL; tmp = tmp->next) //iterate through bucket
    {
        if (strcasecmp(tmp->word, word) == 0) //compare strings
        {
            return true;
        }
    }
    return false;
}

// Hashes word to a number
unsigned int hash(const char *word)
{
    // TODO: Improve this hash function
    //Used Data Structures youtube course of Dr. Rob Edwards from San Diego State University mostly
    int hash = 0;
    /*int w = 0;
    //convert to upper case
    while (word[w] != '[=10=]')
    {
        word[w] = toupper(word[w]);
        w++;
    }*/
    for (int i = 0; i <= strlen(word); i++)
    {
        hash = (31 * hash + toupper(word[i])) % N; //31 because math
        //check hash size
        /*if (hash >= N)
            {
                printf("incorrect hash!");
            }*/
    }
    return hash;
}

// Loads dictionary into memory, returning true if successful, else false
bool load(const char *dictionary)
{
    // TODO
    //open dictionary file
    FILE *dict = fopen(dictionary, "r");
    if (dict == NULL)
    {
        //printf("Could not open file.\n");
        return false;
    }
    //create temp string for word
    char *word = malloc(LENGTH + 1);
    if (word == NULL)
    {
        return false;
    }
    //read words from dictionary and write to hash table
    while (fscanf(dict, "%s", word) != EOF)
    {
        //node *tmp_word = NULL;
        node *n = malloc(sizeof(node));
        if (n == NULL)
        {
            return false;
        }
        int pos = hash(word); //run hash to find the bucket in table
        strcpy(n->word, word); //write word to temp node
        if (table[pos] == NULL) //check if node is first in bucket (may need to use calloc to create a table)
        {
            n->next = NULL;
        }
        else //if not the first, write node to the head
        {
            n->next = table[pos]; //reference first node
        }
        table[pos] = n; //write node to table
        word_count++; //count new word
    }
    fclose(dict);
    free(word);
    //debug
    /*int j = 0;
    while (j <= 11)
    {
        for (node *tmp = table[j]; tmp != NULL; tmp = tmp->next) //делаем ноду; пока не дойдем до последней; переходим к следующей
                {
                    printf("%s\n", tmp->word); //забираем значение ноды

                }
        printf("Bucket number: %i\n", j);
        j++;
    }*/
    //printf("word count:%i\n", word_count);
    return true;
}

// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int size(void)
{
    return word_count;
}

// Unloads dictionary from memory, returning true if successful, else false
bool unload(void)
{
    // TODO
    for (int i = 0; i <= N; i++)
    {
        while (table[i] != NULL)
        {
            node *tmp = table[i]->next; //делаем ноду и переходим на следующую ячейку
            free(table[i]); //освобождаем текущую
            table[i] = tmp; //начинаем list со второй ячейки
        }
    }
    return true;
}

我的结果：

WORDS MISSPELLED:     955
WORDS IN DICTIONARY:  143091
WORDS IN TEXT:        17756
TIME IN load:         0.04
TIME IN check:        0.02
TIME IN size:         0.00
TIME IN unload:       0.01
TIME IN TOTAL:        0.07

Cs50人员成绩：

WORDS MISSPELLED:     955
WORDS IN DICTIONARY:  143091
WORDS IN TEXT:        17756
TIME IN load:         0.02
TIME IN check:        0.02
TIME IN size:         0.00
TIME IN unload:       0.01
TIME IN TOTAL:        0.06

我已经玩过bucket size，没用(((

Pset5 指令：https://cs50.harvard.edu/x/2022/psets/5/speller/

如果您需要的话，我也包括 dictionary.h。检查正在从另一个文本文件获取字符串，而不是加载中使用的文件。

#ifndef DICTIONARY_H
#define DICTIONARY_H

#include <stdbool.h>

// Maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45

// Prototypes
bool check(const char *word);
unsigned int hash(const char *word);
bool load(const char *dictionary);
unsigned int size(void);
bool unload(void);

#endif // DICTIONARY_H

Craig Estey 几乎完美地回答了我的问题。与我的代码相比，他的代码运行速度超快，而且比 cs50 员工的代码运行速度更快：

更大的文字：

我学到了很多东西，真的很感激。这是我的第一个问题，关于 SO，我从来没有预料到它会是一个如此有帮助和友好的地方。

Answer 1

几个问题：

while (fscanf(dict, "%s", word) != EOF) 是错误的。你想要：while (fscanf(dict, "%s", word) == 1)
更快地将给定单词存储到table中作为大写。（即）我们对每个单词只toupper一次。
check 函数会更快，因为它可以 [then] 使用 strcmp 而不是 strcasecmp [ 更慢 ]
如果我们可以将字段添加到 node 结构，我们可以让它保存一个哈希值。这可以加速 check 函数（参见下面代码中的 NODEHASH）。
为每个节点做一个malloc是slow/wasteful。 pooled/arena/slab 分配器可以更快（参见下面代码中的 FASTALLOC）。同样，if 我们可以向 node 结构添加一个字段。

下面的代码中记录了一些其他错误（请参阅“注意”注释）。我使用 cpp 条件来表示旧代码与新代码：

#if 0
// old code
#else
// new code
#endif

#if 1
// new code
#endif

这是重构后的代码。我编译了它，但没有测试过它：

    // Implements a dictionary's functionality

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

//#include "dictionary.h"

// NOTE/BUG: dictionary.h was _not_ posted, so we have to guess
#define LENGTH      20

#ifndef NODEHASH
#define NODEHASH    1
#endif

#ifndef FASTALLOC
#define FASTALLOC   1
#endif

// Represents a node in a hash table
typedef struct node {
    char word[LENGTH + 1];
    struct node *next;
#if NODEHASH
    unsigned int hash;
#endif
#if FASTALLOC
    int alloc;
#endif
} node;

//Prototypes
unsigned int hash(const char *word);

// TODO: Choose number of buckets in hash table
// odd prime number bigger than word count, because math
// NOTE/BUG: this doesn't work too well for C (may be okay in C++) when used
// as dimension for table below
// NOTE/BUG: enum may be faster in calculations
#if 0
const unsigned int N = 187751;
#else
enum {
    N = 187751
};
#endif

// Hash table
node *table[N];                         // may need to use calloc

//word count
int word_count = 0;

// Returns true if word is in dictionary, else false
bool
check(const char *word)
{
    // TODO
    // debug
    // int hashcode = hash(word);
    // printf("bucket: %i, word: %s\n", hash(word), table[hash(word)]->word);

#if 1
    unsigned int hval = hash(word);
#endif

    // iterate through bucket
    for (node *tmp = table[hval]; tmp != NULL; tmp = tmp->next) {
#if NODEHASH
        // check hash value first -- limits number of str*cmp calls
        if (tmp->hash != hval)
            continue;
#endif

        // compare strings
// NOTE/BUG: strcasecmp is _slow_ and if we _store_ uppercase we don't need it
#if 0
        if (strcasecmp(tmp->word, word) == 0) {
            return true;
        }
#else
        if (strcmp(tmp->word, word) == 0)
            return true;
#endif
    }

    return false;
}

// Hashes word to a number
unsigned int
hash(const char *word)
{
    // TODO: Improve this hash function
    // Used Data Structures youtube course of Dr. Rob Edwards from San Diego State University mostly
    int hash = 0;

    /* int w = 0; //convert to upper case while (word[w] != '[=11=]') { word[w] = toupper(word[w]); w++; } */

// NOTE/BUG: _without_ optimization this is O(n^2) instead of O(n) because it
// calls strlen on _each_ loop iteration
#if 0
    for (int i = 0; i <= strlen(word); i++) {
        hash = (31 * hash + toupper(word[i])) % N;  // 31 because math
        // check hash size
        /* if (hash >= N) { printf("incorrect hash!"); } */
    }
#else
    for (int chr = *word++;  chr != 0;  chr = *word++)
        hash = (31 * hash + chr) % N;   // 31 because math
#endif

    return hash;
}

#if FASTALLOC
node *freelist = NULL;
int freecount = 0;

// adjust this value to suit
#define ARENASIZE       1000

// newnode -- allocate from pool
node *
newnode(void)
{
    node *cur;

    while (1) {
        // get node from free list cache
        cur = freelist;

        // use the cached node if possible
        if (cur != NULL) {
            freelist = cur->next;
            break;
        }

        // allocate a new arena
        freelist = malloc(sizeof(node) * ARENASIZE);

        // out of memory
        if (freelist == NULL)
            break;

        // link all nodes in the arena
        node *prev = NULL;
        for (cur = freelist;  cur < &freelist[ARENASIZE];  ++cur) {
            cur->alloc = 0;
            cur->next = prev;
            prev = cur;
        }

        // mark the arena head
        freelist->alloc = 1;
    }

    return cur;
}
#endif

// Loads dictionary into memory, returning true if successful, else false
bool
load(const char *dictionary)
{
    // TODO
    // open dictionary file
    FILE *dict = fopen(dictionary, "r");

    if (dict == NULL) {
        // printf("Could not open file.\n");
        return false;
    }
    // create temp string for word
// NOTE/BUG: no need to malloc this (and it is slightly slower)
#if 0
    char *word = malloc(LENGTH + 1);
    if (word == NULL) {
        return false;
    }
#else
    char word[LENGTH + 1];
#endif

    // read words from dictionary and write to hash table
// NOTE/BUG: this scanf is wrong (and may loop too much)
#if 0
    while (fscanf(dict, "%s", word) != EOF) {
#else
    while (fscanf(dict, "%s", word) == 1) {
#endif
        // node *tmp_word = NULL;
#if FASTALLOC
        node *n = newnode();
#else
        node *n = malloc(sizeof(node));
#endif
        if (n == NULL)
            return false;

// NOTE/FIX: convert to uppercase _once_
        int idx;
        for (idx = 0;  word[idx] != 0;  ++idx)
            n->word[idx] = toupper(word[idx] & 0xFF);
        n->word[idx] = 0;

        // run hash to find the bucket in table
// NOTE/BUG: pos should be unsigned
#if 0
        int pos = hash(word);
#else
        unsigned int pos = hash(n->word);
#endif

#if 0
        strcpy(n->word, word);          // write word to temp node
#endif

#if NODEHASH
        n->hash = pos;
#endif

// NOTE/BUG: no need to check table[pos]
#if 0
        // check if node is first in bucket
        // (may need to use calloc to create a table)
        if (table[pos] == NULL)
        {
            n->next = NULL;
        }

        // if not the first, write node to the head
        else
        {
            n->next = table[pos];       // reference first node
        }
#else
        n->next = table[pos];
#endif

        table[pos] = n;                 // write node to table
        word_count++;                   // count new word
    }

    fclose(dict);
#if 0
    free(word);
#endif

    // debug
    /* int j = 0; while (j <= 11) { for (node *tmp = table[j]; tmp != NULL; tmp = tmp->next) //делаем ноду; пока не дойдем до последней; переходим к следующей { printf("%s\n", tmp->word); //забираем значение ноды

       } printf("Bucket number: %i\n", j); j++; } */
    // printf("word count:%i\n", word_count);
    return true;
}

// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int
size(void)
{
    return word_count;
}

// Unloads dictionary from memory, returning true if successful, else false
#if ! FASTALLOC
bool
unload(void)
{

    // TODO
// NOTE/BUG: this goes one beyond the table end (i.e. want: i < N)
#if 0
    for (int i = 0; i <= N; i++) {
        while (table[i] != NULL) {
            node *tmp = table[i]->next; // делаем ноду и переходим на следующую ячейку

            free(table[i]);             // освобождаем текущую
            table[i] = tmp;             // начинаем list со второй ячейки
        }
    }
#else
    for (int i = 0;  i < N;  ++i) {
        node *cur = table[i];
        if (cur == NULL)
            continue;

        node *next;

        for (;  cur != NULL;  cur = next) {
            next = cur->next;
            free(cur);
        }
#endif

    return true;
}
#endif

// Unloads dictionary from memory, returning true if successful, else false
#if FASTALLOC
bool
unload(void)
{
    node *cur;
    node *next;

    // remove all nodes from the table
    for (int i = 0;  i < N;  ++i) {
        cur = table[i];
        if (cur == NULL)
            continue;

        // put all nodes in hash table back on [cached] free list
        for (;  cur != NULL;  cur = next) {
            next = cur->next;
            cur->next = freelist;
            freelist = cur;
        }

        table[i] = NULL;
    }

    cur = freelist;
    freelist = NULL;

    // trim free list to allocation/arena heads
    for (;  cur != NULL;  cur = next) {
        next = cur->next;
        if (cur->alloc) {
            cur->next = freelist;
            freelist = cur;
        }
    }

    // free the allocated nodes
    cur = freelist;
    freelist = NULL;
    for (;  cur != NULL;  cur = next) {
        next = cur->next;
        free(cur);
    }

    return true;
}
#endif

这是上面的清理版本（我运行通过 unifdef）：

    // Implements a dictionary's functionality

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

//#include "dictionary.h"

// NOTE/BUG: dictionary.h was _not_ posted, so we have to guess
#define LENGTH      20

// Represents a node in a hash table
typedef struct node {
    char word[LENGTH + 1];
    struct node *next;
    unsigned int hash;
    int alloc;
} node;

//Prototypes
unsigned int hash(const char *word);

// TODO: Choose number of buckets in hash table
// odd prime number bigger than word count, because math
enum {
    N = 187751
};

// Hash table
node *table[N];                         // may need to use calloc

//word count
int word_count = 0;

// Returns true if word is in dictionary, else false
bool
check(const char *word)
{
    // TODO
    // debug
    // int hashcode = hash(word);
    // printf("bucket: %i, word: %s\n", hash(word), table[hash(word)]->word);

    unsigned int hval = hash(word);

    // iterate through bucket
    for (node *tmp = table[hval]; tmp != NULL; tmp = tmp->next) {
        // check hash value first -- limits number of str*cmp calls
        if (tmp->hash != hval)
            continue;

        // compare strings
        if (strcmp(tmp->word, word) == 0)
            return true;
    }

    return false;
}

// Hashes word to a number
unsigned int
hash(const char *word)
{
    // TODO: Improve this hash function
    // Used Data Structures youtube course of Dr. Rob Edwards from San Diego State University mostly
    int hash = 0;

    /* int w = 0; //convert to upper case while (word[w] != '[=12=]') { word[w] = toupper(word[w]); w++; } */

    for (int chr = *word++;  chr != 0;  chr = *word++)
        hash = (31 * hash + chr) % N;   // 31 because math

    return hash;
}

node *freelist = NULL;
int freecount = 0;

// adjust this value to suit
#define ARENASIZE       1000

// newnode -- allocate from pool
node *
newnode(void)
{
    node *cur;

    while (1) {
        // get node from free list cache
        cur = freelist;

        // use the cached node if possible
        if (cur != NULL) {
            freelist = cur->next;
            break;
        }

        // allocate a new arena
        freelist = malloc(sizeof(node) * ARENASIZE);

        // out of memory
        if (freelist == NULL)
            break;

        // link all nodes in the arena
        node *prev = NULL;
        for (cur = freelist;  cur < &freelist[ARENASIZE];  ++cur) {
            cur->alloc = 0;
            cur->next = prev;
            prev = cur;
        }

        // mark the arena head
        freelist->alloc = 1;
    }

    return cur;
}

// Loads dictionary into memory, returning true if successful, else false
bool
load(const char *dictionary)
{
    // TODO
    // open dictionary file
    FILE *dict = fopen(dictionary, "r");

    if (dict == NULL) {
        // printf("Could not open file.\n");
        return false;
    }
    // create temp string for word
// NOTE/BUG: no need to malloc this (and it is slightly slower)
    char word[LENGTH + 1];

    // read words from dictionary and write to hash table
// NOTE/BUG: this scanf is wrong (and may loop too much)
    while (fscanf(dict, "%s", word) == 1) {
        // node *tmp_word = NULL;
        node *n = newnode();
        if (n == NULL)
            return false;

// NOTE/FIX: convert to uppercase _once_
        int idx;
        for (idx = 0;  word[idx] != 0;  ++idx)
            n->word[idx] = toupper(word[idx] & 0xFF);
        n->word[idx] = 0;

        // run hash to find the bucket in table
// NOTE/BUG: pos should be unsigned
        unsigned int pos = hash(n->word);

        n->hash = pos;

// NOTE/BUG: no need to check table[pos]
        n->next = table[pos];

        table[pos] = n;                 // write node to table
        word_count++;                   // count new word
    }

    fclose(dict);

    // debug
    /* int j = 0; while (j <= 11) { for (node *tmp = table[j]; tmp != NULL; tmp = tmp->next) //делаем ноду; пока не дойдем до последней; переходим к следующей { printf("%s\n", tmp->word); //забираем значение ноды

       } printf("Bucket number: %i\n", j); j++; } */
    // printf("word count:%i\n", word_count);
    return true;
}

// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int
size(void)
{
    return word_count;
}

// Unloads dictionary from memory, returning true if successful, else false
bool
unload(void)
{
    node *cur;
    node *next;

    // remove all nodes from the table
    for (int i = 0;  i < N;  ++i) {
        cur = table[i];
        if (cur == NULL)
            continue;

        // put all nodes in hash table back on [cached] free list
        for (;  cur != NULL;  cur = next) {
            next = cur->next;
            cur->next = freelist;
            freelist = cur;
        }

        table[i] = NULL;
    }

    cur = freelist;
    freelist = NULL;

    // trim free list to allocation/arena heads
    for (;  cur != NULL;  cur = next) {
        next = cur->next;
        if (cur->alloc) {
            cur->next = freelist;
            freelist = cur;
        }
    }

    // free the allocated nodes
    cur = freelist;
    freelist = NULL;
    for (;  cur != NULL;  cur = next) {
        next = cur->next;
        free(cur);
    }

    return true;
}

更新#2：

I've already submitted my version. Just trying to understand the subject better. – meotet

好的，因此，我制作了一个最终版本，这是我对速度的最大努力。如果你还在努力，我做了更多的重构。

注意事项...

为每个节点做一个 single malloc 是浪费。 malloc 比较慢。请注意，在您的基准计时中，主要是 load 阶段比员工实施慢。

此外，还有一个 hidden 结构，malloc 将它放在内存中的指针 returns 之前，用于跟踪拨款。此结构占用 space，对于像节点结构这样的小分配，额外的内存开销可能很大。

最好调用 malloc 并在连续块或数组中分配大量 node 结构。进一步的分配可以来自该块内。这类似于“平板分配”、“对象池”或“内存池”：

在下面的代码中，节点以 1000 个为一组进行分配。这将 malloc 调用的次数从 100,000 次减少到大约 100 次。这个版本比我的上一个分配器提高了速度，因为它使用了一个额外的“段”（又名“竞技场”）结构来控制分配。它不再需要将每个节点结构预链接到“空闲列表”或跟踪节点是否是从 malloc.

接收到的给定内存片段中的第一个节点。

看字典不用toupper/tolower。因为字典保证运行全部小写，每行只有一个字，所以用fgets看字典速度更快。我们可以将字典单词直接读入给定 node 结构的 word 字段 [if 我们做节点在执行 fgets].

之前进行分配

在计算散列时，我们不需要在每次迭代中都执行 % N。由于模运算的性质，我们可以在最后做一次取余运算一次。

我们还可以将 hash 值存储到 node 结构中。然后，在 check 中，我们可以比较哈希值并跳过一些 [expensive] strcmp 调用。在您的代码中，由于 N 太大，大多数哈希桶只有一两个条目，因此这种加速可能影响不大 [并且可能会稍微慢一些]。

在 check 中，最好将参数 word 复制到一个临时缓冲区中，在我们进行一次时小写。然后，我们可以使用较快的 strcmp 而不是较慢的 strcasecmp。无论我们是否将哈希值存储在节点结构中，这都有效。同样，由于 N 值较大，此加速效果可能有限。

无论如何，这里是重构代码：

// fix2.c -- Implements a dictionary's functionality

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include "dictionary.h"

#if _USE_ZPRT_
#define dbgprt(_fmt...)             printf(_fmt)
#else
#define dbgprt(_fmt...)             do { } while (0)
#endif

#ifndef NODEHASH
#define NODEHASH    1
#endif

#ifndef FASTNODE
#define FASTNODE    1
#endif

// Represents a node in a hash table
typedef struct node {
    char word[LENGTH + 1 + 1];
    struct node *next;
#if NODEHASH
    unsigned int hash;
#endif
} node;

//Prototypes
unsigned int hash(const char *word);

// TODO: Choose number of buckets in hash table
// odd prime number bigger than word count, because math
#ifndef N
#define N 187751
#endif

// Hash table
node *table[N];                         // may need to use calloc

// word count
int word_count = 0;

// 31 because math
// modulus only really needed to clip hash to table index
// so do it _once_ by caller of hash/hash2
#if 0
#define HASH \
    hash = (31 * hash + chr) % N
#else
#define HASH \
    hash = (31 * hash) + chr
#endif

// Hashes word to a number and copies lowercase chars to output buffer
unsigned int
hash2(char *dest,const char *word)
{
    unsigned int hash = 0;

    for (int chr = *word++;  chr != 0;  chr = *word++) {
        chr = tolower((unsigned char) chr);
        HASH;
        *dest++ = chr;
    }

    *dest = 0;

    return hash;
}

// Returns true if word is in dictionary, else false
bool
check(const char *word)
{
    char tmp[LENGTH + 10];

    unsigned int hval = hash2(tmp,word);
    unsigned int hidx = hval % N;

    // iterate through bucket
    for (node *cur = table[hidx];  cur != NULL;  cur = cur->next) {
        // check hash value first -- limits number of str*cmp calls
#if NODEHASH
        if (cur->hash != hval)
            continue;
#endif

        if (strcmp(cur->word, tmp) == 0)
            return true;
    }

    return false;
}

// Hashes word to a number
unsigned int
hash(const char *word)
{
    unsigned int hash = 0;

    for (int chr = *word++;  chr != 0;  chr = *word++)
        HASH;

    return hash;
}

// adjust this value to suit
#ifndef ARENASIZE
#define ARENASIZE       1000
#endif

typedef struct seg seg_t;
struct seg {
    seg_t *seg_next;                // next segment
    int seg_count;                  // number of used nodes in this segment
    node seg_node[ARENASIZE];       // nodes in this segment
};

node *nodelist;                     // list of freed nodes
seg_t *seglist;                     // head of linked list of segments

// newnode -- allocate from pool
node *
newnode(void)
{
    seg_t *seg;
    node *cur;

    while (1) {
        // get node from free node cache
        // use the cached node if possible
        // this only happens if freenode were called [which it isn't]
        cur = nodelist;
        if (cur != NULL) {
            nodelist = cur->next;
            break;
        }

        // try to allocate from current segment
        seg = seglist;
        if (seg != NULL) {
            if (seg->seg_count < ARENASIZE) {
                cur = &seg->seg_node[seg->seg_count++];
                break;
            }
        }

        // allocate a new segment
        seg = malloc(sizeof(*seg));

        // out of memory
        if (seg == NULL)
            break;

        // mark segment as completely unused
        seg->seg_count = 0;

        // attach to segment list
        seg->seg_next = seglist;
        seglist = seg;
    }

    return cur;
}

// freenode -- free a node
void
freenode(node *cur)
{

#if FASTNODE
    cur->next = nodelist;
    nodelist = cur;
#else
    free(cur);
#endif
}

// segunload -- release all segments
void
segunload(void)
{
    seg_t *seg;
    seg_t *next;

    seg = seglist;
    seglist = NULL;

    for (;  seg != NULL;  seg = next) {
        next = seg->seg_next;
        free(seg);
    }
}

// Unloads dictionary from memory, returning true if successful, else false
bool
unload(void)
{
    node *cur;

    // remove all nodes from the table
    for (int i = 0;  i < N;  ++i) {
        cur = table[i];

        // put all nodes in hash table back on [cached] free list
        // NOTE: not really necessary
#if FASTNODE
        word_count = 0;
        cur = NULL;
#else
        node *next;
        for (;  cur != NULL;  cur = next, --word_count) {
            next = cur->next;
            freenode(cur);
        }
#endif

        table[i] = cur;
    }

    segunload();

    return true;
}

// Loads dictionary into memory, returning true if successful, else false
bool
load(const char *dictionary)
{

    // open dictionary file
    FILE *dict = fopen(dictionary, "r");
    if (dict == NULL)
        return false;

    // read words from dictionary and write to hash table
    while (1) {
#if FASTNODE
        node *n = newnode();
#else
        node *n = malloc(sizeof(*n));
#endif
        if (n == NULL)
            return false;

        char *word = n->word;

        if (fgets(word,sizeof(n->word),dict) == NULL) {
            freenode(n);
            break;
        }

        // strip newline
#if 0
        word += strlen(word);
        if (--word >= n->word) {
            if (*word == '\n');
                *word = 0;
        }
#else
        word = strchr(word,'\n');
        if (word != NULL)
            *word = 0;
#endif

        // run hash to find the bucket in table
        unsigned int pos = hash(n->word);

#if NODEHASH
        n->hash = pos;
#endif

        // put node in hash table
        pos %= N;
        n->next = table[pos];
        table[pos] = n;

        word_count++;
    }

    fclose(dict);

    // DEBUG: show the bucket counts
#if _USE_ZPRT_
    node *cur;
    int mincount = 0;
    int maxcount = 0;
    int buckcount = 0;
    int totcount = 0;

    for (int i = 0;  i < N;  ++i) {
        cur = table[i];
        if (cur == NULL)
            continue;

        int count = 0;
        for (;  cur != NULL;  cur = cur->next)
            ++count;

        printf("load: BUCKET/%d count=%d\n",i,count);

        if (i == 0) {
            mincount = count;
            maxcount = count;
        }

        if (count < mincount)
            mincount = count;
        if (count > maxcount)
            maxcount = count;
        totcount += count;

        ++buckcount;
    }

    printf("load: TOTAL buckcnt=%d/%d mincount=%d maxcount=%d avgcount=%d/%d\n",
        buckcount,N,mincount,maxcount,totcount / buckcount,totcount / N);
#endif

    return true;
}

// Returns number of words in dictionary if loaded, else 0 if not yet loaded
unsigned int
size(void)
{
    return word_count;
}

cs50 pset5 拼写优化

cs50 pset5 Speller optimisation

c

performance

hashtable

linked-list

cs50