需要帮助在 c 中重新散列哈希表

need help rehashing a hashtable in c

我想通过为新的 table 分配 space,遍历旧的 table,并为每个元素计算一个 新的散列值然后 link 把它变成新的 table。我将 linked 列表作为散列 table 的条目,因此第二个 for 循环同时遍历旧散列 table。我也想释放旧的 table,但首先要正确地将元素放入新的 table。

我需要帮助,我在遍历旧table时哪里出错了?我也可以在最后将原来的 ht 指向 newht 吗?之后我还需要释放旧的 table(prevtable),稍后我会弄清楚。

typedef struct hashtable {
    htentry_ptr *table;         /*<< a pointer to the underlying table        */
    unsigned int size;          /*<< the current size of the underlying table */
    unsigned int num_entries;   /*<< the current number of entries            */
    float max_loadfactor;       /*<< the maximum load factor before the
                                 *   underlying table is resized              */
    unsigned short idx;         /*<< the index into the delta array           */
    unsigned int (*hash)(void *, unsigned int); /*<< a pointer to the hash function   */
    int (*cmp)(void *, void *);         /*<< a pointer to the comparison
                                         *   function                         */
} hashtable_t;

重新哈希函数如下所示

static void rehash(hashtab_ptr ht)
{
    hashtab_ptr prevtable;
    /* store reference to the old table */
    prevtable->table = ht->table;
    htentry_ptr p;
    unsigned int i;
    unsigned int newidx;
    printf("\nrehashing\n");
    ht->size = getsize(prevtable);
    printf("\nnew table size %d\n", ht->size);
    ht->table = calloc(ht->size , sizeof(htentry_t));
    for (i = 0; i < prevtable->size; i++) {
        for (p = prevtable->table[i]; p; p = p->next_ptr) {
            newidx = ht->hash(p->key, ht->size);
            if(ht->table[newidx]){
                htentry_ptr next;
                htentry_ptr prev = NULL;
                next = ht->table[newidx];
                printf("\ncollision adding to linked list\n");
                while (next) {
                    prev = next;
                    next = next->next_ptr;
                }
                prev->next_ptr = p;
                p->next_ptr = NULL;
            } else {
                ht->table[newidx] = p; 
                ht->table[newidx]->next_ptr = NULL;
                ht->num_entries++;
            }
        }
    }
}

正在插入哈希table。当 table 变得太密集时,将在插入结束时调用 rehash 函数。

int ht_insert(hashtab_ptr ht, void *key, void *value)
{
/* key is the id of the variable like num1 and value is number 
index = value 
*/

unsigned int N = ht->size;
unsigned int ne;
float current_loadfactor;
int k;
htentry_ptr p;
p = calloc(1,sizeof(htentry_t));
p->key = key;
p->value = value;
k = ht->hash(key, ht->size);
if(ht->table[k]){
    htentry_ptr next;
    htentry_ptr prev = NULL;
    /* theres already something in the index*/
    next = ht->table[k];
    printf("\ncollision adding to linked list");
    while (next) {
        prev = next;
        next = next->next_ptr;
    }
    ht->num_entries++;
    prev->next_ptr = p;
    p->next_ptr = NULL;
} else {
    ht->table[k] = p;
    ht->table[k]->next_ptr = NULL; 
    ht->num_entries++;
}
ne = ht->num_entries;
current_loadfactor = ne / N;
if (current_loadfactor > ht->max_loadfactor) {
    rehash(ht);
}

Also can I just point the original ht to the newht at the end?

没有

指针 ht 是本地函数堆栈上的副本。使用 ht = newht; 更改值只会更改副本。

最简单的解决方案是让您的 rehash() 函数 return 指向新散列的指针table。

static hashtab_ptr rehash(hashtab_ptr ht)
{
  [...]
  return newht;
}

然后你可以这样称呼它:

current_ht = rehash(current_ht);

第二种解决方案是更改原型以传递双指针:

static void rehash(hashtab_ptr *ht)
{
  [...]
  *ht = newht;
}

这意味着您需要更改 ht 在 rehash() 函数中各处的使用,以反映它现在是双指针。

第三个解决方案是不创建新的 hashtable_t,而只是创建一个新的 htentry_ptr *table 区域并更新 ht; 中的值这将是我最喜欢的解决方案代码审查。

I need help, where am I going wrong in traversing the old table?

  while (next)
  {
    prev = next;
    next = next->next_ptr;
    newht->num_entries++;
  }

newht->num_entries++; 放错地方了。当您寻找链表的末尾时,已经存在的元素不会增加您的散列 table 的大小。您可以将表达式 newht->num_entries++; 从两个 if/else 中移出 - 无论是否存在碰撞,您的 table 都会增加一个。

其次,在链表循环结束时它看起来像这样:

prev = [last_element of linked list];
next = null;
prev->next_ptr = old_element;

但是..old_element->next_ptr指向哪里?不能保证它为空。 所以你需要添加 p->next_ptr = NULL; 以便以前不在碰撞结束而现在在碰撞结束的元素正确结束链表。

问题是您不能只执行 p->next_ptr = NULL;,因为这样您的循环就认为它已经结束了。当链表中间的链表元素被重新分配给新哈希 table 中的新索引时,您的概念就搞砸了。元素不能同时具有 next_ptr 中新旧 table 的正确值。

所以,有两种解决方案:
a) 向后遍历您的冲突列表,但由于这看起来是一个单链表,因此将元素放入堆栈是一个非常痛苦的过程。
b) 通过创建新元素而不是尝试重用旧元素来重新散列 table。

编辑:

好的,有了插入函数,rehash 函数看起来像这样(又快又脏):

static hashtab_ptr rehash(hashtab_ptr ht)
{
    hashtab_ptr prevtable = ht;
    hashtab_ptr newht;
    htentry_ptr p;
    unsigned int i;
    unsigned int newidx;

    printf("\nrehashing");

    newht->idx  = prevtable->idx + 1;
    newht->size = getsize(prevtable);
    newht->num_entries = 0;
    newht->hash = prevtable->hash;
    newht->cmp  = prevtable->cmp;

    newht->max_loadfactor = prevtable->max_loadfactor;

    newht->table = calloc(newht->size , sizeof(htentry_t));

    for (i = 0; i < ht->size; i++) {
      for (p = ht->table[i]; p; p = p->next_ptr) {
        ht_insert(newht, p->key, p->value);
    }

    return newht;
   }

那么你应该有一个释放散列的函数table,所以你最终使用它:

if (current_loadfactor > ht->max_loadfactor) {
    hashtab_ptr tempht = ht;
    ht = rehash(ht);
    ht_delete(tempht);
}

这是为了表明:

  • 您只需要重新分配 table[] 成员,而不是信封
  • 指向指针的指针可以简化事情
  • 将一个元素从旧的 table 移动到新的元素时,请注意不要损坏它的下一个指针

[注意:我删除了类型定义,因为我讨厌它们...]


#include <stdio.h>
#include <stdlib.h>

struct hashentry {
    struct hashentry *next;
    char *key;
    void *payload;
    };
struct hashtable {
    struct hashentry **table;   /*<< a pointer to array of pointers        */
    unsigned int size;          /*<< current size */
    unsigned int num_entries;   /*<< current number of entries    */
    float max_loadfactor;
    /* unsigned short idx;      the index into the delta array(Quoi?) */
    unsigned int (*hash)(void *, unsigned int); /*<< a pointer to the hash function   */
    int (*cmp)(void *, void *);         /*<< a pointer to the comparison function     */
    };

static void rehash(struct hashtable *ht);
// The rehash function could look like this
static void rehash(struct hashtable *ht)
{
    struct hashentry **newtab;
    struct hashentry **pp, **qq, *this;
    unsigned int newsize, oldidx, newidx;

    newsize = ht->size * 2; /* or something like (max_loadfactor*num_entries), rounded up */
    fprintf(stderr, "new table size %u\n", newsize);
    newtab = malloc(newsize * sizeof *newtab );

    for (newidx=0; newidx < newsize; newidx++) {
        newtab[newidx] = NULL;
        }

    for (oldidx = 0; oldidx < ht->size; oldidx++) {
        for (pp = &ht->table[oldidx]; *pp; ) {
            this = *pp;
            *pp = this->next; /* this is important ! */
            this->next = NULL; /* ... because ... */

            newidx = ht->hash(this->key, newsize);
            for(qq = &newtab[newidx]; *qq; qq = &(*qq)->next) {
                /* You could count the number of "collisions" here */
                }

            *qq = this;
        }
    }
    free(ht->table);
    ht->table = newtab;
    ht->size = newsize;
    /* The rest of the fields does not need to change */
}

我认为这可能是解决方案,但我不是 100% 确定。

static void rehash(hashtab_ptr ht)
{
        unsigned int old_size, new_size;
        unsigned int newindex;
        unsigned int i;
        htentry_ptr q, p;
        htentry_ptr *new_table;
        old_size = ht->size;
        /*gets new size in prime table */
        new_size = getsize(ht);
        new_table = malloc(sizeof(htentry_t) * new_size);
        /* nullify the new table */
        for (i = 0; i < new_size; i++) {
            new_table[i] = NULL;
        }
        printf("\n*****rehashing******\n");
        ht->size = new_size; 
        printf("%s %d\n", "new size:", new_size);
        for (i = 0; i < old_size; i++) {
            p = ht->table[i];
            while (p) {
                q = p->next_ptr;
                newindex = ht->hash(p->key, new_size);
                /*
                temp = malloc(sizeof(htentry_t));
                temp->key = p->key;
                temp->value = p->value;
                temp->next_ptr = new_table[ht->hash(temp->key, next_size)];
                new_table[ht->hash(temp->key, next_size)] = temp;
                */
                if (new_table[newindex]) {
                    p->next_ptr = new_table[newindex];
                    new_table[newindex] = p;    
                } else {
                    new_table[newindex] = p;
                    new_table[newindex]->next_ptr = NULL;
                }
                p = q;
            }
        }

    free(ht->table);
    ht->table = new_table;  
}