需要帮助在 c 中重新散列哈希表
need help rehashing a hashtable in c
我想通过为新的 table 分配 space,遍历旧的 table,并为每个元素计算一个
新的散列值然后 link 把它变成新的 table。我将 linked 列表作为散列 table 的条目,因此第二个 for 循环同时遍历旧散列 table。我也想释放旧的 table,但首先要正确地将元素放入新的 table。
我需要帮助,我在遍历旧table时哪里出错了?我也可以在最后将原来的 ht 指向 newht 吗?之后我还需要释放旧的 table(prevtable),稍后我会弄清楚。
typedef struct hashtable {
htentry_ptr *table; /*<< a pointer to the underlying table */
unsigned int size; /*<< the current size of the underlying table */
unsigned int num_entries; /*<< the current number of entries */
float max_loadfactor; /*<< the maximum load factor before the
* underlying table is resized */
unsigned short idx; /*<< the index into the delta array */
unsigned int (*hash)(void *, unsigned int); /*<< a pointer to the hash function */
int (*cmp)(void *, void *); /*<< a pointer to the comparison
* function */
} hashtable_t;
重新哈希函数如下所示
static void rehash(hashtab_ptr ht)
{
hashtab_ptr prevtable;
/* store reference to the old table */
prevtable->table = ht->table;
htentry_ptr p;
unsigned int i;
unsigned int newidx;
printf("\nrehashing\n");
ht->size = getsize(prevtable);
printf("\nnew table size %d\n", ht->size);
ht->table = calloc(ht->size , sizeof(htentry_t));
for (i = 0; i < prevtable->size; i++) {
for (p = prevtable->table[i]; p; p = p->next_ptr) {
newidx = ht->hash(p->key, ht->size);
if(ht->table[newidx]){
htentry_ptr next;
htentry_ptr prev = NULL;
next = ht->table[newidx];
printf("\ncollision adding to linked list\n");
while (next) {
prev = next;
next = next->next_ptr;
}
prev->next_ptr = p;
p->next_ptr = NULL;
} else {
ht->table[newidx] = p;
ht->table[newidx]->next_ptr = NULL;
ht->num_entries++;
}
}
}
}
正在插入哈希table。当 table 变得太密集时,将在插入结束时调用 rehash 函数。
int ht_insert(hashtab_ptr ht, void *key, void *value)
{
/* key is the id of the variable like num1 and value is number
index = value
*/
unsigned int N = ht->size;
unsigned int ne;
float current_loadfactor;
int k;
htentry_ptr p;
p = calloc(1,sizeof(htentry_t));
p->key = key;
p->value = value;
k = ht->hash(key, ht->size);
if(ht->table[k]){
htentry_ptr next;
htentry_ptr prev = NULL;
/* theres already something in the index*/
next = ht->table[k];
printf("\ncollision adding to linked list");
while (next) {
prev = next;
next = next->next_ptr;
}
ht->num_entries++;
prev->next_ptr = p;
p->next_ptr = NULL;
} else {
ht->table[k] = p;
ht->table[k]->next_ptr = NULL;
ht->num_entries++;
}
ne = ht->num_entries;
current_loadfactor = ne / N;
if (current_loadfactor > ht->max_loadfactor) {
rehash(ht);
}
Also can I just point the original ht to the newht at the end?
没有
指针 ht 是本地函数堆栈上的副本。使用 ht = newht;
更改值只会更改副本。
最简单的解决方案是让您的 rehash() 函数 return 指向新散列的指针table。
static hashtab_ptr rehash(hashtab_ptr ht)
{
[...]
return newht;
}
然后你可以这样称呼它:
current_ht = rehash(current_ht);
第二种解决方案是更改原型以传递双指针:
static void rehash(hashtab_ptr *ht)
{
[...]
*ht = newht;
}
这意味着您需要更改 ht 在 rehash() 函数中各处的使用,以反映它现在是双指针。
第三个解决方案是不创建新的 hashtable_t
,而只是创建一个新的 htentry_ptr *table
区域并更新 ht;
中的值这将是我最喜欢的解决方案代码审查。
I need help, where am I going wrong in traversing the old table?
while (next)
{
prev = next;
next = next->next_ptr;
newht->num_entries++;
}
newht->num_entries++;
放错地方了。当您寻找链表的末尾时,已经存在的元素不会增加您的散列 table 的大小。您可以将表达式 newht->num_entries++;
从两个 if/else 中移出 - 无论是否存在碰撞,您的 table 都会增加一个。
其次,在链表循环结束时它看起来像这样:
prev = [last_element of linked list];
next = null;
prev->next_ptr = old_element;
但是..old_element->next_ptr
指向哪里?不能保证它为空。
所以你需要添加 p->next_ptr = NULL;
以便以前不在碰撞结束而现在在碰撞结束的元素正确结束链表。
问题是您不能只执行 p->next_ptr = NULL;
,因为这样您的循环就认为它已经结束了。当链表中间的链表元素被重新分配给新哈希 table 中的新索引时,您的概念就搞砸了。元素不能同时具有 next_ptr 中新旧 table 的正确值。
所以,有两种解决方案:
a) 向后遍历您的冲突列表,但由于这看起来是一个单链表,因此将元素放入堆栈是一个非常痛苦的过程。
b) 通过创建新元素而不是尝试重用旧元素来重新散列 table。
编辑:
好的,有了插入函数,rehash 函数看起来像这样(又快又脏):
static hashtab_ptr rehash(hashtab_ptr ht)
{
hashtab_ptr prevtable = ht;
hashtab_ptr newht;
htentry_ptr p;
unsigned int i;
unsigned int newidx;
printf("\nrehashing");
newht->idx = prevtable->idx + 1;
newht->size = getsize(prevtable);
newht->num_entries = 0;
newht->hash = prevtable->hash;
newht->cmp = prevtable->cmp;
newht->max_loadfactor = prevtable->max_loadfactor;
newht->table = calloc(newht->size , sizeof(htentry_t));
for (i = 0; i < ht->size; i++) {
for (p = ht->table[i]; p; p = p->next_ptr) {
ht_insert(newht, p->key, p->value);
}
return newht;
}
那么你应该有一个释放散列的函数table,所以你最终使用它:
if (current_loadfactor > ht->max_loadfactor) {
hashtab_ptr tempht = ht;
ht = rehash(ht);
ht_delete(tempht);
}
这是为了表明:
- 您只需要重新分配 table[] 成员,而不是信封
- 指向指针的指针可以简化事情
- 将一个元素从旧的 table 移动到新的元素时,请注意不要损坏它的下一个指针
[注意:我删除了类型定义,因为我讨厌它们...]
#include <stdio.h>
#include <stdlib.h>
struct hashentry {
struct hashentry *next;
char *key;
void *payload;
};
struct hashtable {
struct hashentry **table; /*<< a pointer to array of pointers */
unsigned int size; /*<< current size */
unsigned int num_entries; /*<< current number of entries */
float max_loadfactor;
/* unsigned short idx; the index into the delta array(Quoi?) */
unsigned int (*hash)(void *, unsigned int); /*<< a pointer to the hash function */
int (*cmp)(void *, void *); /*<< a pointer to the comparison function */
};
static void rehash(struct hashtable *ht);
// The rehash function could look like this
static void rehash(struct hashtable *ht)
{
struct hashentry **newtab;
struct hashentry **pp, **qq, *this;
unsigned int newsize, oldidx, newidx;
newsize = ht->size * 2; /* or something like (max_loadfactor*num_entries), rounded up */
fprintf(stderr, "new table size %u\n", newsize);
newtab = malloc(newsize * sizeof *newtab );
for (newidx=0; newidx < newsize; newidx++) {
newtab[newidx] = NULL;
}
for (oldidx = 0; oldidx < ht->size; oldidx++) {
for (pp = &ht->table[oldidx]; *pp; ) {
this = *pp;
*pp = this->next; /* this is important ! */
this->next = NULL; /* ... because ... */
newidx = ht->hash(this->key, newsize);
for(qq = &newtab[newidx]; *qq; qq = &(*qq)->next) {
/* You could count the number of "collisions" here */
}
*qq = this;
}
}
free(ht->table);
ht->table = newtab;
ht->size = newsize;
/* The rest of the fields does not need to change */
}
我认为这可能是解决方案,但我不是 100% 确定。
static void rehash(hashtab_ptr ht)
{
unsigned int old_size, new_size;
unsigned int newindex;
unsigned int i;
htentry_ptr q, p;
htentry_ptr *new_table;
old_size = ht->size;
/*gets new size in prime table */
new_size = getsize(ht);
new_table = malloc(sizeof(htentry_t) * new_size);
/* nullify the new table */
for (i = 0; i < new_size; i++) {
new_table[i] = NULL;
}
printf("\n*****rehashing******\n");
ht->size = new_size;
printf("%s %d\n", "new size:", new_size);
for (i = 0; i < old_size; i++) {
p = ht->table[i];
while (p) {
q = p->next_ptr;
newindex = ht->hash(p->key, new_size);
/*
temp = malloc(sizeof(htentry_t));
temp->key = p->key;
temp->value = p->value;
temp->next_ptr = new_table[ht->hash(temp->key, next_size)];
new_table[ht->hash(temp->key, next_size)] = temp;
*/
if (new_table[newindex]) {
p->next_ptr = new_table[newindex];
new_table[newindex] = p;
} else {
new_table[newindex] = p;
new_table[newindex]->next_ptr = NULL;
}
p = q;
}
}
free(ht->table);
ht->table = new_table;
}
我想通过为新的 table 分配 space,遍历旧的 table,并为每个元素计算一个 新的散列值然后 link 把它变成新的 table。我将 linked 列表作为散列 table 的条目,因此第二个 for 循环同时遍历旧散列 table。我也想释放旧的 table,但首先要正确地将元素放入新的 table。
我需要帮助,我在遍历旧table时哪里出错了?我也可以在最后将原来的 ht 指向 newht 吗?之后我还需要释放旧的 table(prevtable),稍后我会弄清楚。
typedef struct hashtable {
htentry_ptr *table; /*<< a pointer to the underlying table */
unsigned int size; /*<< the current size of the underlying table */
unsigned int num_entries; /*<< the current number of entries */
float max_loadfactor; /*<< the maximum load factor before the
* underlying table is resized */
unsigned short idx; /*<< the index into the delta array */
unsigned int (*hash)(void *, unsigned int); /*<< a pointer to the hash function */
int (*cmp)(void *, void *); /*<< a pointer to the comparison
* function */
} hashtable_t;
重新哈希函数如下所示
static void rehash(hashtab_ptr ht)
{
hashtab_ptr prevtable;
/* store reference to the old table */
prevtable->table = ht->table;
htentry_ptr p;
unsigned int i;
unsigned int newidx;
printf("\nrehashing\n");
ht->size = getsize(prevtable);
printf("\nnew table size %d\n", ht->size);
ht->table = calloc(ht->size , sizeof(htentry_t));
for (i = 0; i < prevtable->size; i++) {
for (p = prevtable->table[i]; p; p = p->next_ptr) {
newidx = ht->hash(p->key, ht->size);
if(ht->table[newidx]){
htentry_ptr next;
htentry_ptr prev = NULL;
next = ht->table[newidx];
printf("\ncollision adding to linked list\n");
while (next) {
prev = next;
next = next->next_ptr;
}
prev->next_ptr = p;
p->next_ptr = NULL;
} else {
ht->table[newidx] = p;
ht->table[newidx]->next_ptr = NULL;
ht->num_entries++;
}
}
}
}
正在插入哈希table。当 table 变得太密集时,将在插入结束时调用 rehash 函数。
int ht_insert(hashtab_ptr ht, void *key, void *value)
{
/* key is the id of the variable like num1 and value is number
index = value
*/
unsigned int N = ht->size;
unsigned int ne;
float current_loadfactor;
int k;
htentry_ptr p;
p = calloc(1,sizeof(htentry_t));
p->key = key;
p->value = value;
k = ht->hash(key, ht->size);
if(ht->table[k]){
htentry_ptr next;
htentry_ptr prev = NULL;
/* theres already something in the index*/
next = ht->table[k];
printf("\ncollision adding to linked list");
while (next) {
prev = next;
next = next->next_ptr;
}
ht->num_entries++;
prev->next_ptr = p;
p->next_ptr = NULL;
} else {
ht->table[k] = p;
ht->table[k]->next_ptr = NULL;
ht->num_entries++;
}
ne = ht->num_entries;
current_loadfactor = ne / N;
if (current_loadfactor > ht->max_loadfactor) {
rehash(ht);
}
Also can I just point the original ht to the newht at the end?
没有
指针 ht 是本地函数堆栈上的副本。使用 ht = newht;
更改值只会更改副本。
最简单的解决方案是让您的 rehash() 函数 return 指向新散列的指针table。
static hashtab_ptr rehash(hashtab_ptr ht)
{
[...]
return newht;
}
然后你可以这样称呼它:
current_ht = rehash(current_ht);
第二种解决方案是更改原型以传递双指针:
static void rehash(hashtab_ptr *ht)
{
[...]
*ht = newht;
}
这意味着您需要更改 ht 在 rehash() 函数中各处的使用,以反映它现在是双指针。
第三个解决方案是不创建新的 hashtable_t
,而只是创建一个新的 htentry_ptr *table
区域并更新 ht;
中的值这将是我最喜欢的解决方案代码审查。
I need help, where am I going wrong in traversing the old table?
while (next)
{
prev = next;
next = next->next_ptr;
newht->num_entries++;
}
newht->num_entries++;
放错地方了。当您寻找链表的末尾时,已经存在的元素不会增加您的散列 table 的大小。您可以将表达式 newht->num_entries++;
从两个 if/else 中移出 - 无论是否存在碰撞,您的 table 都会增加一个。
其次,在链表循环结束时它看起来像这样:
prev = [last_element of linked list];
next = null;
prev->next_ptr = old_element;
但是..old_element->next_ptr
指向哪里?不能保证它为空。
所以你需要添加 p->next_ptr = NULL;
以便以前不在碰撞结束而现在在碰撞结束的元素正确结束链表。
问题是您不能只执行 p->next_ptr = NULL;
,因为这样您的循环就认为它已经结束了。当链表中间的链表元素被重新分配给新哈希 table 中的新索引时,您的概念就搞砸了。元素不能同时具有 next_ptr 中新旧 table 的正确值。
所以,有两种解决方案:
a) 向后遍历您的冲突列表,但由于这看起来是一个单链表,因此将元素放入堆栈是一个非常痛苦的过程。
b) 通过创建新元素而不是尝试重用旧元素来重新散列 table。
编辑:
好的,有了插入函数,rehash 函数看起来像这样(又快又脏):
static hashtab_ptr rehash(hashtab_ptr ht)
{
hashtab_ptr prevtable = ht;
hashtab_ptr newht;
htentry_ptr p;
unsigned int i;
unsigned int newidx;
printf("\nrehashing");
newht->idx = prevtable->idx + 1;
newht->size = getsize(prevtable);
newht->num_entries = 0;
newht->hash = prevtable->hash;
newht->cmp = prevtable->cmp;
newht->max_loadfactor = prevtable->max_loadfactor;
newht->table = calloc(newht->size , sizeof(htentry_t));
for (i = 0; i < ht->size; i++) {
for (p = ht->table[i]; p; p = p->next_ptr) {
ht_insert(newht, p->key, p->value);
}
return newht;
}
那么你应该有一个释放散列的函数table,所以你最终使用它:
if (current_loadfactor > ht->max_loadfactor) {
hashtab_ptr tempht = ht;
ht = rehash(ht);
ht_delete(tempht);
}
这是为了表明:
- 您只需要重新分配 table[] 成员,而不是信封
- 指向指针的指针可以简化事情
- 将一个元素从旧的 table 移动到新的元素时,请注意不要损坏它的下一个指针
[注意:我删除了类型定义,因为我讨厌它们...]
#include <stdio.h>
#include <stdlib.h>
struct hashentry {
struct hashentry *next;
char *key;
void *payload;
};
struct hashtable {
struct hashentry **table; /*<< a pointer to array of pointers */
unsigned int size; /*<< current size */
unsigned int num_entries; /*<< current number of entries */
float max_loadfactor;
/* unsigned short idx; the index into the delta array(Quoi?) */
unsigned int (*hash)(void *, unsigned int); /*<< a pointer to the hash function */
int (*cmp)(void *, void *); /*<< a pointer to the comparison function */
};
static void rehash(struct hashtable *ht);
// The rehash function could look like this
static void rehash(struct hashtable *ht)
{
struct hashentry **newtab;
struct hashentry **pp, **qq, *this;
unsigned int newsize, oldidx, newidx;
newsize = ht->size * 2; /* or something like (max_loadfactor*num_entries), rounded up */
fprintf(stderr, "new table size %u\n", newsize);
newtab = malloc(newsize * sizeof *newtab );
for (newidx=0; newidx < newsize; newidx++) {
newtab[newidx] = NULL;
}
for (oldidx = 0; oldidx < ht->size; oldidx++) {
for (pp = &ht->table[oldidx]; *pp; ) {
this = *pp;
*pp = this->next; /* this is important ! */
this->next = NULL; /* ... because ... */
newidx = ht->hash(this->key, newsize);
for(qq = &newtab[newidx]; *qq; qq = &(*qq)->next) {
/* You could count the number of "collisions" here */
}
*qq = this;
}
}
free(ht->table);
ht->table = newtab;
ht->size = newsize;
/* The rest of the fields does not need to change */
}
我认为这可能是解决方案,但我不是 100% 确定。
static void rehash(hashtab_ptr ht)
{
unsigned int old_size, new_size;
unsigned int newindex;
unsigned int i;
htentry_ptr q, p;
htentry_ptr *new_table;
old_size = ht->size;
/*gets new size in prime table */
new_size = getsize(ht);
new_table = malloc(sizeof(htentry_t) * new_size);
/* nullify the new table */
for (i = 0; i < new_size; i++) {
new_table[i] = NULL;
}
printf("\n*****rehashing******\n");
ht->size = new_size;
printf("%s %d\n", "new size:", new_size);
for (i = 0; i < old_size; i++) {
p = ht->table[i];
while (p) {
q = p->next_ptr;
newindex = ht->hash(p->key, new_size);
/*
temp = malloc(sizeof(htentry_t));
temp->key = p->key;
temp->value = p->value;
temp->next_ptr = new_table[ht->hash(temp->key, next_size)];
new_table[ht->hash(temp->key, next_size)] = temp;
*/
if (new_table[newindex]) {
p->next_ptr = new_table[newindex];
new_table[newindex] = p;
} else {
new_table[newindex] = p;
new_table[newindex]->next_ptr = NULL;
}
p = q;
}
}
free(ht->table);
ht->table = new_table;
}