在 c 中重新散列线性探测散列 table

Rehashing in a linear probe hash table in c

为了学习散列,我尝试制作一个散列 table,其中散列是通过线性探测完成的。每当负载因子 - alpha(已填充 buckets/total 桶)超过 0.75 时,我都会增加 table 的大小。以下是相同的代码。但是当我执行它时,程序在中间停止了。 令人困惑的部分是,有时 table 的大小调整发生在多个步骤中,而其他时候则不会。调整大小是在重新散列函数中完成的。要求初始 table 大小为 31 并上升到 8191。

#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#define null 0
#define occupied 1
#define deleted 2
#define maxLen 8191

typedef struct cell
{
    int key;
    short flag;
} Cell;

void insert(Cell *a, Cell* copy, int key);
int search(Cell* a, int key);
void delete(Cell* a, int key);
void rehash(Cell* a, Cell* copy);
int nextPrime(int n);
int isPrime(int n);

int buckets = 31;
int filled = 0;
float alpha = 0.0;

int main()
{
    int i, total, val;
    printf("\nHashing with linear probing\n");
    printf("_______________________________________________________________\n\n");

    Cell *hashTable, *copy;
    hashTable = malloc(buckets * sizeof(Cell));
    for (i = 0; i < buckets; i++)
    {
        hashTable[i].key = -100;
        hashTable[i].flag = null;
    }

    printf("Initially:\n");
    printf("1. Number of buckets = %d\n",buckets);
    printf("2. Load factor (alpha) = %4.2f\n", alpha);
    printf("_______________________________________________________________\n\n");
    printf("Enter the number of values to be hashed in the table.\n");
    scanf("%d",&total);

    //Peripheral routines to hash random integers into hash table
    srand(time(NULL));

    for (i = 0; i < total; i++)
    {
        int temp = rand() % maxLen;
        //printf("%4d  ",i);
        insert(hashTable, copy, temp);
        //printf("buckets = %4d  filled = %4d  alpha = %4.2f\n\n",buckets, filled, alpha);
    }
    printf("\n");
    printf("Following is a list of operation and the respective commands:\n\n");
    printf(" 1. Insert - i\n 2. Search - s\n 3. Delete - d\n 4. Exit - e\n\n");
    char option;
    while(1)
    {
        scanf("%c",&option);
        if (option == 'e')
        {
            break;
        }
        else if (option == 'i')
        {
            printf("Please enter the value to be inserted : ");
            scanf("%d",&val);
            insert(hashTable, copy, val);
            printf("\n");
        }
        else if (option == 's')
        {
            printf("Please enter the value to be searched : ");
            scanf("%d",&val);
            search(hashTable, val);
            printf("\n");
        }
        else if (option == 'd')
        {
            printf("Please enter the value to be deleted : ");
            scanf("%d",&val);
            delete(hashTable, val);
            printf("\n");
        }
    }
    free(hashTable);
    return(0);
}

void insert(Cell *a, Cell* copy, int key)    // Method of linear probing
{
    int lt, rt, shift, hashKey;
    alpha = (float)(filled + 1)/buckets;
    if (alpha >= 0.75) { rehash(a, copy); }

    lt = hashKey = key % buckets;
    printf("Key = %4d  Hashed at = %4d  ", key, lt);

    while (a[hashKey].flag == occupied)
    {
        if (a[hashKey].key == key)
        {
            printf("Key already present in table at %4d.\n",hashKey);
            return;
        }
        hashKey = (hashKey + 1) % buckets;
    }
    rt = hashKey;
    a[hashKey].key = key;
    a[hashKey].flag = occupied;
    if (rt >= lt) shift = rt - lt;
    else          shift = buckets - lt + rt;

    printf("Placed at = %4d  Shift = %4d\n", rt, shift);
    filled++;
    alpha = (float)filled/buckets;
    return;
}

int search(Cell* a, int key)
{
    int hashKey = key % buckets;
    while (a[hashKey].flag != null)
    {
        if (a[hashKey].key == key)
        {
            if (a[hashKey].flag == occupied)
            {
                printf("Element found in table at position %d.\n", hashKey);
                return hashKey;
            }
            else
            {
                printf("Element not found in table.\n");
                return -1;
            }
        }
        hashKey = (hashKey + 1)% buckets;
    }
    printf("Element not found in table.\n");
    return -1;
}

void delete(Cell* a, int key)
{
    int hashKey = search(a, key);
    if (hashKey == -1) return;
    a[hashKey].flag = deleted;
    filled--;
    alpha = (float)filled/buckets;
    printf("Element has been deleted from the table.\n");
    return;
}

void rehash(Cell* a, Cell* copy)
{
    if (buckets == maxLen)
    {
        printf("Table size cannot exceed 8191.\n");
        return;
    }
    int i = 0, temp = 0, count = 0, num = buckets, hashKey;
    buckets = nextPrime(2*num);
    printf("_____________________________________________________________\n\n");
    printf("Due to load factor(alpha) exceeding 0.75, table resized to %d\n\n",buckets);

    copy = malloc(buckets * sizeof(Cell));
    for (i = 0; i < buckets; i++)
    {
        copy[i].key = -100;
        copy[i].flag = null;
    }

    for (i = 0; i < num; i++)
    {
        if(a[i].flag != occupied)
        {
            //printf("Unoccupied: \n");
            //printf("   a: index = %4d  key = %4d  flag = %4d  count = %4d\n",i,a[i].key, a[i].flag, count);
            //printf("copy: index = %4d  key = %4d  flag = %4d  count = %4d\n\n",hashKey,copy[i].key, copy[i].flag, count);
            continue;
        }
        temp = a[i].key;
        hashKey = temp % buckets;
        while (copy[hashKey].flag == occupied)
        {
            hashKey = (hashKey + 1) % buckets;
        }
        copy[hashKey].key = temp;
        copy[hashKey].flag = occupied;
        count++;
        //printf("Occupied: \n");
        //printf("   a: index = %4d  key = %4d  flag = %4d  count = %4d\n", i, a[i].key, a[i].flag, count);
        //printf("copy: index = %4d  key = %4d  flag = %4d  count = %4d\n\n", hashKey, copy[hashKey].key, copy[hashKey].flag, count);
    }
    free(a);
    a = malloc(buckets * sizeof(Cell));
    for (i = 0; i < buckets; i++)
    {
        a[i].key = -100;
        a[i].flag = null;
    }
    a = copy;
    free(copy);
    filled = count;
    alpha = (float)filled/buckets;
}

int nextPrime(int n)
{
    int num = n+1;  // parameter n is even, hence +1 to make it odd.
    while(!isPrime(num)) num += 2;
    return num;
}

int isPrime(int n)
{
    int i = 0;
    int a = sqrt(n);
    for (i = 3; i <= a; i++)
        if (n % i == 0) return 0;
    return 1;
}

当你rehash时,你将旧数组和新数组作为指针传递。您还在函数的末尾做了一些相当奇怪的事情:

void rehash(Cell* a, Cell* copy)
{
    // determine new bucket size

    copy = malloc(buckets * sizeof(Cell));
    // copy entries

    free(a);
    a = malloc(buckets * sizeof(Cell));     // (a)
    for (i = 0; i < buckets; i++)
    {
        a[i].key = -100;
        a[i].flag = null;
    }
    a = copy;
    free(copy);     // (b)
}

先看结尾吧。在 (a) 处,您分配 space,初始化所有内容,然后立即用副本覆盖 a,实际上放弃了新内存的句柄。 (另外,为什么要在最后初始化?那是复制和粘贴剩下的吗?)跳过分配和初始化。

接下来,您将 copy 的句柄分配给 a。这些阵列现在是相同的。当您在 (b) 处释放它们时,您同时释放了它们。

摆脱free(a);

之后的一切

因为您已将散列数组作为指针传递,所以调用函数无法知道您所做的更改。在更改数组内容的函数中,传递一个指针就足够了。但是 malloc 改变了指针本身,所以至少 copy 应该是指向指针的指针。更好的是,您可以只传递原始数组:

void rehash(Cell **a)
{
    // determine new bucket size

    Cell *copy = malloc(buckets * sizeof(Cell));
    // copy entries

    free(*a);
    *a = copy;      // now the new array is in effect
}

或者,您可以 return 新指针。 (您可以使用 z malloced 内存安全地执行此操作,但不能使用本地数组。)因此您的函数可能如下所示:

Cell *rehash(const Cell *a)
{
    // determine new bucket size

    copy = malloc(buckets * sizeof(Cell));
    // copy entries

    free(a);
    return copy;
}

并这样称呼它:

a = rehash(a);

rehash函数之外不需要copy,所以你不应该把它传递给insert,例如。

终于,我做对了。除了上述答案的帮助外,另一件需要注意的事情是重新散列的调用是从插入函数给出的,在这种情况下,新的(更大的尺寸)table 被 returned 到插入功能。但是 insert 函数对 main 函数没有 return 任何东西。因此,为了解决这个问题,我遇到了以下三种方法:

  1. 如上所述,传递给 void 函数的参数应该 是指向指针的指针,因为这里涉及 malloc() 函数。
  2. 在不涉及插入的情况下从main调用rehash函数 功能。
  3. 让插入函数也return从table收到的新散列 重新哈希函数。

下面是第三种方法的代码。

#include<stdio.h>
#include<stdlib.h>
#include<math.h>
#define null 0
#define occupied 1
#define deleted 2
#define maxLen 8191

typedef struct cell
{
    int key;
    short flag;
} Cell;

void insert(Cell *a, int key);
int search(Cell *a, int key);
void delete(Cell *a, int key);
Cell* rehash(Cell *a);
int nextPrime(int n);
int isPrime(int n);

int buckets = 31;
int filled = 0;
float alpha = 0.0;

int main()
{
    int i, total, val;
    printf("\nHashing with linear probing\n");
    printf("_______________________________________________________________\n\n");

    Cell *hashTable;
    hashTable = (Cell*)calloc(buckets, sizeof(Cell));


    printf("Initially:\n");
    printf("1. Number of buckets = %d\n",buckets);
    printf("2. Load factor (alpha) = %4.2f\n", alpha);
    printf("_______________________________________________________________\n\n");
    printf("Enter the number of values to be hashed in the table.\n");
    scanf("%d",&total);

    //Peripheral routines to hash random integers into hash table
    srand(time(NULL));

    for (i = 0; i < total; i++)
    {
        int temp = rand() % maxLen;
        //printf("%4d  ",i);

        alpha = (float)(filled + 1)/buckets;

        //Rehashing in case of alpha >= 0.75
        if (alpha >= 0.75) { hashTable = rehash(hashTable); }
        insert(hashTable, temp);
    }
    printf("\n");
    printf("Following is a list of operation and the respective commands:\n\n");
    printf(" 1. Insert - i\n 2. Search - s\n 3. Delete - d\n 4. Exit - e\n\n");
    char option;
    while(1)
    {
        scanf("%c",&option);
        if (option == 'e')
        {
            break;
        }
        else if (option == 'i')
        {
            alpha = (float)(filled + 1)/buckets;

            //Rehashing in case of alpha >= 0.75
            if (alpha >= 0.75) { hashTable = rehash(hashTable); }
            printf("Please enter the value to be inserted : ");
            scanf("%d",&val);
            insert(hashTable, val);
            printf("\n");
        }
        else if (option == 's')
        {
            printf("Please enter the value to be searched : ");
            scanf("%d",&val);
            search(hashTable, val);
            printf("\n");
        }
        else if (option == 'd')
        {
            printf("Please enter the value to be deleted : ");
            scanf("%d",&val);
            delete(hashTable, val);
            printf("\n");
        }
    }
    free(hashTable);
    return(0);
}

void insert(Cell *a, int key)    // Method of linear probing
{
    int lt, rt, shift, hashKey;
    lt = hashKey = key % buckets;
    printf("Key = %4d  Hashed at = %4d  ", key, lt);

    while (a[hashKey].flag == occupied)
    {
        if (a[hashKey].key == key)
        {
            printf("Key already present in table at %4d.\n",hashKey);
            return;
        }
        hashKey = (hashKey + 1) % buckets;
    }
    rt = hashKey;
    a[hashKey].key = key;
    a[hashKey].flag = occupied;
    if (rt >= lt) shift = rt - lt;
    else          shift = buckets - lt + rt;

    printf("Placed at = %4d  Shift = %4d\n", rt, shift);
    filled++;
    alpha = (float)filled/buckets;
    return;
}

int search(Cell *a, int key)
{
    int hashKey = key % buckets;
    while (a[hashKey].flag != null)
    {
        if (a[hashKey].key == key)
        {
            if (a[hashKey].flag == occupied)
            {
                printf("Element found in table at position %d.\n", hashKey);
                return hashKey;
            }
            else
            {
                printf("Element not found in table.\n");
                return -1;
            }
        }
        hashKey = (hashKey + 1)% buckets;
    }
    printf("Element not found in table.\n");
    return -1;
}

void delete(Cell *a, int key)
{
    int hashKey = search(a, key);
    if (hashKey == -1) return;
    a[hashKey].flag = deleted;
    filled--;
    alpha = (float)filled/buckets;
    printf("Element has been deleted from the table.\n");
    return;
}

Cell* rehash(Cell* a)
{
    if (buckets == maxLen)
    {
        printf("Table size cannot exceed 8191.\n");
        return;
    }
    int num = buckets;
    buckets = nextPrime(2*num);
    printf("_____________________________________________________________\n\n");
    printf("Due to load factor(alpha) exceeding 0.75, table resized to %d\n\n",buckets);

    int i = 0, temp = 0, count = 0;
    Cell* copy;
    copy = (Cell*)calloc(buckets, sizeof(Cell));
    if (copy == NULL) {
        perror("Failed calloc() for copy");
        exit(1);
    }

    for (i = 0; i < num; i++)
    {
        if(a[i].flag != occupied) continue;

        temp = a[i].key;
        insert(copy, temp);
        count++;
    }
    free(a);

    filled = count;
    alpha = (float)filled/buckets;
    return copy;
}

int nextPrime(int n)
{
    int num = n+1;  // parameter n is even, hence +1 to make it odd.
    while(!isPrime(num)) num += 2;
    return num;
}

int isPrime(int n)
{
    int i = 0;
    int a = sqrt(n);
    for (i = 3; i <= a; i++)
        if (n % i == 0) return 0;
    return 1;
}