大整数的小型查找 Table
Small Lookup Table for Large Integers
我想进行快速、相对较小的查找 table,但输入范围较大:
- 输入:最大 32 位值。 (一个 32 位颜色值)
- 输出:最大 8 位索引。(指向 table 的索引)
类似于下面的代码。 (如果索引的值超过256个,索引将直接为0)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
// Forward declaration; the definition follows main().
static uint8_t getIndx(uint32_t value);
// Table of per-value slots, allocated in main(); a NULL entry means the value
// has not been given an index yet.
static uint8_t **indx;
// Next index to hand out.
// NOTE(review): as a uint8_t this wraps from 255 back to 0 when incremented.
static uint8_t count = 0;
// Demo driver: allocates the direct-mapped pointer table, marks every slot as
// unassigned, then looks up three values twice to show that a value keeps the
// index it received the first time.
int main(void) {
    // Largest value the table can be indexed with. For demonstrative purposes
    // this is nowhere near the wished-for 0xFFFFFFFF.
    const uint32_t size = 0xFFFF;
    // Values 0..size are all valid subscripts, so size + 1 slots are needed.
    // Computed in size_t so the + 1 cannot wrap even for size == UINT32_MAX
    // on platforms where size_t is wider than 32 bits.
    const size_t slots = (size_t)size + 1;

    indx = malloc(slots * sizeof *indx);
    if (indx == NULL) {
        printf("could not allocate memory\n");
        return 0;
    }
    // Mark every slot as "no index assigned yet".
    for (size_t i = 0; i < slots; i++) {
        indx[i] = NULL;
    }
    printf("%d\n", getIndx(111));
    printf("%d\n", getIndx(222));
    printf("%d\n", getIndx(333));
    printf("%d\n", getIndx(111));
    printf("%d\n", getIndx(222));
    printf("%d\n", getIndx(333));
    return 0;
}
// Returns the index assigned to `value`. The first time a value is seen it
// receives the next free index (0, 1, 2, ...). Once all 256 indices have been
// handed out, or if the slot cannot be allocated, an unseen value yields 0.
// NOTE(review): `value` is used directly as a subscript of `indx`; the table
// size is not visible here, so the caller must keep `value` within the
// allocated range.
static uint8_t getIndx(uint32_t value) {
    // An 8-bit counter can never compare greater than 255, so exhaustion of
    // the index space is tracked separately instead of letting the counter
    // wrap around and hand out duplicate indices.
    static int exhausted = 0;

    if (indx[value] == NULL) {
        if (exhausted) {
            return 0;
        }
        uint8_t *slot = malloc(sizeof *slot);
        if (slot == NULL) {
            return 0;
        }
        *slot = (uint8_t)count;
        indx[value] = slot;
        if (count == UINT8_MAX) {
            exhausted = 1; // index 255 was just handed out; nothing left
        } else {
            count++;
        }
    }
    return *indx[value];
}
输出为:
0
1
2
0
1
2
无论我怎么想,最后总是得到这样的方案。输入范围为 32 位(4294967296 个状态),为了得到 256 个可能的输出,我需要分配太多内存。而写 256 个 if else(无论是否放在 for 循环内)也不是我想要的。
是否有一些快速的方法,无论是 table 还是最终具有相同功能的方法,我还没有听说过?
非常感谢!
您可以使用散列 table 将 32 位值映射到 look-up table 索引。散列 table 的长度应明显大于 look-up table,以减少散列冲突的机会。
以下示例对散列 table 进行线性探测:从由输入值计算出的散列位置开始,直到找到匹配的条目或找到空的散列 table 条目为止。如果未找到输入值,并且 look-up table 和散列 table 中都还有空间(散列 table 应该有空间,因为它比 look-up table 长),则将输入值添加到 look-up table 并更新散列 table。示例中散列 table 的大小是 look-up table 的四倍。
该示例使用相同的 pseudo-random 序列执行两遍。第一遍应该主要填充 look-up table 并更新散列 table。第二遍不应再对 look-up table 或散列 table 进行任何更改,因为它只是重复第一个序列。
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
/*
 * One slot of the hash table. A used slot refers to an entry of the main
 * lookup table by position.
 */
struct hash_index {
/* Position in lookup[] of the value reached through this slot. */
uint8_t index;
/* Non-zero once the slot has been filled; zero-initialised slots are free. */
char used;
};
/*
 * The hash table has 2^HASHBITS slots; HASHMASK reduces a slot number
 * modulo that size.
 */
#define HASHBITS 10
#define HASHSIZE (1U << HASHBITS)
#define HASHMASK (HASHSIZE - 1)
/* Capacity of the main lookup table. */
#define LOOKUPSIZE 256U
/* Forward declaration; the definition follows main(). */
static uint8_t getIndx(uint32_t value);
/* Main lookup table: a value's position here is the index getIndx() returns. */
static uint32_t lookup[LOOKUPSIZE];
/* Hash table mapping hashed values to positions in lookup[]. */
static struct hash_index hashtab[HASHSIZE];
/* Number of lookup[] entries in use. */
static unsigned int count = 0;
/* Diagnostics updated by getIndx(): probe statistics and filled-slot count. */
static unsigned int tot_collisions = 0;
static unsigned int max_collisions = 0;
static unsigned int hash_used = 0;
/*
 * Demo driver. Feeds the same pseudo-random sequence of 260 values through
 * getIndx() twice and reports, per value, whether the returned index really
 * refers to that value in the lookup table, followed by per-pass statistics.
 */
int main(void) {
    printf("Lookup size: %u, Hash size: %u\n", LOOKUPSIZE, HASHSIZE);

    for (int round = 1; round <= 2; round++) {
        unsigned int hits = 0;
        unsigned int misses = 0;

        /* Collision statistics are per pass; hash_used is cumulative. */
        tot_collisions = 0;
        max_collisions = 0;
        printf("\nPass %d, currently used hashes: %u\n\n", round, hash_used);

        /* Re-seed so both passes see the identical sequence. */
        srand(1);
        for (unsigned int n = 0; n < 260; n++) {
            const uint32_t sample = (uint32_t)rand();
            const uint8_t slot = getIndx(sample);
            const int matched = (lookup[slot] == sample);
            const char *verdict = matched ? "OK" : "FAIL";

            printf("%" PRIu32 " -> %" PRIu8 " (%s)\n", sample, slot, verdict);
            if (matched) {
                hits++;
            } else {
                misses++;
            }
        }

        printf("\nSuccesses: %u, Failures: %u\n", hits, misses);
        printf("Used hashes: %u, Total collisions: %u, Max collisions: %u\n\n",
               hash_used, tot_collisions, max_collisions);
    }
    return 0;
}
static uint8_t getIndx(uint32_t value) {
unsigned int initial_hash;
unsigned int hash;
unsigned int collisions = 0;
uint8_t index;
/*
* Search for value using hash table, starting at position hashed from value.
*
* The hash table is longer than the maximum number of used entries,
* so we should always be able to find an unused entry in the hash table.
*/
initial_hash = ((value * UINT32_C(0x61c88647)) >> (32 - HASHBITS)) & HASHMASK;
for (hash = initial_hash;
collisions < HASHSIZE && hashtab[hash].used;
hash = (hash + 1) & HASHMASK) {
/*
* This hash table entry is used. Get the corresponding index in the
* main lookup table to check if the value matches.
*/
index = hashtab[hash].index;
if (lookup[index] == value) {
/* Matching value found. Return its index in the main table. */
return index;
}
/* Count hash collisions and total hash collisions. */
collisions++;
tot_collisions++;
if (max_collisions < collisions) {
/* Update max hash collisions for diagnostics. */
max_collisions = collisions;
}
}
/* Value not found. */
if (count < LOOKUPSIZE && collisions < HASHSIZE) {
/*
* There is room in the main lookup table for the new value
* and room in the hash table. The index of the new value in the
* main lookup table will be the current count, which will be incremented.
*/
index = count++;
/*
* Add value to main lookup table,
* add index in main lookup table to hash table,
* and return the index in the main lookup table.
*/
lookup[index] = value;
hashtab[hash].index = index;
hashtab[hash].used = 1;
hash_used++; /* Count of used hash table entries for diagnostics. */
return index;
}
/*
* Value not found and main lookup table is full or hash table is full.
* Give up.
*/
return 0;
}
另一种处理冲突的方法是使每个散列 table 入口指向匹配值的链表,但这更复杂。
我想进行快速、相对较小的查找 table,但输入范围较大: - 输入:最大 32 位值。 (一个 32 位颜色值) -输出:最大 8 位索引。 (索引到 table)
类似于下面的代码。 (如果索引的值超过256个,索引将直接为0)
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
// Forward declaration; the definition follows main().
static uint8_t getIndx(uint32_t value);
// Table of per-value slots, allocated in main(); a NULL entry means the value
// has not been given an index yet.
static uint8_t **indx;
// Next index to hand out.
// NOTE(review): as a uint8_t this wraps from 255 back to 0 when incremented.
static uint8_t count = 0;
// Demo driver: allocates the direct-mapped pointer table, marks every slot as
// unassigned, then looks up three values twice to show that a value keeps the
// index it received the first time.
int main(void) {
    // Largest value the table can be indexed with. For demonstrative purposes
    // this is nowhere near the wished-for 0xFFFFFFFF.
    const uint32_t size = 0xFFFF;
    // Values 0..size are all valid subscripts, so size + 1 slots are needed.
    // Computed in size_t so the + 1 cannot wrap even for size == UINT32_MAX
    // on platforms where size_t is wider than 32 bits.
    const size_t slots = (size_t)size + 1;

    indx = malloc(slots * sizeof *indx);
    if (indx == NULL) {
        printf("could not allocate memory\n");
        return 0;
    }
    // Mark every slot as "no index assigned yet".
    for (size_t i = 0; i < slots; i++) {
        indx[i] = NULL;
    }
    printf("%d\n", getIndx(111));
    printf("%d\n", getIndx(222));
    printf("%d\n", getIndx(333));
    printf("%d\n", getIndx(111));
    printf("%d\n", getIndx(222));
    printf("%d\n", getIndx(333));
    return 0;
}
// Returns the index assigned to `value`. The first time a value is seen it
// receives the next free index (0, 1, 2, ...). Once all 256 indices have been
// handed out, or if the slot cannot be allocated, an unseen value yields 0.
// NOTE(review): `value` is used directly as a subscript of `indx`; the table
// size is not visible here, so the caller must keep `value` within the
// allocated range.
static uint8_t getIndx(uint32_t value) {
    // An 8-bit counter can never compare greater than 255, so exhaustion of
    // the index space is tracked separately instead of letting the counter
    // wrap around and hand out duplicate indices.
    static int exhausted = 0;

    if (indx[value] == NULL) {
        if (exhausted) {
            return 0;
        }
        uint8_t *slot = malloc(sizeof *slot);
        if (slot == NULL) {
            return 0;
        }
        *slot = (uint8_t)count;
        indx[value] = slot;
        if (count == UINT8_MAX) {
            exhausted = 1; // index 255 was just handed out; nothing left
        } else {
            count++;
        }
    }
    return *indx[value];
}
输出为:
0
1
2
0
1
2
无论我怎么想,最后总是得到这样的方案。输入范围为 32 位(4294967296 个状态),为了得到 256 个可能的输出,我需要分配太多内存。而写 256 个 if else(无论是否放在 for 循环内)也不是我想要的。
是否有一些快速的方法,无论是 table 还是最终具有相同功能的方法,我还没有听说过?
非常感谢!
您可以使用散列 table 将 32 位值映射到 look-up table 索引。散列 table 的长度应明显大于 look-up table,以减少散列冲突的机会。
以下示例对散列 table 进行线性探测:从由输入值计算出的散列位置开始,直到找到匹配的条目或找到空的散列 table 条目为止。如果未找到输入值,并且 look-up table 和散列 table 中都还有空间(散列 table 应该有空间,因为它比 look-up table 长),则将输入值添加到 look-up table 并更新散列 table。示例中散列 table 的大小是 look-up table 的四倍。
该示例使用相同的 pseudo-random 序列执行两遍。第一遍应该主要填充 look-up table 并更新散列 table。第二遍不应再对 look-up table 或散列 table 进行任何更改,因为它只是重复第一个序列。
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>
/*
 * One slot of the hash table. A used slot refers to an entry of the main
 * lookup table by position.
 */
struct hash_index {
/* Position in lookup[] of the value reached through this slot. */
uint8_t index;
/* Non-zero once the slot has been filled; zero-initialised slots are free. */
char used;
};
/*
 * The hash table has 2^HASHBITS slots; HASHMASK reduces a slot number
 * modulo that size.
 */
#define HASHBITS 10
#define HASHSIZE (1U << HASHBITS)
#define HASHMASK (HASHSIZE - 1)
/* Capacity of the main lookup table. */
#define LOOKUPSIZE 256U
/* Forward declaration; the definition follows main(). */
static uint8_t getIndx(uint32_t value);
/* Main lookup table: a value's position here is the index getIndx() returns. */
static uint32_t lookup[LOOKUPSIZE];
/* Hash table mapping hashed values to positions in lookup[]. */
static struct hash_index hashtab[HASHSIZE];
/* Number of lookup[] entries in use. */
static unsigned int count = 0;
/* Diagnostics updated by getIndx(): probe statistics and filled-slot count. */
static unsigned int tot_collisions = 0;
static unsigned int max_collisions = 0;
static unsigned int hash_used = 0;
/*
 * Demo driver. Feeds the same pseudo-random sequence of 260 values through
 * getIndx() twice and reports, per value, whether the returned index really
 * refers to that value in the lookup table, followed by per-pass statistics.
 */
int main(void) {
    printf("Lookup size: %u, Hash size: %u\n", LOOKUPSIZE, HASHSIZE);

    for (int round = 1; round <= 2; round++) {
        unsigned int hits = 0;
        unsigned int misses = 0;

        /* Collision statistics are per pass; hash_used is cumulative. */
        tot_collisions = 0;
        max_collisions = 0;
        printf("\nPass %d, currently used hashes: %u\n\n", round, hash_used);

        /* Re-seed so both passes see the identical sequence. */
        srand(1);
        for (unsigned int n = 0; n < 260; n++) {
            const uint32_t sample = (uint32_t)rand();
            const uint8_t slot = getIndx(sample);
            const int matched = (lookup[slot] == sample);
            const char *verdict = matched ? "OK" : "FAIL";

            printf("%" PRIu32 " -> %" PRIu8 " (%s)\n", sample, slot, verdict);
            if (matched) {
                hits++;
            } else {
                misses++;
            }
        }

        printf("\nSuccesses: %u, Failures: %u\n", hits, misses);
        printf("Used hashes: %u, Total collisions: %u, Max collisions: %u\n\n",
               hash_used, tot_collisions, max_collisions);
    }
    return 0;
}
static uint8_t getIndx(uint32_t value) {
unsigned int initial_hash;
unsigned int hash;
unsigned int collisions = 0;
uint8_t index;
/*
* Search for value using hash table, starting at position hashed from value.
*
* The hash table is longer than the maximum number of used entries,
* so we should always be able to find an unused entry in the hash table.
*/
initial_hash = ((value * UINT32_C(0x61c88647)) >> (32 - HASHBITS)) & HASHMASK;
for (hash = initial_hash;
collisions < HASHSIZE && hashtab[hash].used;
hash = (hash + 1) & HASHMASK) {
/*
* This hash table entry is used. Get the corresponding index in the
* main lookup table to check if the value matches.
*/
index = hashtab[hash].index;
if (lookup[index] == value) {
/* Matching value found. Return its index in the main table. */
return index;
}
/* Count hash collisions and total hash collisions. */
collisions++;
tot_collisions++;
if (max_collisions < collisions) {
/* Update max hash collisions for diagnostics. */
max_collisions = collisions;
}
}
/* Value not found. */
if (count < LOOKUPSIZE && collisions < HASHSIZE) {
/*
* There is room in the main lookup table for the new value
* and room in the hash table. The index of the new value in the
* main lookup table will be the current count, which will be incremented.
*/
index = count++;
/*
* Add value to main lookup table,
* add index in main lookup table to hash table,
* and return the index in the main lookup table.
*/
lookup[index] = value;
hashtab[hash].index = index;
hashtab[hash].used = 1;
hash_used++; /* Count of used hash table entries for diagnostics. */
return index;
}
/*
* Value not found and main lookup table is full or hash table is full.
* Give up.
*/
return 0;
}
另一种处理冲突的方法是使每个散列 table 入口指向匹配值的链表,但这更复杂。