针对大整数输入的小型查找表

Question

我想实现一个快速且相对较小的查找表，但它的输入范围很大： - 输入：最多 32 位的值（一个 32 位颜色值）。 - 输出：最多 8 位的索引（指向表中条目的索引）。

类似于下面的代码。（如果已分配索引的值超过 256 个，返回的索引将直接为 0）

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

static uint8_t getIndx(uint32_t value);
/* Table of per-value slots, allocated in main(); a NULL entry means the
 * value has not been assigned an index yet. */
static uint8_t **indx;
/* Number of indices handed out so far.  Deliberately wider than uint8_t:
 * an 8-bit counter wraps from 255 back to 0, so a full table (256 entries)
 * would be indistinguishable from an empty one and the "table full" check
 * in getIndx() could never trigger. */
static unsigned int count = 0;

/*
 * Demo driver: allocates the direct-mapped table, marks every slot as
 * unassigned, then looks up three values twice to show that repeated values
 * get the same index back.
 */
int main(void) {
  // set up the indx
  /* Number of table entries; valid inputs to getIndx() are 0 .. size - 1. */
  const uint32_t size = 0xFFFF;  // for demonstrative purposes not even nearly as large as wished to be (0xFFFFFFFF) plus my for loop below would get in trouble, I think
  indx = malloc(sizeof *indx * size);
  if (indx == NULL) {
    printf("could not allocate memory\n");
    return EXIT_FAILURE;
  }
  /*
   * The array holds exactly `size` pointers, so the last valid subscript is
   * size - 1.  The index has the same unsigned type as `size` to avoid a
   * signed/unsigned comparison.
   */
  for (uint32_t i = 0; i < size; i++) {
    indx[i] = NULL;
  }

  printf("%d\n", getIndx(111));
  printf("%d\n", getIndx(222));
  printf("%d\n", getIndx(333));
  printf("%d\n", getIndx(111));
  printf("%d\n", getIndx(222));
  printf("%d\n", getIndx(333));

  /* Release every per-value slot (free(NULL) is a no-op) and the table. */
  for (uint32_t i = 0; i < size; i++) {
    free(indx[i]);
  }
  free(indx);
  indx = NULL;

  return 0;
}

/*
 * Return the 8-bit index associated with `value`, assigning the next free
 * index the first time a value is seen.
 *
 * `value` is used directly as a subscript into indx, so it must be smaller
 * than the number of entries allocated for that table; this cannot be
 * checked here because the table size is local to main().
 *
 * Returns 0 when no new index can be assigned (all 256 indices are in use,
 * or the slot could not be allocated).  Note that 0 is also a valid index.
 */
static uint8_t getIndx(uint32_t value) {
  /*
   * Tracks exhaustion separately from `count`: an 8-bit counter cannot
   * represent 256, so testing the counter alone would never detect a full
   * table and indices would silently be reused.
   */
  static int table_full = 0;

  if (indx[value] == NULL) {
    if (table_full) {
      return 0;
    }
    uint8_t *slot = malloc(sizeof *slot);
    if (slot == NULL) {
      /* Out of memory: leave the entry unassigned rather than crash. */
      return 0;
    }
    *slot = (uint8_t)count;
    indx[value] = slot;
    if (count >= UINT8_MAX) {
      /* Index 255 has just been handed out; nothing is left. */
      table_full = 1;
    } else {
      count++;
    }
  }
  return *indx[value];
}

输出为：

无论怎么想，我最终总是得到类似上面这样的方案。输入范围是 32 位（4294967296 种状态），为了得到仅仅 256 种可能的输出，我需要分配过多的内存。而写 256 个 if-else（无论是否放在 for 循环里）也不是我想要的。

有没有我还没听说过的快速方法——无论是查找表，还是最终能实现相同功能的其他方法？

非常感谢！

Answer 1

您可以使用散列表将 32 位值映射到查找表的索引。散列表的长度应明显大于查找表的长度，以减少散列冲突的机会。

以下示例对散列表进行线性探测：从由输入值计算出的散列位置开始，直到找到匹配的条目或遇到空的散列表项为止。如果未找到输入值，并且查找表和散列表中都还有空间（散列表应该有空间，因为它比查找表长），就把输入值添加到查找表并更新散列表。示例使用的散列表大小是查找表的四倍。

该示例使用相同的伪随机序列执行两遍。第一遍应该主要是填充查找表并更新散列表。第二遍不应再对查找表或散列表进行任何更改，因为它只是重复第一遍的序列。

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <string.h>

/* One slot of the hash table: records where a value lives in lookup[]. */
struct hash_index {
  uint8_t index;  /* Position in lookup[]; only read when `used` is non-zero. */
  char used;      /* Set to 1 by getIndx() once the slot holds an entry. */
};

/*
 * Hash table geometry.  HASHSIZE is 2^HASHBITS, so HASHMASK (HASHSIZE - 1)
 * can wrap a slot number with a single bitwise AND.
 */
#define HASHBITS  10
#define HASHSIZE  (1U << HASHBITS)
#define HASHMASK  (HASHSIZE - 1)
/* Number of entries in the main lookup table (indices fit in a uint8_t). */
#define LOOKUPSIZE  256U

static uint8_t getIndx(uint32_t value);
/* Main lookup table: lookup[i] is the value that was assigned index i. */
static uint32_t lookup[LOOKUPSIZE];
/* Open-addressed hash table mapping hashed values to indices in lookup[]. */
static struct hash_index hashtab[HASHSIZE];
/* Number of entries of lookup[] in use; also the next index to hand out. */
static unsigned int count = 0;
/* Diagnostics: occupied slots stepped over (reset per pass by main()). */
static unsigned int tot_collisions = 0;
/* Diagnostics: longest probe sequence seen (reset per pass by main()). */
static unsigned int max_collisions = 0;
/* Diagnostics: number of hash table slots marked as used. */
static unsigned int hash_used = 0;

/*
 * Test driver: feeds the same pseudo-random sequence of 260 values through
 * getIndx() twice and reports, for every value, whether the returned index
 * really refers to that value in lookup[].  Collision statistics are printed
 * after each pass.
 */
int main(void) {
  static const char * const verdict[2] = {"FAIL", "OK"};

  printf("Lookup size: %u, Hash size: %u\n", LOOKUPSIZE, HASHSIZE);

  for (int round = 1; round <= 2; round++) {
    unsigned int hits = 0;
    unsigned int misses = 0;

    /* Per-pass statistics start from zero; hash_used is cumulative because
     * the second pass is not expected to add any entries. */
    tot_collisions = 0;
    max_collisions = 0;
    printf("\nPass %d, currently used hashes: %u\n\n", round, hash_used);

    /* Re-seed so that both passes see exactly the same sequence. */
    srand(1);
    for (unsigned int n = 0; n < 260; n++) {
      const uint32_t sample = (uint32_t)rand();
      const uint8_t slot = getIndx(sample);
      const int matched = (lookup[slot] == sample);

      printf("%" PRIu32 " -> %" PRIu8 " (%s)\n", sample, slot, verdict[matched]);
      if (matched) {
        hits++;
      } else {
        misses++;
      }
    }

    printf("\nSuccesses: %u, Failures: %u\n", hits, misses);
    printf("Used hashes: %u, Total collisions: %u, Max collisions: %u\n\n",
           hash_used, tot_collisions, max_collisions);
  }

  return 0;
}

static uint8_t getIndx(uint32_t value) {
  unsigned int initial_hash;
  unsigned int hash;
  unsigned int collisions = 0;
  uint8_t index;

  /*
   * Search for value using hash table, starting at position hashed from value.
   *
   * The hash table is longer than the maximum number of used entries,
   * so we should always be able to find an unused entry in the hash table.
   */
  initial_hash = ((value * UINT32_C(0x61c88647)) >> (32 - HASHBITS)) & HASHMASK;
  for (hash = initial_hash;
       collisions < HASHSIZE && hashtab[hash].used;
       hash = (hash + 1) & HASHMASK) {
    /*
     * This hash table entry is used.  Get the corresponding index in the
     * main lookup table to check if the value matches.
     */
    index = hashtab[hash].index;
    if (lookup[index] == value) {
      /* Matching value found.  Return its index in the main table. */
      return index;
    }
    /* Count hash collisions and total hash collisions. */
    collisions++;
    tot_collisions++;
    if (max_collisions < collisions) {
      /* Update max hash collisions for diagnostics. */
      max_collisions = collisions;
    }
  }
  /* Value not found. */
  if (count < LOOKUPSIZE && collisions < HASHSIZE) {
    /*
     * There is room in the main lookup table for the new value
     * and room in the hash table.  The index of the new value in the
     * main lookup table will be the current count, which will be incremented.
     */
    index = count++;
    /*
     * Add value to main lookup table,
     * add index in main lookup table to hash table,
     * and return the index in the main lookup table.
     */
    lookup[index] = value;
    hashtab[hash].index = index;
    hashtab[hash].used = 1;
    hash_used++;  /* Count of used hash table entries for diagnostics. */
    return index;
  }
  /*
   * Value not found and main lookup table is full or hash table is full.
   * Give up.
   */
  return 0;
}

另一种处理冲突的方法是让每个散列表项指向一个链表，链表中存放散列到该位置的所有值，但这种做法更复杂。

针对大整数输入的小型查找表

Small Lookup Table for Large Integers

c

performance

lookup-tables