哪个哈希函数更适合在小型哈希表中表示 128 位随机 ID

Question

在我的类中，我有以下练习：

我有 128 位的 GUID（全球唯一标识符）。

哪个哈希函数更适合表示 hashID 000 到 899 的桶中的值，每个桶有 100 个空闲位置来存储哈希冲突？

我想比较以下哈希函数：

a) h(a) = a mod 900
b) h(a) = a mod 887
c) h(a) = a^2 mod 887
d) there is not enough information to answer this question

我得到的：

我认为使用 a^2 并不好，因为它只会在前几千个 id 中给我们带来好处，它们应该更好地分布，但之后，我可能不得不进行更多的碰撞探测将这些值存储在其他存储桶中。

我尝试通过实验来验证上述行为：在下面的代码片段中，我生成了 90000 个“随机”的唯一数字，并用散列函数 a mod 900 将它们分配到映射（Map）的各个桶中。我知道出于某些原因，素数更适合用作散列函数的模数。

随机性仅实现到最大 32 位。但我认为这应该不是太重要，我没有使用最大 128 位。

// Experiment: hash 90000 unique random ids with h(a) = a mod 900 and print
// the load of the fullest bucket.

// Largest number of ids that landed in a single bucket.
let m = 0;
// Ids already handed out, so getRandomInt never returns the same id twice.
const uniqueMap = new Map();
// Modular hash: bucket index of z in a table with p buckets.
const hash = (z, p) => z % p;

/**
 * Draws a random integer in [0, max) that has not been returned before.
 *
 * Every returned id is recorded in `uniqueMap`; the caller must request
 * fewer than `max` ids in total, otherwise the retry loop cannot terminate.
 *
 * @param {number} max - Exclusive upper bound of the id range.
 * @returns {number} A previously unseen integer id.
 */
function getRandomInt(max) {
  let guid;
  do {
    guid = Math.floor(Math.random() * Math.floor(max));
  } while (uniqueMap.has(guid));
  // Record the id; without this the uniqueness check above could never fire.
  uniqueMap.set(guid, true);
  return guid;
}

// Bucket index -> number of ids hashed into that bucket.
const map = new Map();
for (let i = 1; i <= 90000; i++) {
  const h = hash(getRandomInt(2147483647), 900);
  map.set(h, (map.get(h) || 0) + 1);
}

map.forEach((count) => {
  m = Math.max(count, m);
});

console.log(m);

具有相同功能但具有 mod 887 的下一个片段：

// Experiment: hash 90000 unique random ids with h(a) = a mod 887 (a prime
// modulus) and print the load of the fullest bucket.

// Largest number of ids that landed in a single bucket.
let m = 0;
// Ids already handed out, so getRandomInt never returns the same id twice.
const uniqueMap = new Map();
// Modular hash: bucket index of z in a table with p buckets.
const hash = (z, p) => z % p;

/**
 * Draws a random integer in [0, max) that has not been returned before.
 *
 * Every returned id is recorded in `uniqueMap`; the caller must request
 * fewer than `max` ids in total, otherwise the retry loop cannot terminate.
 *
 * @param {number} max - Exclusive upper bound of the id range.
 * @returns {number} A previously unseen integer id.
 */
function getRandomInt(max) {
  let guid;
  do {
    guid = Math.floor(Math.random() * Math.floor(max));
  } while (uniqueMap.has(guid));
  // Record the id; without this the uniqueness check above could never fire.
  uniqueMap.set(guid, true);
  return guid;
}

// Bucket index -> number of ids hashed into that bucket.
const map = new Map();
for (let i = 1; i <= 90000; i++) {
  const h = hash(getRandomInt(2147483647), 887);
  map.set(h, (map.get(h) || 0) + 1);
}

map.forEach((count) => {
  m = Math.max(count, m);
});

console.log(m);

并带有 ^2:

// Experiment: hash 90000 unique random ids with h(a) = a^2 mod 887 and print
// the load of the fullest bucket.

// Largest number of ids that landed in a single bucket.
let m = 0;
// Ids already handed out, so getRandomInt never returns the same id twice.
const uniqueMap = new Map();
// Modular hash: bucket index of z in a table with p buckets.
const hash = (z, p) => z % p;

/**
 * Computes z^2 mod p without losing precision.
 *
 * Squaring an id close to 2^31 directly gives a value above
 * Number.MAX_SAFE_INTEGER, so the following `%` would operate on a rounded
 * number. Reducing z modulo p first keeps the square small and yields the
 * same residue, because (z mod p)^2 is congruent to z^2 modulo p.
 *
 * @param {number} z - Non-negative integer id.
 * @param {number} p - Number of buckets (modulus).
 * @returns {number} z^2 mod p.
 */
const hashSquared = (z, p) => {
  const residue = hash(z, p);
  return hash(residue * residue, p);
};

/**
 * Draws a random integer in [0, max) that has not been returned before.
 *
 * Every returned id is recorded in `uniqueMap`; the caller must request
 * fewer than `max` ids in total, otherwise the retry loop cannot terminate.
 *
 * @param {number} max - Exclusive upper bound of the id range.
 * @returns {number} A previously unseen integer id.
 */
function getRandomInt(max) {
  let guid;
  do {
    guid = Math.floor(Math.random() * Math.floor(max));
  } while (uniqueMap.has(guid));
  // Record the id; without this the uniqueness check above could never fire.
  uniqueMap.set(guid, true);
  return guid;
}

// Bucket index -> number of ids hashed into that bucket.
const map = new Map();
for (let i = 1; i <= 90000; i++) {
  const h = hashSquared(getRandomInt(2147483647), 887);
  map.set(h, (map.get(h) || 0) + 1);
}

map.forEach((count) => {
  m = Math.max(count, m);
});

console.log(m);

全部在一个里面：

// Runs the three experiments back to back (a mod 900, a mod 887,
// a^2 mod 887) and prints the load of the fullest bucket for each.

// Largest number of ids that landed in a single bucket in the latest run.
let m = 0;
// Ids already handed out in the latest run, so ids are unique per run.
let uniqueMap = new Map();
// Bucket index -> number of ids hashed into that bucket in the latest run.
let map = new Map();
// Modular hash: bucket index of z in a table with p buckets.
const hash = (z, p) => z % p;

/**
 * Computes z^2 mod p without losing precision.
 *
 * Squaring an id close to 2^31 directly gives a value above
 * Number.MAX_SAFE_INTEGER, so the following `%` would operate on a rounded
 * number. Reducing z modulo p first keeps the square small and yields the
 * same residue, because (z mod p)^2 is congruent to z^2 modulo p.
 *
 * @param {number} z - Non-negative integer id.
 * @param {number} p - Number of buckets (modulus).
 * @returns {number} z^2 mod p.
 */
const hashSquared = (z, p) => {
  const residue = hash(z, p);
  return hash(residue * residue, p);
};

/**
 * Draws a random integer in [0, max) that has not been returned before in
 * the current run.
 *
 * Every returned id is recorded in `uniqueMap`; the caller must request
 * fewer than `max` ids per run, otherwise the retry loop cannot terminate.
 *
 * @param {number} max - Exclusive upper bound of the id range.
 * @returns {number} A previously unseen integer id.
 */
function getRandomInt(max) {
  let guid;
  do {
    guid = Math.floor(Math.random() * Math.floor(max));
  } while (uniqueMap.has(guid));
  // Record the id; without this the uniqueness check above could never fire.
  uniqueMap.set(guid, true);
  return guid;
}

/**
 * Runs one experiment: resets the shared state, hashes 90000 fresh unique
 * ids with `bucketOf`, and returns the load of the fullest bucket.
 *
 * @param {(id: number) => number} bucketOf - Maps an id to its bucket index.
 * @returns {number} Largest number of ids that share one bucket.
 */
function maxBucketLoad(bucketOf) {
  m = 0;
  uniqueMap = new Map();
  map = new Map();
  for (let i = 1; i <= 90000; i++) {
    const h = bucketOf(getRandomInt(2147483647));
    map.set(h, (map.get(h) || 0) + 1);
  }
  map.forEach((count) => {
    m = Math.max(count, m);
  });
  return m;
}

console.log(maxBucketLoad((id) => hash(id, 900)));

console.log(maxBucketLoad((id) => hash(id, 887)));

console.log(maxBucketLoad((id) => hashSquared(id, 887)));

如果我比较这 3 种方法，结果显示：对 guid 取平方的 a^2 mod 887 的最高碰撞计数，高于不取平方的 mod 887 和 mod 900。所以我认为这不是正确的答案。

但是我应该如何比较其他两个呢？他们向我展示了相似的峰，只有很小的差异。

Answer 1

您可以通过简单地检查哪个具有较少的因子来比较其他两个，因为素数具有较少的因子用于哈希。

之所以两者之间的差异可以忽略不计，主要是因为你使用的散列函数。您的哈希函数已经给出了分布良好的值。但是由于问题是关于直接比较的。最好的方法是选择素数 a mod 887

cs.stackexchange

对此有很好的解释

请访问此链接了解更多信息 https://cs.stackexchange.com/questions/11029/why-is-it-best-to-use-a-prime-number-as-a-mod-in-a-hashing-function

以及有关 modular 散列的更多详细信息 https://algs4.cs.princeton.edu/34hash/

哪个哈希函数更适合在小型哈希表中表示 128 位随机 ID

Which hash function is better fitted to represent 128bit random id in a small hash-table

javascript

hash

hash-function