2 个哈希码之和发生冲突的可能性有多大?
What are the chances for collision in sum of 2 hash codes?
如果在 Java
中添加 2 个其他哈希码生成新的哈希码,则发生冲突的概率是多少
例如:
Integer reportHashCode = reportFields.hashCode() + reportId.hashCode();
我们假设 Java 的哈希码是 32 位,我们可以忽略哈希码本身的正常冲突。
我会在这里 XOR
而不是加法,因为 xor 有 50-50% 的分布 1
和 0
。
我们找出来怎么样?下面的程序将为您模拟这一点。请注意,总和的两个加数是随机生成的,因此两者都大约具有完整的整数概率范围。实际上,您求和的两个哈希码可能不会在整个整数 space 上呈平坦分布。可以调整程序来模拟那个。
package hashcode;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
public class HashCode {
// Number of test cases
private static final int TEST_CASES = 10_000_000;
public static void main(String[] args) {
// Random number generator
Random rand = new Random();
// Map from integers (result hash codes) to a list of addend pairs that formed those hash codes
Map<Integer, Set<Pair>> hashCodesToComposites = new HashMap<>();
// Number of collissions
int collisions = 0;
// Running simulations
for (int i = 0; i < TEST_CASES; ++i) {
if (TEST_CASES / 4 == i) {
System.out.println("25 %");
}
if (TEST_CASES / 2 == i) {
System.out.println("50 %");
}
if ((TEST_CASES * 3) / 4 == i) {
System.out.println("75 %");
}
// Generating addends as random integers
int first = rand.nextInt();
int second = rand.nextInt();
// The pair; its hash code is the sum of the above
Pair pair = new Pair(first, second);
// Did it occur before?
if (hashCodesToComposites.containsKey(pair.hashCode())) {
// Getting the set of addend pairs that created this hash code
Set<Pair> composites = hashCodesToComposites.get(pair.hashCode());
// Checking if by any chance the two random numbers happened to be the same (almost negligible)
if (!composites.contains(pair)) {
// Actual collision from different numbers
collisions++;
// Adding to the set of composites
composites.add(pair);
} // Same numbers; doesn't count as collision
} else {
// First occurrence of this hash code
Set<Pair> composites = new HashSet<>();
composites.add(pair);
hashCodesToComposites.put(pair.hashCode(), composites);
}
}
// Results
System.out.println("Test cases: " + TEST_CASES);
System.out.println("Collisions: " + collisions);
System.out.println("Probability: " + ((double) collisions / (double) TEST_CASES));
}
private static class Pair {
final int first;
final int second;
final int hashCode;
Pair(int first, int second) {
this.first = first;
this.second = second;
hashCode = first + second;
}
@Override
public int hashCode() {
return hashCode;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
final Pair other = (Pair) obj;
return (this.first == other.first && this.second == other.second) || (this.first == other.second && this.second == other.first);
}
}
}
结果通常在 0.00115 左右。这意味着大约有 0.115% 的碰撞几率。我在下面 运行 找出随机整数之间发生碰撞的几率。
package hashcode;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
public class HashCode2 {
// Number of test cases
private static final int TEST_CASES = 10_000_000;
public static void main(String[] args) {
// Random number generator
Random rand = new Random();
Set<Integer> hashCodes = new HashSet<>();
// Number of collissions
int collisions = 0;
// Running simulations
for (int i = 0; i < TEST_CASES; ++i) {
if (TEST_CASES / 4 == i) {
System.out.println("25 %");
}
if (TEST_CASES / 2 == i) {
System.out.println("50 %");
}
if ((TEST_CASES * 3) / 4 == i) {
System.out.println("75 %");
}
int next = rand.nextInt();
if (hashCodes.contains(next)) {
collisions++;
} else {
hashCodes.add(next);
}
}
// Results
System.out.println("Test cases: " + TEST_CASES);
System.out.println("Collisions: " + collisions);
System.out.println("Probability: " + ((double) collisions / (double) TEST_CASES));
}
}
概率其实差不多。它仅略低,但仍四舍五入为 0.115%。最后,我再次尝试了第一个程序,但是在 Pair
的 hashCode 方法中使用了异或,而不是求和。结果?几乎又是同样的事情。
所以最后,对于两个哈希码和异或的总和,您可以预期非常接近与随机整数相同的冲突率,前提是两个哈希码都summed/xor'ed具有良好的分布.
如果在 Java
中添加 2 个其他哈希码生成新的哈希码,则发生冲突的概率是多少例如:
Integer reportHashCode = reportFields.hashCode() + reportId.hashCode();
我们假设 Java 的哈希码是 32 位,我们可以忽略哈希码本身的正常冲突。
我会在这里 XOR
而不是加法,因为 xor 有 50-50% 的分布 1
和 0
。
我们找出来怎么样?下面的程序将为您模拟这一点。请注意,总和的两个加数是随机生成的,因此两者都大约具有完整的整数概率范围。实际上,您求和的两个哈希码可能不会在整个整数 space 上呈平坦分布。可以调整程序来模拟那个。
package hashcode;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
public class HashCode {
// Number of test cases
private static final int TEST_CASES = 10_000_000;
public static void main(String[] args) {
// Random number generator
Random rand = new Random();
// Map from integers (result hash codes) to a list of addend pairs that formed those hash codes
Map<Integer, Set<Pair>> hashCodesToComposites = new HashMap<>();
// Number of collissions
int collisions = 0;
// Running simulations
for (int i = 0; i < TEST_CASES; ++i) {
if (TEST_CASES / 4 == i) {
System.out.println("25 %");
}
if (TEST_CASES / 2 == i) {
System.out.println("50 %");
}
if ((TEST_CASES * 3) / 4 == i) {
System.out.println("75 %");
}
// Generating addends as random integers
int first = rand.nextInt();
int second = rand.nextInt();
// The pair; its hash code is the sum of the above
Pair pair = new Pair(first, second);
// Did it occur before?
if (hashCodesToComposites.containsKey(pair.hashCode())) {
// Getting the set of addend pairs that created this hash code
Set<Pair> composites = hashCodesToComposites.get(pair.hashCode());
// Checking if by any chance the two random numbers happened to be the same (almost negligible)
if (!composites.contains(pair)) {
// Actual collision from different numbers
collisions++;
// Adding to the set of composites
composites.add(pair);
} // Same numbers; doesn't count as collision
} else {
// First occurrence of this hash code
Set<Pair> composites = new HashSet<>();
composites.add(pair);
hashCodesToComposites.put(pair.hashCode(), composites);
}
}
// Results
System.out.println("Test cases: " + TEST_CASES);
System.out.println("Collisions: " + collisions);
System.out.println("Probability: " + ((double) collisions / (double) TEST_CASES));
}
private static class Pair {
final int first;
final int second;
final int hashCode;
Pair(int first, int second) {
this.first = first;
this.second = second;
hashCode = first + second;
}
@Override
public int hashCode() {
return hashCode;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
final Pair other = (Pair) obj;
return (this.first == other.first && this.second == other.second) || (this.first == other.second && this.second == other.first);
}
}
}
结果通常在 0.00115 左右。这意味着大约有 0.115% 的碰撞几率。我在下面 运行 找出随机整数之间发生碰撞的几率。
package hashcode;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
public class HashCode2 {
// Number of test cases
private static final int TEST_CASES = 10_000_000;
public static void main(String[] args) {
// Random number generator
Random rand = new Random();
Set<Integer> hashCodes = new HashSet<>();
// Number of collissions
int collisions = 0;
// Running simulations
for (int i = 0; i < TEST_CASES; ++i) {
if (TEST_CASES / 4 == i) {
System.out.println("25 %");
}
if (TEST_CASES / 2 == i) {
System.out.println("50 %");
}
if ((TEST_CASES * 3) / 4 == i) {
System.out.println("75 %");
}
int next = rand.nextInt();
if (hashCodes.contains(next)) {
collisions++;
} else {
hashCodes.add(next);
}
}
// Results
System.out.println("Test cases: " + TEST_CASES);
System.out.println("Collisions: " + collisions);
System.out.println("Probability: " + ((double) collisions / (double) TEST_CASES));
}
}
概率其实差不多。它仅略低,但仍四舍五入为 0.115%。最后,我再次尝试了第一个程序,但是在 Pair
的 hashCode 方法中使用了异或,而不是求和。结果?几乎又是同样的事情。
所以最后,对于两个哈希码和异或的总和,您可以预期非常接近与随机整数相同的冲突率,前提是两个哈希码都summed/xor'ed具有良好的分布.