C# 覆盖 Dictionary/HashSet 中的存储桶查找行为
C# override behaviour of bucket lookup in Dictionary/HashSet
我想在 C# 中为双打实现向量 class 并且需要覆盖 Equals
和 GetHashCode
以便我可以使用我的向量 class 作为键字典或使用哈希集。由于我需要对平等有一定的容忍度,我知道没有办法实现可传递的 Equals 方法和相应的 GetHashCode 方法。
我偶然发现了类似主题的答案:
而且我想知道,有没有一种方法可以更改 C# 中 HashSet/Dictionaries 的查找行为,使其不仅检查一个桶,而且检查多个桶?
或者是否有某些 class 对于 C# 具有此行为?
由于 HashSet 不提供自定义桶搜索行为的方法,我编写了一个自定义 class 来执行多个桶搜索。包括一个现实生活中使用的例子:一个 3 维向量 class.
// Implementing this interface introduces the concept of neighbouring buckets.
public interface IHasNeighbourConcept
{
int[] GetSeveralHashCodes();
// The returned int[] must at least contain the return value of GetHashCode.
}
// Custom HashSet-like class that can search in several buckets.
public class NeighbourSearchHashSet<T> where T : IHasNeighbourConcept
{
// Internal data storage.
private Dictionary<int, List<T>> buckets;
// Constructor.
public NeighbourSearchHashSet()
{
buckets = new Dictionary<int, List<T>>();
}
// Classic implementation utilizing GetHashCode.
public bool Add(T elem)
{
int hash = elem.GetHashCode();
if(!buckets.ContainsKey(hash))
{
buckets[hash] = new List<T>();
buckets[hash].Add(elem);
return true;
}
foreach(T t in buckets[hash])
{
if(elem.Equals(t))
return false;
}
buckets[hash].Add(elem);
return true;
}
/// Nonclassic implementation utilizing GetSeveralHashCodes.
public bool Contains(T elem)
{
int[] hashes = elem.GetSeveralHashCodes();
foreach(int h in hashes)
foreach(T t in buckets[h])
if(elem.Equals(t))
return true;
return false;
}
}
// A 3-dimensional vector class. Since its Equals method is not transitive,
// there can be vectors that are considered equal but have different HashCodes.
// So the Contains method of HashSet<Vector> does not work as expected.
public class Vector : IHasNeighbourConcept
{
private double[] coords;
private static double TOL = 1E-10;
// Tolerance for considering two doubles as equal
public Vector(double x, double y, double z)
{
if(double.IsNaN(x) || double.IsInfinity(x) ||
double.IsNaN(y) || double.IsInfinity(y) ||
double.IsNaN(z) || double.IsInfinity(z))
throw new NotFiniteNumberException("All input must be finite!");
coords = new double[] { x, y, z };
}
// Two vectors are equal iff the distance of each
// corresponding component pair is significantly small.
public override bool Equals(object obj)
{
if(!(obj is Vector))
throw new ArgumentException("Input argument is not a Vector!");
Vector other = obj as Vector;
bool retval = true;
for(int i = 0; i < 2; i++)
retval = retval && (Math.Abs(coords[i] - other.coords[i]) < TOL);
return retval;
}
// The set of all Vectors with the same HashCode
// is a cube with side length TOL.
// Two Vectors considered equal may have different
// HashCodes, but the x, y, z intermediate values
// differ by at most 1.
public override int GetHashCode()
{
int x =(int) Math.Truncate(coords[0] / TOL);
int y =(int) Math.Truncate(coords[1] / TOL);
int z =(int) Math.Truncate(coords[2] / TOL);
return x + 3*y + 5*z; // The purpose of the factors is to make
// permuting the coordinates result
// in different HashCodes.
}
// Gets the HashCode of the given Vector as well as the 26
// HashCodes of the surrounding cubes.
public int[] GetSeveralHashCodes()
{
int[] hashes = new int[27];
int x =(int) Math.Truncate(coords[0] / TOL);
int y =(int) Math.Truncate(coords[1] / TOL);
int z =(int) Math.Truncate(coords[2] / TOL);
for(int i = -1; i <= 1; i++)
for(int j = -1; j <= 1; j++)
for(int k = -1; k <= 1; k++)
hashes[(i+1)+3*(j+1)+9*(k+1)] = (x+i) + 3*(y+j) + 5*(z+k);
return hashes;
}
}
编辑:
上述实现扩展了 HashSet 的概念,即使没有可传递的 Equals
方法,集合的 Contains
方法也能正常工作。它之所以有效,是因为对于 Contains
我们不需要知道我们寻找的元素所在的确切等价 class。
但是,对于词典来说,情况就不同了。我们确实需要获得正确的等价 class(即 hashCode),否则我们会得到不同的图像。因此,不同的哈希码必须导致元素不相等。
我想在 C# 中为双打实现向量 class 并且需要覆盖 Equals
和 GetHashCode
以便我可以使用我的向量 class 作为键字典或使用哈希集。由于我需要对平等有一定的容忍度,我知道没有办法实现可传递的 Equals 方法和相应的 GetHashCode 方法。
我偶然发现了类似主题的答案:
而且我想知道,有没有一种方法可以更改 C# 中 HashSet/Dictionaries 的查找行为,使其不仅检查一个桶,而且检查多个桶?
或者是否有某些 class 对于 C# 具有此行为?
由于 HashSet 不提供自定义桶搜索行为的方法,我编写了一个自定义 class 来执行多个桶搜索。包括一个现实生活中使用的例子:一个 3 维向量 class.
// Implementing this interface introduces the concept of neighbouring buckets.
public interface IHasNeighbourConcept
{
int[] GetSeveralHashCodes();
// The returned int[] must at least contain the return value of GetHashCode.
}
// Custom HashSet-like class that can search in several buckets.
public class NeighbourSearchHashSet<T> where T : IHasNeighbourConcept
{
// Internal data storage.
private Dictionary<int, List<T>> buckets;
// Constructor.
public NeighbourSearchHashSet()
{
buckets = new Dictionary<int, List<T>>();
}
// Classic implementation utilizing GetHashCode.
public bool Add(T elem)
{
int hash = elem.GetHashCode();
if(!buckets.ContainsKey(hash))
{
buckets[hash] = new List<T>();
buckets[hash].Add(elem);
return true;
}
foreach(T t in buckets[hash])
{
if(elem.Equals(t))
return false;
}
buckets[hash].Add(elem);
return true;
}
/// Nonclassic implementation utilizing GetSeveralHashCodes.
public bool Contains(T elem)
{
int[] hashes = elem.GetSeveralHashCodes();
foreach(int h in hashes)
foreach(T t in buckets[h])
if(elem.Equals(t))
return true;
return false;
}
}
// A 3-dimensional vector class. Since its Equals method is not transitive,
// there can be vectors that are considered equal but have different HashCodes.
// So the Contains method of HashSet<Vector> does not work as expected.
public class Vector : IHasNeighbourConcept
{
private double[] coords;
private static double TOL = 1E-10;
// Tolerance for considering two doubles as equal
public Vector(double x, double y, double z)
{
if(double.IsNaN(x) || double.IsInfinity(x) ||
double.IsNaN(y) || double.IsInfinity(y) ||
double.IsNaN(z) || double.IsInfinity(z))
throw new NotFiniteNumberException("All input must be finite!");
coords = new double[] { x, y, z };
}
// Two vectors are equal iff the distance of each
// corresponding component pair is significantly small.
public override bool Equals(object obj)
{
if(!(obj is Vector))
throw new ArgumentException("Input argument is not a Vector!");
Vector other = obj as Vector;
bool retval = true;
for(int i = 0; i < 2; i++)
retval = retval && (Math.Abs(coords[i] - other.coords[i]) < TOL);
return retval;
}
// The set of all Vectors with the same HashCode
// is a cube with side length TOL.
// Two Vectors considered equal may have different
// HashCodes, but the x, y, z intermediate values
// differ by at most 1.
public override int GetHashCode()
{
int x =(int) Math.Truncate(coords[0] / TOL);
int y =(int) Math.Truncate(coords[1] / TOL);
int z =(int) Math.Truncate(coords[2] / TOL);
return x + 3*y + 5*z; // The purpose of the factors is to make
// permuting the coordinates result
// in different HashCodes.
}
// Gets the HashCode of the given Vector as well as the 26
// HashCodes of the surrounding cubes.
public int[] GetSeveralHashCodes()
{
int[] hashes = new int[27];
int x =(int) Math.Truncate(coords[0] / TOL);
int y =(int) Math.Truncate(coords[1] / TOL);
int z =(int) Math.Truncate(coords[2] / TOL);
for(int i = -1; i <= 1; i++)
for(int j = -1; j <= 1; j++)
for(int k = -1; k <= 1; k++)
hashes[(i+1)+3*(j+1)+9*(k+1)] = (x+i) + 3*(y+j) + 5*(z+k);
return hashes;
}
}
编辑:
上述实现扩展了 HashSet 的概念,即使没有可传递的 Equals
方法,集合的 Contains
方法也能正常工作。它之所以有效,是因为对于 Contains
我们不需要知道我们寻找的元素所在的确切等价 class。
但是,对于词典来说,情况就不同了。我们确实需要获得正确的等价 class(即 hashCode),否则我们会得到不同的图像。因此,不同的哈希码必须导致元素不相等。