Distinct() 如何在对象列表中查找唯一元素
Distinct() How to find unique elements in list of objects
有个很简单的class:
/// <summary>
/// Groups the link, text, and group strings that describe one entry.
/// </summary>
public class LinkInformation
{
    /// <summary>Creates an entry from its three component strings.</summary>
    public LinkInformation(string link, string text, string group)
    {
        Link = link;
        Text = text;
        Group = group;
    }

    public string Link { get; set; }

    public string Text { get; set; }

    public string Group { get; set; }

    /// <summary>
    /// Renders the entry as one line: Link padded to 70 characters,
    /// Text padded to 40 characters, followed by Group.
    /// </summary>
    public override string ToString()
    {
        string linkColumn = Link.PadRight(70);
        string textColumn = Text.PadRight(40);
        return string.Concat(linkColumn, textColumn, Group);
    }
}
然后我创建了这个 class 的对象列表,其中包含多个重复项。
因此,我尝试使用 Distinct()
来获取唯一值列表。
但是不行,所以我实现了
IComparable<LinkInformation>
/// <summary>
/// Orders this instance relative to <paramref name="other"/> by comparing the
/// strings produced by ToString() on each side.
/// </summary>
/// <param name="other">The instance to compare with; may be null.</param>
/// <returns>
/// A negative value, zero, or a positive value when this instance sorts before,
/// equal to, or after <paramref name="other"/>. A null argument always sorts first.
/// </returns>
int IComparable<LinkInformation>.CompareTo(LinkInformation other)
{
    // By IComparable<T> convention any instance is greater than null. Without this
    // guard, other.ToString() would throw NullReferenceException.
    if (Object.ReferenceEquals(other, null))
    {
        return 1;
    }

    return this.ToString().CompareTo(other.ToString());
}
然后...
IEqualityComparer<LinkInformation>
/// <summary>
/// Determines whether two <see cref="LinkInformation"/> values are equal by comparing
/// their Link, Text, and Group strings ordinally.
/// </summary>
/// <param name="x">First value; may be null.</param>
/// <param name="y">Second value; may be null.</param>
/// <returns>
/// True when both arguments are null or the same reference, or when all three
/// properties match; otherwise false.
/// </returns>
public bool Equals(LinkInformation x, LinkInformation y)
{
    // Same reference (this also covers both arguments being null).
    if (Object.ReferenceEquals(x, y))
    {
        return true;
    }

    // Exactly one side is null, so they cannot be equal.
    if (Object.ReferenceEquals(x, null) || Object.ReferenceEquals(y, null))
    {
        return false;
    }

    // Compare the same three properties that GetHashCode hashes, using ordinal equality.
    // Comparing the padded ToString() output with the culture-sensitive CompareTo could
    // report equality for values whose hash codes differ (for example "a" versus "a "
    // after padding), which violates the Equals/GetHashCode contract.
    return string.Equals(x.Link, y.Link, StringComparison.Ordinal)
        && string.Equals(x.Text, y.Text, StringComparison.Ordinal)
        && string.Equals(x.Group, y.Group, StringComparison.Ordinal);
}
/// <summary>
/// Computes a hash code from the Link, Text, and Group strings of <paramref name="obj"/>.
/// </summary>
/// <param name="obj">The value to hash.</param>
/// <returns>A hash code combining the three properties; a null property contributes 0.</returns>
/// <exception cref="ArgumentNullException"><paramref name="obj"/> is null.</exception>
public int GetHashCode(LinkInformation obj)
{
    if (Object.ReferenceEquals(obj, null))
    {
        throw new ArgumentNullException("obj");
    }

    // The multiply-and-add steps are expected to wrap around; unchecked keeps them from
    // throwing OverflowException when the project is compiled with overflow checking on.
    unchecked
    {
        int hash = 17;
        hash = hash * 23 + (obj.Link == null ? 0 : obj.Link.GetHashCode());
        hash = hash * 23 + (obj.Text == null ? 0 : obj.Text.GetHashCode());
        hash = hash * 23 + (obj.Group == null ? 0 : obj.Group.GetHashCode());
        return hash;
    }
}
使用Distinct
的代码是:
/// <summary>
/// Parses each listed file, takes the distinct parsed elements, and prints one numbered
/// line per element, then waits for a key press.
/// </summary>
static void Main(string[] args)
{
    string[] inputPaths =
    {
        @"C:\temp\html.html",
        @"C:\temp\html.html",
        @"C:\temp\html.html",
        @"C:\temp\html.html",
        @"C:\temp\html.html"
    };

    // Running counter shared across all files.
    int lineNumber = 0;

    foreach (var path in inputPaths)
    {
        // Distinct() is called without a comparer, so default equality decides what counts
        // as a duplicate.
        var uniqueEntries = new HtmlParser().Parse(path).Distinct();

        foreach (var entry in uniqueEntries)
        {
            string fileName = new FileInfo(path).Name;
            // Drop the last five characters of the file name (".html" for these inputs).
            string baseName = fileName.Substring(0, fileName.Length - 5);

            lineNumber++;
            Console.WriteLine(lineNumber.ToString().PadRight(5) + baseName.PadRight(20) + entry);
        }
    }

    Console.ReadKey();
}
要使 Distinct()
正常工作需要做什么?
如果您想从某种自定义数据类型的对象序列中返回不重复 (distinct) 的元素,您必须在该 class
中实现 IEquatable<T> 泛型接口。
这是一个示例实现:
/// <summary>
/// Sample type with value equality over Name and Code, so that set-based LINQ operators
/// such as Distinct() treat two instances with the same values as duplicates.
/// </summary>
public class Product : IEquatable<Product>
{
    public string Name { get; set; }
    public int Code { get; set; }

    /// <summary>
    /// Returns true when <paramref name="other"/> is non-null and has the same Code and Name.
    /// </summary>
    public bool Equals(Product other)
    {
        // Check whether the compared object is null.
        if (Object.ReferenceEquals(other, null))
        {
            return false;
        }

        // Check whether the compared object references the same data.
        if (Object.ReferenceEquals(this, other))
        {
            return true;
        }

        // The static string.Equals is null-safe. Calling Name.Equals(...) on the instance
        // threw NullReferenceException when Name was null, although GetHashCode accepts that.
        return Code == other.Code && string.Equals(Name, other.Name);
    }

    /// <summary>
    /// Routes the untyped overload to the typed one so that both agree with GetHashCode.
    /// </summary>
    public override bool Equals(object obj)
    {
        return Equals(obj as Product);
    }

    // If Equals() returns true for a pair of objects
    // then GetHashCode() must return the same value for these objects.
    public override int GetHashCode()
    {
        // Get hash code for the Name field if it is not null.
        int hashProductName = Name == null ? 0 : Name.GetHashCode();
        // Get hash code for the Code field.
        int hashProductCode = Code.GetHashCode();
        // Calculate the hash code for the product.
        return hashProductName ^ hashProductCode;
    }
}
下面演示如何实际执行去重 (Distinct) 操作:
// Four sample products; the first and third have the same Name and Code.
Product[] products = new Product[]
{
    new Product { Name = "apple", Code = 9 },
    new Product { Name = "orange", Code = 4 },
    new Product { Name = "apple", Code = 9 },
    new Product { Name = "lemon", Code = 12 }
};

// Remove duplicates; no comparer is passed, so the type's own equality is used.
IEnumerable<Product> noduplicates = products.Distinct();
您需要在调用时把您创建的 IEqualityComparer
实际传递给 Distinct
。它有两个重载:一个不接受任何参数,另一个接受 IEqualityComparer
。如果您不提供比较器,则会使用默认比较器,而默认比较器不会按照您期望的方式比较对象。
如果您愿意只用单个属性来定义 "distinctness"(唯一性),您可以用
// Bucket the items by Text, then keep the first item of every bucket.
list
    .GroupBy(item => item.Text)
    .Select(sameText => sameText.First())
获取 "unique" 项的列表。
这样就不必再去折腾 IEqualityComparer
之类的接口了。
不使用 Distinct 和比较器,怎么样:
// Bucket the items by their ToString() text, then keep the first item of every bucket.
list
    .GroupBy(item => item.ToString())
    .Select(sameString => sameString.First())
我知道这个方案并不是对原问题的直接回答,但我认为提供其他可行的解决思路也是有价值的。
有个很简单的class:
/// <summary>
/// Groups the link, text, and group strings that describe one entry.
/// </summary>
public class LinkInformation
{
    /// <summary>Creates an entry from its three component strings.</summary>
    public LinkInformation(string link, string text, string group)
    {
        Link = link;
        Text = text;
        Group = group;
    }

    public string Link { get; set; }

    public string Text { get; set; }

    public string Group { get; set; }

    /// <summary>
    /// Renders the entry as one line: Link padded to 70 characters,
    /// Text padded to 40 characters, followed by Group.
    /// </summary>
    public override string ToString()
    {
        string linkColumn = Link.PadRight(70);
        string textColumn = Text.PadRight(40);
        return string.Concat(linkColumn, textColumn, Group);
    }
}
然后我创建了这个 class 的对象列表,其中包含多个重复项。
因此,我尝试使用 Distinct()
来获取唯一值列表。
但是不行,所以我实现了
IComparable<LinkInformation>
/// <summary>
/// Orders this instance relative to <paramref name="other"/> by comparing the
/// strings produced by ToString() on each side.
/// </summary>
/// <param name="other">The instance to compare with; may be null.</param>
/// <returns>
/// A negative value, zero, or a positive value when this instance sorts before,
/// equal to, or after <paramref name="other"/>. A null argument always sorts first.
/// </returns>
int IComparable<LinkInformation>.CompareTo(LinkInformation other)
{
    // By IComparable<T> convention any instance is greater than null. Without this
    // guard, other.ToString() would throw NullReferenceException.
    if (Object.ReferenceEquals(other, null))
    {
        return 1;
    }

    return this.ToString().CompareTo(other.ToString());
}
然后...
IEqualityComparer<LinkInformation>
/// <summary>
/// Determines whether two <see cref="LinkInformation"/> values are equal by comparing
/// their Link, Text, and Group strings ordinally.
/// </summary>
/// <param name="x">First value; may be null.</param>
/// <param name="y">Second value; may be null.</param>
/// <returns>
/// True when both arguments are null or the same reference, or when all three
/// properties match; otherwise false.
/// </returns>
public bool Equals(LinkInformation x, LinkInformation y)
{
    // Same reference (this also covers both arguments being null).
    if (Object.ReferenceEquals(x, y))
    {
        return true;
    }

    // Exactly one side is null, so they cannot be equal.
    if (Object.ReferenceEquals(x, null) || Object.ReferenceEquals(y, null))
    {
        return false;
    }

    // Compare the same three properties that GetHashCode hashes, using ordinal equality.
    // Comparing the padded ToString() output with the culture-sensitive CompareTo could
    // report equality for values whose hash codes differ (for example "a" versus "a "
    // after padding), which violates the Equals/GetHashCode contract.
    return string.Equals(x.Link, y.Link, StringComparison.Ordinal)
        && string.Equals(x.Text, y.Text, StringComparison.Ordinal)
        && string.Equals(x.Group, y.Group, StringComparison.Ordinal);
}
/// <summary>
/// Computes a hash code from the Link, Text, and Group strings of <paramref name="obj"/>.
/// </summary>
/// <param name="obj">The value to hash.</param>
/// <returns>A hash code combining the three properties; a null property contributes 0.</returns>
/// <exception cref="ArgumentNullException"><paramref name="obj"/> is null.</exception>
public int GetHashCode(LinkInformation obj)
{
    if (Object.ReferenceEquals(obj, null))
    {
        throw new ArgumentNullException("obj");
    }

    // The multiply-and-add steps are expected to wrap around; unchecked keeps them from
    // throwing OverflowException when the project is compiled with overflow checking on.
    unchecked
    {
        int hash = 17;
        hash = hash * 23 + (obj.Link == null ? 0 : obj.Link.GetHashCode());
        hash = hash * 23 + (obj.Text == null ? 0 : obj.Text.GetHashCode());
        hash = hash * 23 + (obj.Group == null ? 0 : obj.Group.GetHashCode());
        return hash;
    }
}
使用Distinct
的代码是:
/// <summary>
/// Parses each listed file, takes the distinct parsed elements, and prints one numbered
/// line per element, then waits for a key press.
/// </summary>
static void Main(string[] args)
{
    string[] inputPaths =
    {
        @"C:\temp\html.html",
        @"C:\temp\html.html",
        @"C:\temp\html.html",
        @"C:\temp\html.html",
        @"C:\temp\html.html"
    };

    // Running counter shared across all files.
    int lineNumber = 0;

    foreach (var path in inputPaths)
    {
        // Distinct() is called without a comparer, so default equality decides what counts
        // as a duplicate.
        var uniqueEntries = new HtmlParser().Parse(path).Distinct();

        foreach (var entry in uniqueEntries)
        {
            string fileName = new FileInfo(path).Name;
            // Drop the last five characters of the file name (".html" for these inputs).
            string baseName = fileName.Substring(0, fileName.Length - 5);

            lineNumber++;
            Console.WriteLine(lineNumber.ToString().PadRight(5) + baseName.PadRight(20) + entry);
        }
    }

    Console.ReadKey();
}
要使 Distinct()
正常工作需要做什么?
如果您想从某种自定义数据类型的对象序列中返回不重复 (distinct) 的元素,您必须在该 class
中实现 IEquatable<T> 泛型接口。这是一个示例实现:
/// <summary>
/// Sample type with value equality over Name and Code, so that set-based LINQ operators
/// such as Distinct() treat two instances with the same values as duplicates.
/// </summary>
public class Product : IEquatable<Product>
{
    public string Name { get; set; }
    public int Code { get; set; }

    /// <summary>
    /// Returns true when <paramref name="other"/> is non-null and has the same Code and Name.
    /// </summary>
    public bool Equals(Product other)
    {
        // Check whether the compared object is null.
        if (Object.ReferenceEquals(other, null))
        {
            return false;
        }

        // Check whether the compared object references the same data.
        if (Object.ReferenceEquals(this, other))
        {
            return true;
        }

        // The static string.Equals is null-safe. Calling Name.Equals(...) on the instance
        // threw NullReferenceException when Name was null, although GetHashCode accepts that.
        return Code == other.Code && string.Equals(Name, other.Name);
    }

    /// <summary>
    /// Routes the untyped overload to the typed one so that both agree with GetHashCode.
    /// </summary>
    public override bool Equals(object obj)
    {
        return Equals(obj as Product);
    }

    // If Equals() returns true for a pair of objects
    // then GetHashCode() must return the same value for these objects.
    public override int GetHashCode()
    {
        // Get hash code for the Name field if it is not null.
        int hashProductName = Name == null ? 0 : Name.GetHashCode();
        // Get hash code for the Code field.
        int hashProductCode = Code.GetHashCode();
        // Calculate the hash code for the product.
        return hashProductName ^ hashProductCode;
    }
}
下面演示如何实际执行去重 (Distinct) 操作:
// Four sample products; the first and third have the same Name and Code.
Product[] products = new Product[]
{
    new Product { Name = "apple", Code = 9 },
    new Product { Name = "orange", Code = 4 },
    new Product { Name = "apple", Code = 9 },
    new Product { Name = "lemon", Code = 12 }
};

// Remove duplicates; no comparer is passed, so the type's own equality is used.
IEnumerable<Product> noduplicates = products.Distinct();
您需要在调用时把您创建的 IEqualityComparer
实际传递给 Distinct
。它有两个重载:一个不接受任何参数,另一个接受 IEqualityComparer
。如果您不提供比较器,则会使用默认比较器,而默认比较器不会按照您期望的方式比较对象。
如果您愿意只用单个属性来定义 "distinctness"(唯一性),您可以用
// Bucket the items by Text, then keep the first item of every bucket.
list
    .GroupBy(item => item.Text)
    .Select(sameText => sameText.First())
获取 "unique" 项的列表。
这样就不必再去折腾 IEqualityComparer
之类的接口了。
不使用 Distinct 和比较器,怎么样:
// Bucket the items by their ToString() text, then keep the first item of every bucket.
list
    .GroupBy(item => item.ToString())
    .Select(sameString => sameString.First())
我知道这个方案并不是对原问题的直接回答,但我认为提供其他可行的解决思路也是有价值的。