通过删除重复条目优化 DataTable
Optimizing DataTable by removing duplicate entries
我有一个如下所示的数据表:
RefID RefDescription ReferenceUrl SortOrder
0 Interdiscip http://www.ncbi.nlm.nih.gov/ 1
0 Entropy 20133 http://www.mdpi.com/1099-4300 2
0 Interdiscip http://www.ncbi.nlm.nih.gov/ 3
0 Agriculture http://www1.agric.gov.ab.ca/ 4
0 Interdiscip http://www.ncbi.nlm.nih.gov/ 5
在上面的数据集中,如果 RefDescription 和 ReferenceUrl 的值相同,则删除那些重复的行并保留单行,并根据排序顺序向其附加数字。
这里,上面的dataSet--RefDescription: Interdiscip重复了3次,ReferenceUrl也一样。所以我想要以下输出作为数据集。
我想要的结果:
RefID RefDescription ReferenceUrl SortOrder
0 3,5 Interdiscip http://www.ncbi.nlm.nih.gov/ 1
0 Entropy 20133 http://www.mdpi.com/1099-4300 2
0 Agriculture http://www1.agric.gov.ab.ca/ 4
Note:RefDescription and ReferenceUrl both should have same values in
this scenarios otherwise no need to remove and append.
C# 代码:我试过了
protected void Page_Load(object sender, EventArgs e)
{
int rowcount = 0;
DataTable dt = new DataTable("ReferenceData");
dt.Columns.Add("ReferenceID");
dt.Columns.Add("ReferenceDescription");
dt.Columns.Add("ReferenceUrl");
dt.Columns.Add("SortOrder");
dt.Rows.Add("0","Interdiscip","http://www.ncbi.nlm.nih.gov/","1");
dt.Rows.Add("0", "Entropy 20133", "http://www.mdpi.com/1099-4300", "2");
dt.Rows.Add("0", "Interdiscip", "http://www.ncbi.nlm.nih.gov/", "3");
dt.Rows.Add("0", "Agriculture", "http://www1.agric.gov.ab.ca/", "4");
dt.Rows.Add("0", "Interdiscip", "http://www.ncbi.nlm.nih.gov/", "5");
DataSet ds = new DataSet();
DataTable dtOut = null;
ds.Tables.Add(dt);
DataView dv = dt.DefaultView;
dv.Sort = "ReferenceDescription,ReferenceUrl";
dtOut = dv.ToTable();
for (int t = 0; t < dtOut.Rows.Count; t++)
{
int i = 0;
int count = 0;
int sortorder = 0;
string space = null;
string x = dtOut.Rows[t][1].ToString();
string y = dtOut.Rows[t][2].ToString();
sortorder = Convert.ToInt32(dtOut.Rows[rowcount][3]);
for (int j = 0; j < dtOut.Rows.Count; j++)
{
if (x == dtOut.Rows[i][1].ToString() && y == dtOut.Rows[i][2].ToString())
{
count++;
if (count > 1)
{
sortorder = Convert.ToInt32(dtOut.Rows[i][3]);
space += sortorder + " ";
dtOut.Rows[i].Delete();
dtOut.AcceptChanges();
}
}
i++;
}
dtOut.Rows[rowcount][1] = space + x;
rowcount++;
}
}
您可以使用 Linq-To-DataSet
:
var grps = from row in dt.AsEnumerable()
let RefDescription = row.Field<string>("RefDescription")
let ReferenceUrl = row.Field<string>("ReferenceUrl")
group row by new { RefDescription, ReferenceUrl } into groups
select groups;
dt = grps.Select(g =>
{
DataRow first = g.First();
if (g.Skip(1).Any())
{
// duplicates
string otherSortOrders = String.Join(",", g.Skip(1).Select(r => r.Field<int>("SortOrder")));
first.SetField("RefDescription", string.Format("{0} {1}",
otherSortOrders,
g.Key.RefDescription));
}
return first;
})
.CopyToDataTable();
或使用纯方法语法的一个查询(我更喜欢组合):
dt = dt.AsEnumerable()
.GroupBy(r => new {
RefDescription = r.Field<string>("RefDescription"),
ReferenceUrl = r.Field<string>("ReferenceUrl")
})
.Select(grp =>
{
DataRow first = grp.First();
if (grp.Skip(1).Any())
{
// duplicates
string otherSortOrders = String.Join(",", grp.Skip(1).Select(r => r.Field<int>("SortOrder")));
first.SetField("RefDescription", string.Format("{0} {1}",
otherSortOrders,
grp.Key.RefDescription));
}
return first;
}).CopyToDataTable();
我做了一些改动,现在就试试这个,如果它能解决问题...
int rowcount = 0;
DataTable dt = new DataTable("ReferenceData");
dt.Columns.Add("ReferenceID");
dt.Columns.Add("ReferenceDescription");
dt.Columns.Add("ReferenceUrl");
dt.Columns.Add("SortOrder");
dt.Rows.Add("0", "Interdiscip", "http://www.ncbi.nlm.nih.gov/", "1");
dt.Rows.Add("0", "Entropy 20133", "http://www.mdpi.com/1099-4300", "2");
dt.Rows.Add("0", "Interdiscip", "http://www.ncbi.nlm.nih.gov/", "3");
dt.Rows.Add("0", "Agriculture", "http://www1.agric.gov.ab.ca/", "4");
dt.Rows.Add("0", "Interdiscip", "http://www.ncbi.nlm.nih.gov/", "5");
DataSet ds = new DataSet();
DataTable dtOut = null;
ds.Tables.Add(dt);
DataView dv = dt.DefaultView;
dv.Sort = "ReferenceDescription,ReferenceUrl";
dtOut = dv.ToTable();
dt = dv.ToTable();
for (int t = 0; t < dtOut.Rows.Count; t++)
{
int i = 0;
int count = 0;
int sortorder = 0;
string space = null;
string x = dtOut.Rows[t][1].ToString();
string y = dtOut.Rows[t][2].ToString();
sortorder = Convert.ToInt32(dtOut.Rows[rowcount][3]);
for (int j = 0; j < dtOut.Rows.Count; j++)
{
if (x == dtOut.Rows[j][1].ToString() && y == dtOut.Rows[j][2].ToString())
{
count = dtOut.AsEnumerable().Where(s => s.Field<string>("ReferenceDescription").EndsWith(x) && s.Field<string>("ReferenceUrl") == y).Count();
//count++;
if (count > 1)
{
sortorder = Convert.ToInt32(dtOut.Rows[j][3]);
space += sortorder + " ";
dtOut.Rows[j].Delete();
dtOut.AcceptChanges();
}
}
i++;
}
dtOut.Rows[rowcount][1] = space + x;
rowcount++;
}
我有一个如下所示的数据表:
RefID RefDescription ReferenceUrl SortOrder
0 Interdiscip http://www.ncbi.nlm.nih.gov/ 1
0 Entropy 20133 http://www.mdpi.com/1099-4300 2
0 Interdiscip http://www.ncbi.nlm.nih.gov/ 3
0 Agriculture http://www1.agric.gov.ab.ca/ 4
0 Interdiscip http://www.ncbi.nlm.nih.gov/ 5
在上面的数据集中,如果 RefDescription 和 ReferenceUrl 的值相同,则删除那些重复的行并保留单行,并根据排序顺序向其附加数字。
这里,上面的dataSet--RefDescription: Interdiscip重复了3次,ReferenceUrl也一样。所以我想要以下输出作为数据集。
我想要的结果:
RefID RefDescription ReferenceUrl SortOrder
0 3,5 Interdiscip http://www.ncbi.nlm.nih.gov/ 1
0 Entropy 20133 http://www.mdpi.com/1099-4300 2
0 Agriculture http://www1.agric.gov.ab.ca/ 4
Note:RefDescription and ReferenceUrl both should have same values in this scenarios otherwise no need to remove and append.
C# 代码:我试过了
protected void Page_Load(object sender, EventArgs e)
{
int rowcount = 0;
DataTable dt = new DataTable("ReferenceData");
dt.Columns.Add("ReferenceID");
dt.Columns.Add("ReferenceDescription");
dt.Columns.Add("ReferenceUrl");
dt.Columns.Add("SortOrder");
dt.Rows.Add("0","Interdiscip","http://www.ncbi.nlm.nih.gov/","1");
dt.Rows.Add("0", "Entropy 20133", "http://www.mdpi.com/1099-4300", "2");
dt.Rows.Add("0", "Interdiscip", "http://www.ncbi.nlm.nih.gov/", "3");
dt.Rows.Add("0", "Agriculture", "http://www1.agric.gov.ab.ca/", "4");
dt.Rows.Add("0", "Interdiscip", "http://www.ncbi.nlm.nih.gov/", "5");
DataSet ds = new DataSet();
DataTable dtOut = null;
ds.Tables.Add(dt);
DataView dv = dt.DefaultView;
dv.Sort = "ReferenceDescription,ReferenceUrl";
dtOut = dv.ToTable();
for (int t = 0; t < dtOut.Rows.Count; t++)
{
int i = 0;
int count = 0;
int sortorder = 0;
string space = null;
string x = dtOut.Rows[t][1].ToString();
string y = dtOut.Rows[t][2].ToString();
sortorder = Convert.ToInt32(dtOut.Rows[rowcount][3]);
for (int j = 0; j < dtOut.Rows.Count; j++)
{
if (x == dtOut.Rows[i][1].ToString() && y == dtOut.Rows[i][2].ToString())
{
count++;
if (count > 1)
{
sortorder = Convert.ToInt32(dtOut.Rows[i][3]);
space += sortorder + " ";
dtOut.Rows[i].Delete();
dtOut.AcceptChanges();
}
}
i++;
}
dtOut.Rows[rowcount][1] = space + x;
rowcount++;
}
}
您可以使用 Linq-To-DataSet
:
var grps = from row in dt.AsEnumerable()
let RefDescription = row.Field<string>("RefDescription")
let ReferenceUrl = row.Field<string>("ReferenceUrl")
group row by new { RefDescription, ReferenceUrl } into groups
select groups;
dt = grps.Select(g =>
{
DataRow first = g.First();
if (g.Skip(1).Any())
{
// duplicates
string otherSortOrders = String.Join(",", g.Skip(1).Select(r => r.Field<int>("SortOrder")));
first.SetField("RefDescription", string.Format("{0} {1}",
otherSortOrders,
g.Key.RefDescription));
}
return first;
})
.CopyToDataTable();
或使用纯方法语法的一个查询(我更喜欢组合):
dt = dt.AsEnumerable()
.GroupBy(r => new {
RefDescription = r.Field<string>("RefDescription"),
ReferenceUrl = r.Field<string>("ReferenceUrl")
})
.Select(grp =>
{
DataRow first = grp.First();
if (grp.Skip(1).Any())
{
// duplicates
string otherSortOrders = String.Join(",", grp.Skip(1).Select(r => r.Field<int>("SortOrder")));
first.SetField("RefDescription", string.Format("{0} {1}",
otherSortOrders,
grp.Key.RefDescription));
}
return first;
}).CopyToDataTable();
我做了一些改动,现在就试试这个,如果它能解决问题...
int rowcount = 0;
DataTable dt = new DataTable("ReferenceData");
dt.Columns.Add("ReferenceID");
dt.Columns.Add("ReferenceDescription");
dt.Columns.Add("ReferenceUrl");
dt.Columns.Add("SortOrder");
dt.Rows.Add("0", "Interdiscip", "http://www.ncbi.nlm.nih.gov/", "1");
dt.Rows.Add("0", "Entropy 20133", "http://www.mdpi.com/1099-4300", "2");
dt.Rows.Add("0", "Interdiscip", "http://www.ncbi.nlm.nih.gov/", "3");
dt.Rows.Add("0", "Agriculture", "http://www1.agric.gov.ab.ca/", "4");
dt.Rows.Add("0", "Interdiscip", "http://www.ncbi.nlm.nih.gov/", "5");
DataSet ds = new DataSet();
DataTable dtOut = null;
ds.Tables.Add(dt);
DataView dv = dt.DefaultView;
dv.Sort = "ReferenceDescription,ReferenceUrl";
dtOut = dv.ToTable();
dt = dv.ToTable();
for (int t = 0; t < dtOut.Rows.Count; t++)
{
int i = 0;
int count = 0;
int sortorder = 0;
string space = null;
string x = dtOut.Rows[t][1].ToString();
string y = dtOut.Rows[t][2].ToString();
sortorder = Convert.ToInt32(dtOut.Rows[rowcount][3]);
for (int j = 0; j < dtOut.Rows.Count; j++)
{
if (x == dtOut.Rows[j][1].ToString() && y == dtOut.Rows[j][2].ToString())
{
count = dtOut.AsEnumerable().Where(s => s.Field<string>("ReferenceDescription").EndsWith(x) && s.Field<string>("ReferenceUrl") == y).Count();
//count++;
if (count > 1)
{
sortorder = Convert.ToInt32(dtOut.Rows[j][3]);
space += sortorder + " ";
dtOut.Rows[j].Delete();
dtOut.AcceptChanges();
}
}
i++;
}
dtOut.Rows[rowcount][1] = space + x;
rowcount++;
}