条件重组 sortedDictionary (unionFind)
Conditional restructure a sortedDictionary (unionFind)
我编写了一些输出 SortedDictionary <int index, list<int>values>>
的 C# 代码,其中索引始终以列表<> 的最低整数开头。这是一个简短的输入示例(实际上输入更大):
index- Values<>
2 - 2,3,6,7
3 - 3,5
5 - 5,7,9
11 - 11,12,12
现在我想在这里重新排序。这些值是 linked 索引。我想对它们进行排序,以便将连接的索引组合在一起,没有双重索引。这应该导致输出
2 - 2,3,5,6,7,9
11 - 11,12
最初我在使用 foreach 处理 sortedDictionary 时遇到了问题,同时还减小了字典集的大小。我解决了这个问题,现在用我的最新代码更新这个问题的描述。它不再使用 foreach,一些排序问题现在需要修复,但作为副作用,它变得非常复杂和庞大。我怀疑它是否应该如此复杂,或者是否可以写得更短、更简单。
我称每个列表为 树,字典在哪里 树
而 Cursor 或 c 我使用就像从屏幕上逐位读出文本。
目前我把它放在控制台应用程序的一个小概念功能代码中。只是为了检查是否一切正常。测试非常复杂,因为数字集可以很复杂地 linked。所以如果你有很多集合并且有很多数字应该如何排序,它就不会直接可见。因此,手动检查代码有效性和结果也不容易。
虽然我还不确定它现在是否确实 100% 有效。它比以前更好地工作。但我认为这段代码远非完美,因为我走了两次树。带有预排序和最终排序。
static SortedDictionary<int, List<int>> NewSort(SortedDictionary<int, List<int>> trees)
{
bool debugmode = false;
//pre sort
List<int> indexTree = new List<int>();
foreach (KeyValuePair<int, List<int>> tree in trees)
{
indexTree.Add(tree.Key);
}
for (int i = 0; i < indexTree.Count; i++)
{
int cursor = 1;
List<int> tree = new List<int>();
int index = indexTree[i];
tree = trees[index];
while ((tree !=null)&& (cursor<tree.Count) )
{
int c = tree[cursor ];
if (trees.ContainsKey(c))
{
if (trees[c] != null)
{
List<int> u = new List<int>();
u = trees[c];
tree.AddRange(u);
tree.Sort();
trees[index] = tree;
trees[c] = null;
}
}
cursor++;
}
}
for (int i = trees.Count; i > 0; i--)
{
int c = indexTree[i - 1];
if (trees[c] == null)
{ trees.Remove(indexTree[i - 1]); }
else
{
trees[c] = trees[c].Distinct().ToList(); //removing all duplicates
}
}
indexTree = new List<int>();
//resort.
foreach (KeyValuePair<int, List<int>> tree in trees)
{
indexTree.Add(tree.Key);
if(debugmode) Console.WriteLine("* " +DumpIntList(trees[tree.Key]));
}
for (int i = (indexTree.Count); i > 0; i--)
{
if (debugmode) Console.WriteLine(i.ToString());
List<int> tree = new List<int>();
tree = trees[indexTree[i-1]];
for (int j = 0; j < tree.Count; j++)
{
int c = tree[j];
for (int k = (i - 2); k > 0; k--)
{
List<int> compareTree = new List<int>();
compareTree = trees[indexTree[k]];
if (compareTree.IndexOf(c) != -1) // found !
{
if (debugmode) Console.Write("found " + c.ToString() + " from ");
if (debugmode) Console.WriteLine(DumpIntList(tree) + " in (" + DumpIntList(compareTree)+ ")");
tree.Remove(c); // or we would create a duplicate
compareTree.AddRange(tree);
compareTree = compareTree.Distinct().ToList(); //removing doubles again, doubles as side effect of merging
compareTree.Sort();
trees.Remove(indexTree[i - 1]);
trees[indexTree[k]] = compareTree;
}
}
}
}
return trees;
}
也许我试图让一些人明白这一点,我在这里试图做的是我试着看看系列是否有重叠的数字,如果是的话,合并它们。
每个系列始终排序并从该系列的最低编号开始。正如我最近发现的那样,这可能是 UnionFind 问题的一个版本。该问题也出现在 Blob 检测中,以及在一组网页中查找哪些网页是 link 彼此。 (但我的数据是一组奇怪的实验室测量值)。
难点是系列比较多,连起来可能不是很直接
1-3-4
4-7-9
11-12
would result in 2 series :
1) 1-3-4-7-9
2) 11-12
But after you add series 12-3 then it would all become one series.
更多测试数据:
2 - 2,3,5,6,7 // note my data is always ordered like this
5 - 5,7,9 // dictionary starts with lowest key
11 - 11,12,12,27,30,31 // each list inside a tree key
22 - 22,27 // is ordered low to high
23 - 23,25 // lowest int, equals the dict key.
28 - 28,30
34 - 34
使用上述函数输出
2 - 2,3,5,6,7,9
11 - 11,12,22,27,28,30,31
23 - 23,25
34 - 34
因此,虽然代码现在可以正常工作,但我非常怀疑它的理想代码,我两次激怒了树集。所以我想知道是否有更好的解决方案。也可能是代码没有做我希望它做的事情;因为我还在测试它。
除了反转 if 以避免 1 级嵌套,我还没有看到如何使用 LINQ
来提高此代码块的可读性。
static SortedDictionary<int, List<int>> SortTree(SortedDictionary<int, List<int>> trees)
{
//SortedDictionary<int, List<int>> newtrees = new SortedDictionary<int, List<int>>();
if (trees.Count < 2) { return trees; } // dont process if ntrees contains 1 or 0 trees
foreach (KeyValuePair<int, List<int>> singletree in trees)
{
int cursor = 1;
bool nFinish = false;
List<int> n = singletree.Value;
if (n.Count <= 1) continue;
while (nFinish == false)
{
if (trees.ContainsKey(n[cursor]))
{
List<int> t = trees[n[cursor]]; // think of a screen cursor going over the list table
t.AddRange(n);
trees.Remove(n[cursor]);
n.Sort();
trees[singletree.Key] = n;
}
cursor++;
if (cursor != n.Count) continue;
nFinish = true;
}
}
return trees;
}
这是您的解决方案 test cases
using System;
using System.Collections.Generic;
using System.Linq;
namespace Demo
{
public class Example
{
public static void Main()
{
SortedDictionary<int, List<int>> tempRepositary = new SortedDictionary<int, List<int>>();
//test 1
tempRepositary.Add(2, new List<int>(new[] { 2, 3, 5, 6, 7 }));
tempRepositary.Add(5, new List<int>(new[] { 5, 7, 9 }));
tempRepositary.Add(11, new List<int>(new[] { 11, 12, 12, 27, 30, 31 }));
tempRepositary.Add(22, new List<int>(new[] { 22, 27 }));
tempRepositary.Add(23, new List<int>(new[] { 23, 25 }));
tempRepositary.Add(28, new List<int>(new[] { 28, 30 }));
tempRepositary.Add(34, new List<int>(new[] { 34 }));
//test 2
//tempRepositary.Add(2, new List<int>(new[] { 2,3,6,7 }));
//tempRepositary.Add(3, new List<int>(new[] { 3,5 }));
//tempRepositary.Add(5, new List<int>(new[] { 5,7,9 }));
//tempRepositary.Add(11, new List<int>(new[] { 11,12,12 }));
var refreshOne = SortTree(tempRepositary);
foreach (var item in refreshOne)
{
Console.Write("Key:" + item.Key + " ");
Console.WriteLine(string.Join(",", item.Value));
}
Console.ReadKey();
}
private static SortedDictionary<int, List<int>> SortTree(SortedDictionary<int, List<int>> trees)
{
if (trees.Count < 2) { return trees; } // dont process if ntrees contains 1 or 0 trees
SortedDictionary<int, List<int>> compressedTree
= new SortedDictionary<int, List<int>>();
var allKeys = trees.Keys.ToList();
var allValues = trees.Values.ToList();
for (int i = 0; i < allKeys.Count; i++)
{
var tempValues = allValues[i];
var tempMax = tempValues.Max();
for (int j = i + 1; j < allKeys.Count; )
{
if (tempMax >= allKeys[j])
{
tempValues.AddRange(allValues[j]);
allKeys.Remove(allKeys[j]);
allValues.Remove(allValues[j]);
//
tempMax = tempValues.Max();
continue;
}
j++;
}
compressedTree.Add(allKeys[i], tempValues.Distinct().OrderBy(i1 => i1).ToList());
}
return compressedTree;
}
}
}
好吧,我减小了函数的大小并对其进行了改进。
它现在应该是对所有树木的单一刺激。
除非有人知道更好的答案,否则我认为是 "the" 答案。
该代码已经过测试,可以处理更大的集合,我无法发现错误。
static SortedDictionary<int, List<int>> NewSort(SortedDictionary<int, List<int>> trees)
{
bool debugmode = false;
//pre sort
List<int> indexTree = new List<int>();
foreach (KeyValuePair<int, List<int>> tree in trees)
{
indexTree.Add(tree.Key);
if(debugmode) Console.WriteLine("* " +DumpIntList(trees[tree.Key]));
}
for (int i = (indexTree.Count); i > 0; i--)
{
if (debugmode) Console.WriteLine(i.ToString());
List<int> tree = new List<int>();
tree = trees[indexTree[i-1]];
for (int j = 0; j < tree.Count; j++)
{
int c = tree[j];
for (int k = (i - 2); k > -1; k--) // k can be 0 but i can minimally be 1
{
List<int> compareTree = new List<int>();
compareTree = trees[indexTree[k]]; // for loop > checking all trees
if (compareTree.IndexOf(c) != -1) // found !
{
if (debugmode) Console.Write("found " + c.ToString() + " from ");
if (debugmode) Console.WriteLine(DumpIntList(tree) + " in (" + DumpIntList(compareTree)+ ")");
// tree.Remove(c); // or we would create a duplicate
compareTree.AddRange(tree);
compareTree = compareTree.Distinct().ToList();
compareTree.Sort();
trees.Remove(indexTree[i - 1]);
trees[indexTree[k]] = compareTree;
j =tree.Count; //break from more checks. maybe dirty code but it increases speed
break; //break checking loop on all trees for current tree
}
}
}
}
return trees;
}
我编写了一些输出 SortedDictionary <int index, list<int>values>>
的 C# 代码,其中索引始终以列表<> 的最低整数开头。这是一个简短的输入示例(实际上输入更大):
index- Values<>
2 - 2,3,6,7
3 - 3,5
5 - 5,7,9
11 - 11,12,12
现在我想在这里重新排序。这些值是 linked 索引。我想对它们进行排序,以便将连接的索引组合在一起,没有双重索引。这应该导致输出
2 - 2,3,5,6,7,9
11 - 11,12
最初我在使用 foreach 处理 sortedDictionary 时遇到了问题,同时还减小了字典集的大小。我解决了这个问题,现在用我的最新代码更新这个问题的描述。它不再使用 foreach,一些排序问题现在需要修复,但作为副作用,它变得非常复杂和庞大。我怀疑它是否应该如此复杂,或者是否可以写得更短、更简单。
我称每个列表为 树,字典在哪里 树 而 Cursor 或 c 我使用就像从屏幕上逐位读出文本。
目前我把它放在控制台应用程序的一个小概念功能代码中。只是为了检查是否一切正常。测试非常复杂,因为数字集可以很复杂地 linked。所以如果你有很多集合并且有很多数字应该如何排序,它就不会直接可见。因此,手动检查代码有效性和结果也不容易。
虽然我还不确定它现在是否确实 100% 有效。它比以前更好地工作。但我认为这段代码远非完美,因为我走了两次树。带有预排序和最终排序。
static SortedDictionary<int, List<int>> NewSort(SortedDictionary<int, List<int>> trees)
{
bool debugmode = false;
//pre sort
List<int> indexTree = new List<int>();
foreach (KeyValuePair<int, List<int>> tree in trees)
{
indexTree.Add(tree.Key);
}
for (int i = 0; i < indexTree.Count; i++)
{
int cursor = 1;
List<int> tree = new List<int>();
int index = indexTree[i];
tree = trees[index];
while ((tree !=null)&& (cursor<tree.Count) )
{
int c = tree[cursor ];
if (trees.ContainsKey(c))
{
if (trees[c] != null)
{
List<int> u = new List<int>();
u = trees[c];
tree.AddRange(u);
tree.Sort();
trees[index] = tree;
trees[c] = null;
}
}
cursor++;
}
}
for (int i = trees.Count; i > 0; i--)
{
int c = indexTree[i - 1];
if (trees[c] == null)
{ trees.Remove(indexTree[i - 1]); }
else
{
trees[c] = trees[c].Distinct().ToList(); //removing all duplicates
}
}
indexTree = new List<int>();
//resort.
foreach (KeyValuePair<int, List<int>> tree in trees)
{
indexTree.Add(tree.Key);
if(debugmode) Console.WriteLine("* " +DumpIntList(trees[tree.Key]));
}
for (int i = (indexTree.Count); i > 0; i--)
{
if (debugmode) Console.WriteLine(i.ToString());
List<int> tree = new List<int>();
tree = trees[indexTree[i-1]];
for (int j = 0; j < tree.Count; j++)
{
int c = tree[j];
for (int k = (i - 2); k > 0; k--)
{
List<int> compareTree = new List<int>();
compareTree = trees[indexTree[k]];
if (compareTree.IndexOf(c) != -1) // found !
{
if (debugmode) Console.Write("found " + c.ToString() + " from ");
if (debugmode) Console.WriteLine(DumpIntList(tree) + " in (" + DumpIntList(compareTree)+ ")");
tree.Remove(c); // or we would create a duplicate
compareTree.AddRange(tree);
compareTree = compareTree.Distinct().ToList(); //removing doubles again, doubles as side effect of merging
compareTree.Sort();
trees.Remove(indexTree[i - 1]);
trees[indexTree[k]] = compareTree;
}
}
}
}
return trees;
}
也许我试图让一些人明白这一点,我在这里试图做的是我试着看看系列是否有重叠的数字,如果是的话,合并它们。 每个系列始终排序并从该系列的最低编号开始。正如我最近发现的那样,这可能是 UnionFind 问题的一个版本。该问题也出现在 Blob 检测中,以及在一组网页中查找哪些网页是 link 彼此。 (但我的数据是一组奇怪的实验室测量值)。
难点是系列比较多,连起来可能不是很直接
1-3-4
4-7-9
11-12
would result in 2 series :
1) 1-3-4-7-9
2) 11-12
But after you add series 12-3 then it would all become one series.
更多测试数据:
2 - 2,3,5,6,7 // note my data is always ordered like this
5 - 5,7,9 // dictionary starts with lowest key
11 - 11,12,12,27,30,31 // each list inside a tree key
22 - 22,27 // is ordered low to high
23 - 23,25 // lowest int, equals the dict key.
28 - 28,30
34 - 34
使用上述函数输出
2 - 2,3,5,6,7,9
11 - 11,12,22,27,28,30,31
23 - 23,25
34 - 34
因此,虽然代码现在可以正常工作,但我非常怀疑它的理想代码,我两次激怒了树集。所以我想知道是否有更好的解决方案。也可能是代码没有做我希望它做的事情;因为我还在测试它。
除了反转 if 以避免 1 级嵌套,我还没有看到如何使用 LINQ
来提高此代码块的可读性。
static SortedDictionary<int, List<int>> SortTree(SortedDictionary<int, List<int>> trees)
{
//SortedDictionary<int, List<int>> newtrees = new SortedDictionary<int, List<int>>();
if (trees.Count < 2) { return trees; } // dont process if ntrees contains 1 or 0 trees
foreach (KeyValuePair<int, List<int>> singletree in trees)
{
int cursor = 1;
bool nFinish = false;
List<int> n = singletree.Value;
if (n.Count <= 1) continue;
while (nFinish == false)
{
if (trees.ContainsKey(n[cursor]))
{
List<int> t = trees[n[cursor]]; // think of a screen cursor going over the list table
t.AddRange(n);
trees.Remove(n[cursor]);
n.Sort();
trees[singletree.Key] = n;
}
cursor++;
if (cursor != n.Count) continue;
nFinish = true;
}
}
return trees;
}
这是您的解决方案 test cases
using System;
using System.Collections.Generic;
using System.Linq;
namespace Demo
{
public class Example
{
public static void Main()
{
SortedDictionary<int, List<int>> tempRepositary = new SortedDictionary<int, List<int>>();
//test 1
tempRepositary.Add(2, new List<int>(new[] { 2, 3, 5, 6, 7 }));
tempRepositary.Add(5, new List<int>(new[] { 5, 7, 9 }));
tempRepositary.Add(11, new List<int>(new[] { 11, 12, 12, 27, 30, 31 }));
tempRepositary.Add(22, new List<int>(new[] { 22, 27 }));
tempRepositary.Add(23, new List<int>(new[] { 23, 25 }));
tempRepositary.Add(28, new List<int>(new[] { 28, 30 }));
tempRepositary.Add(34, new List<int>(new[] { 34 }));
//test 2
//tempRepositary.Add(2, new List<int>(new[] { 2,3,6,7 }));
//tempRepositary.Add(3, new List<int>(new[] { 3,5 }));
//tempRepositary.Add(5, new List<int>(new[] { 5,7,9 }));
//tempRepositary.Add(11, new List<int>(new[] { 11,12,12 }));
var refreshOne = SortTree(tempRepositary);
foreach (var item in refreshOne)
{
Console.Write("Key:" + item.Key + " ");
Console.WriteLine(string.Join(",", item.Value));
}
Console.ReadKey();
}
private static SortedDictionary<int, List<int>> SortTree(SortedDictionary<int, List<int>> trees)
{
if (trees.Count < 2) { return trees; } // dont process if ntrees contains 1 or 0 trees
SortedDictionary<int, List<int>> compressedTree
= new SortedDictionary<int, List<int>>();
var allKeys = trees.Keys.ToList();
var allValues = trees.Values.ToList();
for (int i = 0; i < allKeys.Count; i++)
{
var tempValues = allValues[i];
var tempMax = tempValues.Max();
for (int j = i + 1; j < allKeys.Count; )
{
if (tempMax >= allKeys[j])
{
tempValues.AddRange(allValues[j]);
allKeys.Remove(allKeys[j]);
allValues.Remove(allValues[j]);
//
tempMax = tempValues.Max();
continue;
}
j++;
}
compressedTree.Add(allKeys[i], tempValues.Distinct().OrderBy(i1 => i1).ToList());
}
return compressedTree;
}
}
}
好吧,我减小了函数的大小并对其进行了改进。 它现在应该是对所有树木的单一刺激。 除非有人知道更好的答案,否则我认为是 "the" 答案。 该代码已经过测试,可以处理更大的集合,我无法发现错误。
static SortedDictionary<int, List<int>> NewSort(SortedDictionary<int, List<int>> trees)
{
bool debugmode = false;
//pre sort
List<int> indexTree = new List<int>();
foreach (KeyValuePair<int, List<int>> tree in trees)
{
indexTree.Add(tree.Key);
if(debugmode) Console.WriteLine("* " +DumpIntList(trees[tree.Key]));
}
for (int i = (indexTree.Count); i > 0; i--)
{
if (debugmode) Console.WriteLine(i.ToString());
List<int> tree = new List<int>();
tree = trees[indexTree[i-1]];
for (int j = 0; j < tree.Count; j++)
{
int c = tree[j];
for (int k = (i - 2); k > -1; k--) // k can be 0 but i can minimally be 1
{
List<int> compareTree = new List<int>();
compareTree = trees[indexTree[k]]; // for loop > checking all trees
if (compareTree.IndexOf(c) != -1) // found !
{
if (debugmode) Console.Write("found " + c.ToString() + " from ");
if (debugmode) Console.WriteLine(DumpIntList(tree) + " in (" + DumpIntList(compareTree)+ ")");
// tree.Remove(c); // or we would create a duplicate
compareTree.AddRange(tree);
compareTree = compareTree.Distinct().ToList();
compareTree.Sort();
trees.Remove(indexTree[i - 1]);
trees[indexTree[k]] = compareTree;
j =tree.Count; //break from more checks. maybe dirty code but it increases speed
break; //break checking loop on all trees for current tree
}
}
}
}
return trees;
}