条件重组 sortedDictionary (unionFind)

Conditional restructure a sortedDictionary (unionFind)

我编写了一些输出 SortedDictionary <int index, list<int>values>> 的 C# 代码,其中索引始终以列表<> 的最低整数开头。这是一个简短的输入示例(实际上输入更大):

index- Values<>
 2   - 2,3,6,7
 3   - 3,5
 5   - 5,7,9
11   - 11,12,12

现在我想在这里重新排序。这些值是 linked 索引。我想对它们进行排序,以便将连接的索引组合在一起,没有双重索引。这应该导致输出

 2 - 2,3,5,6,7,9
11 - 11,12

最初我在使用 foreach 处理 sortedDictionary 时遇到了问题,同时还减小了字典集的大小。我解决了这个问题,现在用我的最新代码更新这个问题的描述。它不再使用 foreach,一些排序问题现在需要修复,但作为副作用,它变得非常复杂和庞大。我怀疑它是否应该如此复杂,或者是否可以写得更短、更简单。

我称每个列表为 ,字典在哪里 Cursorc 我使用就像从屏幕上逐位读出文本。

目前我把它放在控制台应用程序的一个小概念功能代码中。只是为了检查是否一切正常。测试非常复杂,因为数字集可以很复杂地 linked。所以如果你有很多集合并且有很多数字应该如何排序,它就不会直接可见。因此,手动检查代码有效性和结果也不容易。

虽然我还不确定它现在是否确实 100% 有效。它比以前更好地工作。但我认为这段代码远非完美,因为我走了两次树。带有预排序和最终排序。

            static SortedDictionary<int, List<int>> NewSort(SortedDictionary<int, List<int>> trees)
    {
        bool debugmode = false;
        //pre sort
        List<int> indexTree = new List<int>();
        foreach (KeyValuePair<int, List<int>> tree in trees)
        {
            indexTree.Add(tree.Key);
        }
        for (int i = 0; i < indexTree.Count; i++)
        {
            int cursor = 1;
            List<int> tree = new List<int>();
            int index = indexTree[i];
            tree = trees[index];
            while ((tree !=null)&& (cursor<tree.Count) )
            {
                int c = tree[cursor ];
                if (trees.ContainsKey(c))
                {
                    if (trees[c] != null)
                    {
                        List<int> u = new List<int>();
                        u = trees[c];
                        tree.AddRange(u);
                        tree.Sort();
                        trees[index] = tree;
                        trees[c] = null;
                    }
                }
                cursor++;
            }
        }
        for (int i = trees.Count; i > 0; i--)
        {
            int c = indexTree[i - 1];
            if (trees[c] == null)
            { trees.Remove(indexTree[i - 1]); }
            else
            {
                trees[c] = trees[c].Distinct().ToList();   //removing all duplicates
            }
        }
        indexTree = new List<int>();

        //resort.
        foreach (KeyValuePair<int, List<int>> tree in trees)
        {
            indexTree.Add(tree.Key);
            if(debugmode) Console.WriteLine("* " +DumpIntList(trees[tree.Key]));
        }
        for (int i = (indexTree.Count); i > 0; i--)
        {
            if (debugmode) Console.WriteLine(i.ToString());
            List<int> tree = new List<int>();
            tree = trees[indexTree[i-1]];
            for (int j = 0; j < tree.Count; j++)
            {
                int c = tree[j];
                for (int k = (i - 2); k > 0; k--)
                {
                    List<int> compareTree = new List<int>();
                    compareTree = trees[indexTree[k]];
                    if (compareTree.IndexOf(c) != -1)   // found !
                    {
                        if (debugmode) Console.Write("found " + c.ToString() + " from ");
                        if (debugmode) Console.WriteLine(DumpIntList(tree) + " in (" + DumpIntList(compareTree)+ ")");
                        tree.Remove(c); // or we would create a duplicate
                        compareTree.AddRange(tree);
                        compareTree = compareTree.Distinct().ToList(); //removing doubles again, doubles as side effect of merging
                        compareTree.Sort();
                        trees.Remove(indexTree[i - 1]);
                        trees[indexTree[k]] = compareTree;
                    }
                }
            }
        }
        return trees;
    }

也许我试图让一些人明白这一点,我在这里试图做的是我试着看看系列是否有重叠的数字,如果是的话,合并它们。 每个系列始终排序并从该系列的最低编号开始。正如我最近发现的那样,这可能是 UnionFind 问题的一个版本。该问题也出现在 Blob 检测中,以及在一组网页中查找哪些网页是 link 彼此。 (但我的数据是一组奇怪的实验室测量值)。

难点是系列比较多,连起来可能不是很直接

1-3-4
4-7-9
11-12
would result in 2 series :
1) 1-3-4-7-9
2) 11-12
But after you add series 12-3 then it would all become one series.

更多测试数据:

 2 - 2,3,5,6,7           // note my data is always ordered like this
 5 - 5,7,9               // dictionary starts with lowest key
11 - 11,12,12,27,30,31   // each list inside a tree key 
22 - 22,27               // is ordered low to high
23 - 23,25               // lowest int, equals the dict key.
28 - 28,30
34 - 34

使用上述函数输出

 2 - 2,3,5,6,7,9
11 - 11,12,22,27,28,30,31
23 - 23,25
34 - 34

因此,虽然代码现在可以正常工作,但我非常怀疑它的理想代码,我两次激怒了树集。所以我想知道是否有更好的解决方案。也可能是代码没有做我希望它做的事情;因为我还在测试它。

除了反转 if 以避免 1 级嵌套,我还没有看到如何使用 LINQ 来提高此代码块的可读性。

        static SortedDictionary<int, List<int>> SortTree(SortedDictionary<int, List<int>> trees)
        {
            //SortedDictionary<int, List<int>> newtrees = new SortedDictionary<int, List<int>>();

            if (trees.Count < 2) { return trees; }  // dont process if ntrees contains 1 or 0 trees

            foreach (KeyValuePair<int, List<int>> singletree in trees)
            {
                int cursor = 1;
                bool nFinish = false;
                List<int> n = singletree.Value;
                if (n.Count <= 1) continue;
                while (nFinish == false)
                {
                    if (trees.ContainsKey(n[cursor]))
                    {
                        List<int> t = trees[n[cursor]];   // think of a screen cursor going over the list table
                        t.AddRange(n);
                        trees.Remove(n[cursor]);
                        n.Sort();
                        trees[singletree.Key] = n;
                    }
                    cursor++;
                    if (cursor != n.Count) continue;
                    nFinish = true;
                }
            }
            return trees;
        }

这是您的解决方案 test cases

using System;
using System.Collections.Generic;
using System.Linq;

namespace Demo
{
    public class Example
    {
        public static void Main()
        {
            SortedDictionary<int, List<int>> tempRepositary = new SortedDictionary<int, List<int>>();

            //test 1
            tempRepositary.Add(2, new List<int>(new[] { 2, 3, 5, 6, 7 }));
            tempRepositary.Add(5, new List<int>(new[] { 5, 7, 9 }));
            tempRepositary.Add(11, new List<int>(new[] { 11, 12, 12, 27, 30, 31 }));
            tempRepositary.Add(22, new List<int>(new[] { 22, 27 }));
            tempRepositary.Add(23, new List<int>(new[] { 23, 25 }));
            tempRepositary.Add(28, new List<int>(new[] { 28, 30 }));
            tempRepositary.Add(34, new List<int>(new[] { 34 }));

            //test 2
            //tempRepositary.Add(2, new List<int>(new[] { 2,3,6,7 }));
            //tempRepositary.Add(3, new List<int>(new[] { 3,5 }));
            //tempRepositary.Add(5, new List<int>(new[] { 5,7,9 }));
            //tempRepositary.Add(11, new List<int>(new[] { 11,12,12 }));

            var refreshOne = SortTree(tempRepositary);    

            foreach (var item in refreshOne)
            {
                Console.Write("Key:" + item.Key + " ");
                Console.WriteLine(string.Join(",", item.Value));                 
            }

            Console.ReadKey();
        }

        private static SortedDictionary<int, List<int>> SortTree(SortedDictionary<int, List<int>> trees)
        {
            if (trees.Count < 2) { return trees; }  // dont process if ntrees contains 1 or 0 trees

            SortedDictionary<int, List<int>> compressedTree
                = new SortedDictionary<int, List<int>>();

            var allKeys = trees.Keys.ToList();
            var allValues = trees.Values.ToList();

            for (int i = 0; i < allKeys.Count; i++)
            {
                var tempValues = allValues[i];
                var tempMax = tempValues.Max();

                for (int j = i + 1; j < allKeys.Count; )
                {
                    if (tempMax >= allKeys[j])
                    {
                        tempValues.AddRange(allValues[j]);
                        allKeys.Remove(allKeys[j]);
                        allValues.Remove(allValues[j]);
                        //
                        tempMax = tempValues.Max();
                        continue;
                    }
                    j++;
                }

                compressedTree.Add(allKeys[i], tempValues.Distinct().OrderBy(i1 => i1).ToList());
            }

            return compressedTree;
        }
    }
}

好吧,我减小了函数的大小并对其进行了改进。 它现在应该是对所有树木的单一刺激。 除非有人知道更好的答案,否则我认为是 "the" 答案。 该代码已经过测试,可以处理更大的集合,我无法发现错误。

    static SortedDictionary<int, List<int>> NewSort(SortedDictionary<int, List<int>> trees)
    {
        bool debugmode = false;
        //pre sort
        List<int> indexTree = new List<int>();
        foreach (KeyValuePair<int, List<int>> tree in trees)
        {
            indexTree.Add(tree.Key);
            if(debugmode) Console.WriteLine("* " +DumpIntList(trees[tree.Key]));
        }
        for (int i = (indexTree.Count); i > 0; i--)
        {
            if (debugmode) Console.WriteLine(i.ToString());
            List<int> tree = new List<int>();
            tree = trees[indexTree[i-1]];
            for (int j = 0; j < tree.Count; j++)
            {
                int c = tree[j];
                for (int k = (i - 2); k > -1; k--)      // k can be 0 but i can minimally be 1
                {
                    List<int> compareTree = new List<int>();
                    compareTree = trees[indexTree[k]];  // for loop > checking all trees
                    if (compareTree.IndexOf(c) != -1)   // found !
                    {
                        if (debugmode) Console.Write("found " + c.ToString() + " from ");
                        if (debugmode) Console.WriteLine(DumpIntList(tree) + " in (" + DumpIntList(compareTree)+ ")");
                      //  tree.Remove(c);               // or we would create a duplicate
                        compareTree.AddRange(tree);
                        compareTree = compareTree.Distinct().ToList();

                        compareTree.Sort();
                        trees.Remove(indexTree[i - 1]);
                        trees[indexTree[k]] = compareTree;
                        j =tree.Count;                  //break from more checks. maybe dirty code but it increases speed
                        break;                          //break checking loop on all trees for current tree
                    }
                }
            }
        }
        return trees;
    }