是否可以使用 Parallel.ForEach 加快排序算法
Is it possible to speed up sorting algorithm with Parallel.ForEach
我正在尝试了解 "Parallel.ForEach" 函数的工作原理。我已经阅读了有关它的 MSDN 并且作为一个整体理解似乎,与普通的 foreach 循环相比,您在循环中进行的计算越多,它的运行速度就越快。
当前计算机有 4 个内核。
(我的24核电脑两天就装完了)
但是我认为我需要一些专业知识来了解这方面的知识。我有一个排序算法,可以在 1 个核心上进行大约 10 秒的排序。
我已经放置了一个完整的代码,其中我在一个核心上执行它,我在一个:Parallel.ForEach 循环中使用 4 个核心。
我只是想知道是否有可能以任何可能的方式使用更多内核来加速这种排序?
运行 testsortFunction() 产生以下结果:
void testsortFunction()
{
String resultstring1 = ""; String resultstring2 = "";
resultstring1 = sortingtestBENCHMARKS(false);
resultstring2 = sortingtestBENCHMARKS(true);
MessageBox.Show(resultstring1 + "\n\n" + resultstring2);
}
String sortingtestBENCHMARKS(bool useParallel)
{
List<String> sortedLIST = new List<String>();
List<String> minusLIST = new List<String>();
List<String> plusLIST = new List<String>();
var stopwatch = new Stopwatch();
//Add 3 million elements
for (double i = 0; i < 1000000; i += 1)
{
plusLIST.Add(i + ",awb/aje" + " - " + "ddfas/asa" + " - " + "asoo/qwa");
}
for (double i = 1; i < 2000000; i += 1)
{
minusLIST.Add("-" + i + ",awb/aje" + " - " + "ddfas/asa" + " - " + "asoo/qwa");
}
//Do the sorting!
if (useParallel == false)
{
stopwatch.Start();
sortedLIST = sortLIST(minusLIST, plusLIST); //11 seconds
stopwatch.Stop();
}
else
{
stopwatch.Start();
Parallel.ForEach("dummy", (c) =>
{
sortedLIST = sortLIST(minusLIST, plusLIST); //32 seconds
});
stopwatch.Stop();
}
return "Elapsed Times in seconds(Using Parallel: " + useParallel + "):\n\n" + stopwatch.Elapsed.TotalSeconds; //10.57 seconds
}
List<String> sortLIST(List<String> minusLIST, List<String> plusLIST)
{
plusLIST = plusLIST.OrderBy(i => double.Parse(i.Split(',')[0])).ToList();plusLIST.Reverse();
minusLIST = minusLIST.OrderBy(i => double.Parse(i.Split(',')[0].TrimStart('-'))).ToList();
plusLIST.AddRange(minusLIST);
return plusLIST;
}
Parallel.ForEach
对于可分割问题非常有用:也就是说,您可以将问题拆分为完全不同的子问题。它的第一个参数是一个可枚举的集合,集合中的每一项代表一个分区。
对单个项目列表进行排序不可分区。您的并行化代码只是重复了很多次排序。
顺便说一句,考虑使用 list.Sort()
method of dotnet 进行排序。非常聪明的人已经优化了它。当被排序的项目不是简单的字符串或标量时,你可以提供一个比较函数。
我 运行 一些基准测试,并设法将其速度提高了 2 倍...
public List<String> ParallelSort()
{
var result = new List<string>(plusLIST.Count + minusLIST.Count);
var t1 = Task.Run(() => plusLIST.AsParallel().OrderByDescending(i => int.Parse(i.Split(',')[0])));
var t2 = Task.Run(() => minusLIST.AsParallel().OrderBy(i => int.Parse(i.Split(',')[0].TrimStart('-'))));
Task.WaitAll(t1, t2);
result.AddRange(t1.Result);
result.AddRange(t2.Result);
return result;
}
这是基准测试,使用 BenchmarkDotNet。
(我将列表大小缩小了 10 倍,因为基准测试需要很长时间,我今晚想回家)。
| Method | Mean | Error | StdDev | Median | Gen 0/1k Op | Gen 1/1k Op | Gen 2/1k Op | Allocated Memory/Op |
|----------------------- |----------:|---------:|----------:|----------:|------------:|------------:|------------:|--------------------:|
| OPSort | 142.35 ms | 5.150 ms | 14.693 ms | 137.40 ms | 29000.0000 | 1000.0000 | - | 135.99 MB |
| OPSortByDescendingLazy | 118.19 ms | 2.672 ms | 7.752 ms | 117.01 ms | 29000.0000 | 1000.0000 | - | 127.32 MB |
| SlightlyParallelSort | 122.62 ms | 3.063 ms | 8.538 ms | 120.45 ms | 29000.0000 | 1000.0000 | - | 127.32 MB |
| ParallelSort | 71.37 ms | 2.190 ms | 6.389 ms | 70.30 ms | 28000.0000 | 1000.0000 | - | 148.63 MB |
| RegexSort | 250.80 ms | 4.887 ms | 7.315 ms | 251.70 ms | 32000.0000 | 1000.0000 | - | 145.99 MB |
和代码:
[MemoryDiagnoser]
public class Tests
{
private List<String> minusLIST = new List<string>(100000);
private List<String> plusLIST = new List<string>(200000);
[IterationSetup]
public void IterationSetup()
{
plusLIST.Clear();
minusLIST.Clear();
//Add 3 million elements
for (double i = 0; i < 100000; i += 1)
{
plusLIST.Add(i + ",awb/aje" + " - " + "ddfas/asa" + " - " + "asoo/qwa");
}
for (double i = 1; i < 200000; i += 1)
{
minusLIST.Add("-" + i + ",awb/aje" + " - " + "ddfas/asa" + " - " + "asoo/qwa");
}
}
[Benchmark]
public List<String> OPSort()
{
plusLIST = plusLIST.OrderBy(i => double.Parse(i.Split(',')[0])).ToList(); plusLIST.Reverse();
minusLIST = minusLIST.OrderBy(i => double.Parse(i.Split(',')[0].TrimStart('-'))).ToList();
plusLIST.AddRange(minusLIST);
return plusLIST;
}
[Benchmark]
public List<String> OPSortByDescendingLazy()
{
var result = new List<string>(plusLIST.Count + minusLIST.Count);
result.AddRange(plusLIST.OrderByDescending(i => int.Parse(i.Split(',')[0])));
result.AddRange(minusLIST.OrderBy(i => int.Parse(i.Split(',')[0].TrimStart('-'))));
return result;
}
[Benchmark]
public List<String> SlightlyParallelSort()
{
var result = new List<string>(plusLIST.Count + minusLIST.Count);
var t1 = Task.Run(() => plusLIST.OrderByDescending(i => int.Parse(i.Split(',')[0])));
var t2 = Task.Run(() => minusLIST.OrderBy(i => int.Parse(i.Split(',')[0].TrimStart('-'))));
Task.WaitAll(t1, t2);
result.AddRange(t1.Result);
result.AddRange(t2.Result);
return result;
}
[Benchmark]
public List<String> ParallelSort()
{
var result = new List<string>(plusLIST.Count + minusLIST.Count);
var t1 = Task.Run(() => plusLIST.AsParallel().OrderByDescending(i => int.Parse(i.Split(',')[0])));
var t2 = Task.Run(() => minusLIST.AsParallel().OrderBy(i => int.Parse(i.Split(',')[0].TrimStart('-'))));
Task.WaitAll(t1, t2);
result.AddRange(t1.Result);
result.AddRange(t2.Result);
return result;
}
private static readonly Regex splitRegex = new Regex(@"^(\d+)");
private static readonly Regex splitWithMinusRegex = new Regex(@"^-(\d+)");
// To test the suggestion from @PanagiotisKanavos
[Benchmark]
public List<String> RegexSort()
{
plusLIST = plusLIST.OrderBy(i => double.Parse(splitRegex.Match(i).Groups[1].Value)).ToList(); plusLIST.Reverse();
minusLIST = minusLIST.OrderBy(i => double.Parse(splitWithMinusRegex.Match(i).Groups[1].Value)).ToList();
plusLIST.AddRange(minusLIST);
return plusLIST;
}
}
class Program
{
static void Main(string[] args)
{
var summary = BenchmarkRunner.Run<Tests>();
Console.WriteLine("========= DONE! ============");
Console.ReadLine();
}
}
可能可以进一步加快速度:下一步是开发一个分析器。
请注意,您的数组足够大,可以分配到大对象堆上,这通常是个坏消息。您希望尽可能多地避免此类分配,因此不要让您的列表自行确定它们需要的大小。
我正在尝试了解 "Parallel.ForEach" 函数的工作原理。我已经阅读了有关它的 MSDN 并且作为一个整体理解似乎,与普通的 foreach 循环相比,您在循环中进行的计算越多,它的运行速度就越快。
当前计算机有 4 个内核。 (我的24核电脑两天就装完了)
但是我认为我需要一些专业知识来了解这方面的知识。我有一个排序算法,可以在 1 个核心上进行大约 10 秒的排序。
我已经放置了一个完整的代码,其中我在一个核心上执行它,我在一个:Parallel.ForEach 循环中使用 4 个核心。
我只是想知道是否有可能以任何可能的方式使用更多内核来加速这种排序?
运行 testsortFunction() 产生以下结果:
void testsortFunction()
{
String resultstring1 = ""; String resultstring2 = "";
resultstring1 = sortingtestBENCHMARKS(false);
resultstring2 = sortingtestBENCHMARKS(true);
MessageBox.Show(resultstring1 + "\n\n" + resultstring2);
}
String sortingtestBENCHMARKS(bool useParallel)
{
List<String> sortedLIST = new List<String>();
List<String> minusLIST = new List<String>();
List<String> plusLIST = new List<String>();
var stopwatch = new Stopwatch();
//Add 3 million elements
for (double i = 0; i < 1000000; i += 1)
{
plusLIST.Add(i + ",awb/aje" + " - " + "ddfas/asa" + " - " + "asoo/qwa");
}
for (double i = 1; i < 2000000; i += 1)
{
minusLIST.Add("-" + i + ",awb/aje" + " - " + "ddfas/asa" + " - " + "asoo/qwa");
}
//Do the sorting!
if (useParallel == false)
{
stopwatch.Start();
sortedLIST = sortLIST(minusLIST, plusLIST); //11 seconds
stopwatch.Stop();
}
else
{
stopwatch.Start();
Parallel.ForEach("dummy", (c) =>
{
sortedLIST = sortLIST(minusLIST, plusLIST); //32 seconds
});
stopwatch.Stop();
}
return "Elapsed Times in seconds(Using Parallel: " + useParallel + "):\n\n" + stopwatch.Elapsed.TotalSeconds; //10.57 seconds
}
List<String> sortLIST(List<String> minusLIST, List<String> plusLIST)
{
plusLIST = plusLIST.OrderBy(i => double.Parse(i.Split(',')[0])).ToList();plusLIST.Reverse();
minusLIST = minusLIST.OrderBy(i => double.Parse(i.Split(',')[0].TrimStart('-'))).ToList();
plusLIST.AddRange(minusLIST);
return plusLIST;
}
Parallel.ForEach
对于可分割问题非常有用:也就是说,您可以将问题拆分为完全不同的子问题。它的第一个参数是一个可枚举的集合,集合中的每一项代表一个分区。
对单个项目列表进行排序不可分区。您的并行化代码只是重复了很多次排序。
顺便说一句,考虑使用 list.Sort()
method of dotnet 进行排序。非常聪明的人已经优化了它。当被排序的项目不是简单的字符串或标量时,你可以提供一个比较函数。
我 运行 一些基准测试,并设法将其速度提高了 2 倍...
public List<String> ParallelSort()
{
var result = new List<string>(plusLIST.Count + minusLIST.Count);
var t1 = Task.Run(() => plusLIST.AsParallel().OrderByDescending(i => int.Parse(i.Split(',')[0])));
var t2 = Task.Run(() => minusLIST.AsParallel().OrderBy(i => int.Parse(i.Split(',')[0].TrimStart('-'))));
Task.WaitAll(t1, t2);
result.AddRange(t1.Result);
result.AddRange(t2.Result);
return result;
}
这是基准测试,使用 BenchmarkDotNet。
(我将列表大小缩小了 10 倍,因为基准测试需要很长时间,我今晚想回家)。
| Method | Mean | Error | StdDev | Median | Gen 0/1k Op | Gen 1/1k Op | Gen 2/1k Op | Allocated Memory/Op |
|----------------------- |----------:|---------:|----------:|----------:|------------:|------------:|------------:|--------------------:|
| OPSort | 142.35 ms | 5.150 ms | 14.693 ms | 137.40 ms | 29000.0000 | 1000.0000 | - | 135.99 MB |
| OPSortByDescendingLazy | 118.19 ms | 2.672 ms | 7.752 ms | 117.01 ms | 29000.0000 | 1000.0000 | - | 127.32 MB |
| SlightlyParallelSort | 122.62 ms | 3.063 ms | 8.538 ms | 120.45 ms | 29000.0000 | 1000.0000 | - | 127.32 MB |
| ParallelSort | 71.37 ms | 2.190 ms | 6.389 ms | 70.30 ms | 28000.0000 | 1000.0000 | - | 148.63 MB |
| RegexSort | 250.80 ms | 4.887 ms | 7.315 ms | 251.70 ms | 32000.0000 | 1000.0000 | - | 145.99 MB |
和代码:
[MemoryDiagnoser]
public class Tests
{
private List<String> minusLIST = new List<string>(100000);
private List<String> plusLIST = new List<string>(200000);
[IterationSetup]
public void IterationSetup()
{
plusLIST.Clear();
minusLIST.Clear();
//Add 3 million elements
for (double i = 0; i < 100000; i += 1)
{
plusLIST.Add(i + ",awb/aje" + " - " + "ddfas/asa" + " - " + "asoo/qwa");
}
for (double i = 1; i < 200000; i += 1)
{
minusLIST.Add("-" + i + ",awb/aje" + " - " + "ddfas/asa" + " - " + "asoo/qwa");
}
}
[Benchmark]
public List<String> OPSort()
{
plusLIST = plusLIST.OrderBy(i => double.Parse(i.Split(',')[0])).ToList(); plusLIST.Reverse();
minusLIST = minusLIST.OrderBy(i => double.Parse(i.Split(',')[0].TrimStart('-'))).ToList();
plusLIST.AddRange(minusLIST);
return plusLIST;
}
[Benchmark]
public List<String> OPSortByDescendingLazy()
{
var result = new List<string>(plusLIST.Count + minusLIST.Count);
result.AddRange(plusLIST.OrderByDescending(i => int.Parse(i.Split(',')[0])));
result.AddRange(minusLIST.OrderBy(i => int.Parse(i.Split(',')[0].TrimStart('-'))));
return result;
}
[Benchmark]
public List<String> SlightlyParallelSort()
{
var result = new List<string>(plusLIST.Count + minusLIST.Count);
var t1 = Task.Run(() => plusLIST.OrderByDescending(i => int.Parse(i.Split(',')[0])));
var t2 = Task.Run(() => minusLIST.OrderBy(i => int.Parse(i.Split(',')[0].TrimStart('-'))));
Task.WaitAll(t1, t2);
result.AddRange(t1.Result);
result.AddRange(t2.Result);
return result;
}
[Benchmark]
public List<String> ParallelSort()
{
var result = new List<string>(plusLIST.Count + minusLIST.Count);
var t1 = Task.Run(() => plusLIST.AsParallel().OrderByDescending(i => int.Parse(i.Split(',')[0])));
var t2 = Task.Run(() => minusLIST.AsParallel().OrderBy(i => int.Parse(i.Split(',')[0].TrimStart('-'))));
Task.WaitAll(t1, t2);
result.AddRange(t1.Result);
result.AddRange(t2.Result);
return result;
}
private static readonly Regex splitRegex = new Regex(@"^(\d+)");
private static readonly Regex splitWithMinusRegex = new Regex(@"^-(\d+)");
// To test the suggestion from @PanagiotisKanavos
[Benchmark]
public List<String> RegexSort()
{
plusLIST = plusLIST.OrderBy(i => double.Parse(splitRegex.Match(i).Groups[1].Value)).ToList(); plusLIST.Reverse();
minusLIST = minusLIST.OrderBy(i => double.Parse(splitWithMinusRegex.Match(i).Groups[1].Value)).ToList();
plusLIST.AddRange(minusLIST);
return plusLIST;
}
}
class Program
{
static void Main(string[] args)
{
var summary = BenchmarkRunner.Run<Tests>();
Console.WriteLine("========= DONE! ============");
Console.ReadLine();
}
}
可能可以进一步加快速度:下一步是开发一个分析器。
请注意,您的数组足够大,可以分配到大对象堆上,这通常是个坏消息。您希望尽可能多地避免此类分配,因此不要让您的列表自行确定它们需要的大小。