C# - ConcurrentBag 与 List 的性能比较
C# - Performance comparison of ConcurrentBag vs List
前言:我问这个只是因为我没有环境(足够大的数据集 + 计算能力)以可靠的方式对其进行测试。
问题:假设有一个 ConcurrentBag<T>,其中装载了数十亿个元素,并且只由单个线程访问/使用,它的性能是否与 List<T> 相近?换句话说,ConcurrentBag<T> 上的枚举与 List<T> 上的枚举相比,性能是更高还是更低?
ConcurrentBag<T>
的性能将不可避免地低于 List<T>
。虽然您只能从单个线程访问它,但该结构仍然需要有适当的机制来防止出现并发访问时出现竞争危险的可能性。
如果您要在开始枚举之前从单个线程加载集合,则可以使用 ConcurrentBag(IEnumerable<T>) 构造函数,而不是通过其 Add 方法逐个添加每个项目,从而避免这部分性能开销。
ConcurrentBag<T>
为枚举提供“即时快照”语义;请参阅其 GetEnumerator
方法的备注。当您从 foreach
循环访问 ConcurrentBag<T>
时,它会首先将其全部内容复制到一个普通的 List<T>
中,然后对其进行枚举。每次在循环中使用它时,都会产生大量的性能开销(计算和内存方面)。
如果您的场景是您的列表将由多个线程填充,但随后仅由一个线程读取,那么您应该在写入完成后立即将其转换为 List<T>
。
把数十亿个元素放进 List 或 ConcurrentBag?那是行不通的("no go")。
就性能而言,试试这个来测试添加:(随意修改这个以测试其他操作)
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace ConcurrentBagTest
{
    /// <summary>
    /// Console benchmark that times how long a single thread needs to append
    /// sequential <see cref="long"/> values to a <see cref="List{T}"/> and to a
    /// <see cref="ConcurrentBag{T}"/>, printing the elapsed time for each run.
    /// </summary>
    // You must compile this for x64 or you will get OutOfMemory exception
    class Program
    {
        /// <summary>
        /// Runs the list tests first, then the bag tests, and waits for a key press
        /// so the console window stays open.
        /// </summary>
        static void Main(string[] args)
        {
            ListTest(10000000);
            ListTest(100000000);
            // The largest run may fail with an exception (e.g. OutOfMemoryException);
            // ListTest catches and prints it, so the remaining tests still execute.
            ListTest(1000000000);
            ConcurrentBagTest(10000000);
            ConcurrentBagTest(100000000);
            Console.ReadKey();
        }

        /// <summary>
        /// Adds the values 0 .. count-1 to a new <see cref="ConcurrentBag{T}"/> one by one
        /// and prints how long the insert loop took.
        /// </summary>
        /// <param name="count">Number of items to insert.</param>
        static void ConcurrentBagTest(long count)
        {
            try
            {
                var bag = new ConcurrentBag<long>();
                Console.WriteLine($"--- ConcurrentBagTest count = {count}");
                // Estimate covers the raw payload only (count * 8 bytes), not collection overhead.
                Console.WriteLine($"I will use {(count * sizeof(long)) / Math.Pow(1024, 2)} MiB of RAM");
                Stopwatch stopwatch = new Stopwatch();
                stopwatch.Start();
                for (long i = 0; i < count; i++)
                {
                    bag.Add(i);
                }
                stopwatch.Stop();
                // Use the Count property rather than LINQ's LongCount(): LongCount() enumerates
                // the bag, and enumerating a ConcurrentBag works on a snapshot of its contents,
                // which is needless extra work and memory just to report a number.
                // Count is an int, which is sufficient for the sizes Main passes in.
                Console.WriteLine($"Inserted {bag.Count} items in {stopwatch.Elapsed.TotalSeconds} s");
                Console.WriteLine();
                Console.WriteLine();
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and let the caller continue with the next test.
                Console.WriteLine(ex.ToString());
            }
            // Ask the GC to reclaim this run's collection before the next test starts.
            GC.Collect();
            GC.WaitForPendingFinalizers();
        }

        /// <summary>
        /// Adds the values 0 .. count-1 to a new <see cref="List{T}"/> one by one
        /// and prints how long the insert loop took.
        /// </summary>
        /// <param name="count">Number of items to insert.</param>
        static void ListTest(long count)
        {
            try
            {
                var list = new List<long>();
                Console.WriteLine($"--- ListTest count = {count}");
                // Estimate covers the raw payload only (count * 8 bytes), not collection overhead.
                Console.WriteLine($"I will use {(count * sizeof(long)) / Math.Pow(1024, 2)} MiB of RAM");
                Stopwatch stopwatch = new Stopwatch();
                stopwatch.Start();
                for (long i = 0; i < count; i++)
                {
                    list.Add(i);
                }
                stopwatch.Stop();
                // List<T>.Count is a stored property; no need to walk the list with LongCount().
                Console.WriteLine($"Inserted {list.Count} items in {stopwatch.Elapsed.TotalSeconds} s");
                Console.WriteLine();
                Console.WriteLine();
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and let the caller continue with the next test.
                Console.WriteLine(ex.ToString());
            }
            // Ask the GC to reclaim this run's collection before the next test starts.
            GC.Collect();
            GC.WaitForPendingFinalizers();
        }
    }
}
我的输出:
--- ListTest count = 10000000
I will use 76,2939453125 MiB of RAM
Inserted 10000000 items in 0,0807315 s
--- ListTest count = 100000000
I will use 762,939453125 MiB of RAM
Inserted 100000000 items in 0,7741546 s
--- ListTest count = 1000000000
I will use 7629,39453125 MiB of RAM
System.OutOfMemoryException: Array dimensions exceeded supported range.
--- ConcurrentBagTest count = 10000000
I will use 76,2939453125 MiB of RAM
Inserted 10000000 items in 1,0744069 s
--- ConcurrentBagTest count = 100000000
I will use 762,939453125 MiB of RAM
Inserted 100000000 items in 11,3976436 s
使用 CPU:Intel Core i7-2600 @ 3.4 GHz,
使用 RAM:16 GB
另请查看 this answer 了解限制。
但是,如果您需要删除项目,ConcurrentBag 比 List 快得多
// LINQPad script: times "add `count` items, then remove `count` items" for
// ConcurrentBag, ConcurrentStack, ConcurrentQueue and List (removing from the
// front and from the end), dumping the elapsed time of each section.
void Main()
{
    ConcurrentBag<int> bag = new ConcurrentBag<int>();
    ConcurrentStack<int> stack = new ConcurrentStack<int>();
    ConcurrentQueue<int> q = new ConcurrentQueue<int>();
    List<int> list = new List<int>();
    Stopwatch sw = new Stopwatch();
    int count = 100000;

    // Restart() (reset + start) is used before every section. Calling Start() on a
    // stopwatch that is already running does not reset it, so Elapsed would otherwise
    // report the cumulative time of all previous sections instead of this one.
    sw.Restart();
    for (int i = 0; i < count; i++)
    {
        bag.Add(i);
    }
    for (int i = 0; i < count; i++)
    {
        bag.TryTake(out _);
    }
    sw.Elapsed.Dump("BAG");

    sw.Restart();
    for (int i = 0; i < count; i++)
    {
        stack.Push(i);
    }
    for (int i = 0; i < count; i++)
    {
        stack.TryPop(out _);
    }
    sw.Elapsed.Dump("Stack");

    sw.Restart();
    for (int i = 0; i < count; i++)
    {
        q.Enqueue(i);
    }
    for (int i = 0; i < count; i++)
    {
        q.TryDequeue(out _);
    }
    sw.Elapsed.Dump("Q");

    sw.Restart();
    for (int i = 0; i < count; i++)
    {
        list.Add(i);
    }
    for (int i = 0; i < count; i++)
    {
        // Removing index 0 shifts every remaining element down by one.
        list.RemoveAt(0);
    }
    sw.Elapsed.Dump("list remove at 0");

    // The list is empty again here, so it is refilled for the second removal pattern.
    sw.Restart();
    for (int i = 0; i < count; i++)
    {
        list.Add(i);
    }
    for (int i = 0; i < count; i++)
    {
        list.RemoveAt(list.Count - 1);
    }
    sw.Elapsed.Dump("list remove at end");
}
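结果(注意:这些数值来自一次未在各测试段之间重置秒表的运行,因此是累计时间,每一项都包含其之前所有测试段的耗时;例如 "list remove at end" 一段本身仅耗时约 1.4 毫秒):
BAG
00:00:00.0144421
Stack
00:00:00.0341379
Q
00:00:00.0400114
list remove at 0
00:00:00.6188329
list remove at end
00:00:00.6202170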
前言:我问这个只是因为我没有环境(足够大的数据集 + 计算能力)以可靠的方式对其进行测试。
问题:假设有一个 ConcurrentBag<T>,其中装载了数十亿个元素,并且只由单个线程访问/使用,它的性能是否与 List<T> 相近?换句话说,ConcurrentBag<T> 上的枚举与 List<T> 上的枚举相比,性能是更高还是更低?
ConcurrentBag<T>
的性能将不可避免地低于 List<T>
。虽然您只能从单个线程访问它,但该结构仍然需要有适当的机制来防止出现并发访问时出现竞争危险的可能性。
如果您要在开始枚举之前从单个线程加载集合,则可以使用 ConcurrentBag(IEnumerable<T>) 构造函数,而不是通过其 Add 方法逐个添加每个项目,从而避免这部分性能开销。
ConcurrentBag<T>
为枚举提供“即时快照”语义;请参阅其 GetEnumerator
方法的备注。当您从 foreach
循环访问 ConcurrentBag<T>
时,它会首先将其全部内容复制到一个普通的 List<T>
中,然后对其进行枚举。每次在循环中使用它时,都会产生大量的性能开销(计算和内存方面)。
如果您的场景是您的列表将由多个线程填充,但随后仅由一个线程读取,那么您应该在写入完成后立即将其转换为 List<T>
。
把数十亿个元素放进 List 或 ConcurrentBag?那是行不通的("no go")。
就性能而言,试试这个来测试添加:(随意修改这个以测试其他操作)
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace ConcurrentBagTest
{
    /// <summary>
    /// Console benchmark that times how long a single thread needs to append
    /// sequential <see cref="long"/> values to a <see cref="List{T}"/> and to a
    /// <see cref="ConcurrentBag{T}"/>, printing the elapsed time for each run.
    /// </summary>
    // You must compile this for x64 or you will get OutOfMemory exception
    class Program
    {
        /// <summary>
        /// Runs the list tests first, then the bag tests, and waits for a key press
        /// so the console window stays open.
        /// </summary>
        static void Main(string[] args)
        {
            ListTest(10000000);
            ListTest(100000000);
            // The largest run may fail with an exception (e.g. OutOfMemoryException);
            // ListTest catches and prints it, so the remaining tests still execute.
            ListTest(1000000000);
            ConcurrentBagTest(10000000);
            ConcurrentBagTest(100000000);
            Console.ReadKey();
        }

        /// <summary>
        /// Adds the values 0 .. count-1 to a new <see cref="ConcurrentBag{T}"/> one by one
        /// and prints how long the insert loop took.
        /// </summary>
        /// <param name="count">Number of items to insert.</param>
        static void ConcurrentBagTest(long count)
        {
            try
            {
                var bag = new ConcurrentBag<long>();
                Console.WriteLine($"--- ConcurrentBagTest count = {count}");
                // Estimate covers the raw payload only (count * 8 bytes), not collection overhead.
                Console.WriteLine($"I will use {(count * sizeof(long)) / Math.Pow(1024, 2)} MiB of RAM");
                Stopwatch stopwatch = new Stopwatch();
                stopwatch.Start();
                for (long i = 0; i < count; i++)
                {
                    bag.Add(i);
                }
                stopwatch.Stop();
                // Use the Count property rather than LINQ's LongCount(): LongCount() enumerates
                // the bag, and enumerating a ConcurrentBag works on a snapshot of its contents,
                // which is needless extra work and memory just to report a number.
                // Count is an int, which is sufficient for the sizes Main passes in.
                Console.WriteLine($"Inserted {bag.Count} items in {stopwatch.Elapsed.TotalSeconds} s");
                Console.WriteLine();
                Console.WriteLine();
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and let the caller continue with the next test.
                Console.WriteLine(ex.ToString());
            }
            // Ask the GC to reclaim this run's collection before the next test starts.
            GC.Collect();
            GC.WaitForPendingFinalizers();
        }

        /// <summary>
        /// Adds the values 0 .. count-1 to a new <see cref="List{T}"/> one by one
        /// and prints how long the insert loop took.
        /// </summary>
        /// <param name="count">Number of items to insert.</param>
        static void ListTest(long count)
        {
            try
            {
                var list = new List<long>();
                Console.WriteLine($"--- ListTest count = {count}");
                // Estimate covers the raw payload only (count * 8 bytes), not collection overhead.
                Console.WriteLine($"I will use {(count * sizeof(long)) / Math.Pow(1024, 2)} MiB of RAM");
                Stopwatch stopwatch = new Stopwatch();
                stopwatch.Start();
                for (long i = 0; i < count; i++)
                {
                    list.Add(i);
                }
                stopwatch.Stop();
                // List<T>.Count is a stored property; no need to walk the list with LongCount().
                Console.WriteLine($"Inserted {list.Count} items in {stopwatch.Elapsed.TotalSeconds} s");
                Console.WriteLine();
                Console.WriteLine();
            }
            catch (Exception ex)
            {
                // Best effort: report the failure and let the caller continue with the next test.
                Console.WriteLine(ex.ToString());
            }
            // Ask the GC to reclaim this run's collection before the next test starts.
            GC.Collect();
            GC.WaitForPendingFinalizers();
        }
    }
}
我的输出:
--- ListTest count = 10000000
I will use 76,2939453125 MiB of RAM
Inserted 10000000 items in 0,0807315 s
--- ListTest count = 100000000
I will use 762,939453125 MiB of RAM
Inserted 100000000 items in 0,7741546 s
--- ListTest count = 1000000000
I will use 7629,39453125 MiB of RAM
System.OutOfMemoryException: Array dimensions exceeded supported range.
--- ConcurrentBagTest count = 10000000
I will use 76,2939453125 MiB of RAM
Inserted 10000000 items in 1,0744069 s
--- ConcurrentBagTest count = 100000000
I will use 762,939453125 MiB of RAM
Inserted 100000000 items in 11,3976436 s
使用 CPU:Intel Core i7-2600 @ 3.4 GHz,
使用 RAM:16 GB
另请查看 this answer 了解限制。
但是,如果您需要删除项目,ConcurrentBag 比 List 快得多
// LINQPad script: times "add `count` items, then remove `count` items" for
// ConcurrentBag, ConcurrentStack, ConcurrentQueue and List (removing from the
// front and from the end), dumping the elapsed time of each section.
void Main()
{
    ConcurrentBag<int> bag = new ConcurrentBag<int>();
    ConcurrentStack<int> stack = new ConcurrentStack<int>();
    ConcurrentQueue<int> q = new ConcurrentQueue<int>();
    List<int> list = new List<int>();
    Stopwatch sw = new Stopwatch();
    int count = 100000;

    // Restart() (reset + start) is used before every section. Calling Start() on a
    // stopwatch that is already running does not reset it, so Elapsed would otherwise
    // report the cumulative time of all previous sections instead of this one.
    sw.Restart();
    for (int i = 0; i < count; i++)
    {
        bag.Add(i);
    }
    for (int i = 0; i < count; i++)
    {
        bag.TryTake(out _);
    }
    sw.Elapsed.Dump("BAG");

    sw.Restart();
    for (int i = 0; i < count; i++)
    {
        stack.Push(i);
    }
    for (int i = 0; i < count; i++)
    {
        stack.TryPop(out _);
    }
    sw.Elapsed.Dump("Stack");

    sw.Restart();
    for (int i = 0; i < count; i++)
    {
        q.Enqueue(i);
    }
    for (int i = 0; i < count; i++)
    {
        q.TryDequeue(out _);
    }
    sw.Elapsed.Dump("Q");

    sw.Restart();
    for (int i = 0; i < count; i++)
    {
        list.Add(i);
    }
    for (int i = 0; i < count; i++)
    {
        // Removing index 0 shifts every remaining element down by one.
        list.RemoveAt(0);
    }
    sw.Elapsed.Dump("list remove at 0");

    // The list is empty again here, so it is refilled for the second removal pattern.
    sw.Restart();
    for (int i = 0; i < count; i++)
    {
        list.Add(i);
    }
    for (int i = 0; i < count; i++)
    {
        list.RemoveAt(list.Count - 1);
    }
    sw.Elapsed.Dump("list remove at end");
}
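结果(注意:这些数值来自一次未在各测试段之间重置秒表的运行,因此是累计时间,每一项都包含其之前所有测试段的耗时;例如 "list remove at end" 一段本身仅耗时约 1.4 毫秒):
BAG 00:00:00.0144421
Stack 00:00:00.0341379
Q 00:00:00.0400114
list remove at 0 00:00:00.6188329
list remove at end 00:00:00.6202170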