为什么在 ArrayPool 上分配比在 Stack 上分配更快?
Why is allocation on ArrayPool faster than allocation on the Stack?
我有以下基准测试,它使用堆栈分配、堆分配和 ArrayPool 分配从文件中读取字符串。
我预计 Stack 分配是最快的,因为它只是堆栈指针递增,但根据基准 ArrayPool 稍快一些。
怎么可能?
// Program entry point: passes the BenchmarkRead class to BenchmarkRunner.
static void Main(string[] args) => BenchmarkRunner.Run<BenchmarkRead>();
using BenchmarkDotNet.Attributes;
using System;
using System.Buffers;
using System.IO;
using System.Linq;
namespace RealTime.Benchmark
{
    /// <summary>
    /// Benchmarks three ways of loading the contents of a text file into memory:
    /// a buffer rented from <see cref="ArrayPool{T}"/>, a heap-allocated string
    /// and a stack-allocated buffer.
    /// </summary>
    [MemoryDiagnoser]
    public class BenchmarkRead
    {
        const string TestFile = "TestFiles/animals.txt";

        /// <summary>
        /// Creates the directory for <c>TestFile</c> and writes the test content to it.
        /// </summary>
        public BenchmarkRead()
        {
            Directory.CreateDirectory(Path.GetDirectoryName(TestFile));
            // 24 characters repeated 4000 times = 96,000 characters of text.
            string content = string.Concat(Enumerable.Repeat("dog,cat,spider,cat,bird,", 4000));
            File.WriteAllText(TestFile, content);
        }

        /// <summary>Benchmark entry: reads the test file into a pooled array.</summary>
        [Benchmark]
        public void ReadFileOnPool() => ReadFileOnPool(TestFile);

        /// <summary>Benchmark entry: reads the test file into a heap-allocated string.</summary>
        [Benchmark]
        public void ReadFileOnHeap() => ReadFileOnHeap(TestFile);

        /// <summary>Benchmark entry: reads the test file into a stack-allocated buffer.</summary>
        [Benchmark]
        public void ReadFileOnStack() => ReadFileOnStack(TestFile);

        /// <summary>Reads the whole file as a string via <see cref="File.ReadAllText(string)"/>.</summary>
        /// <param name="filename">Path of the file to read.</param>
        public void ReadFileOnHeap(string filename)
        {
            string text = File.ReadAllText(filename);
            // ....call parse
        }

        /// <summary>Reads the file into a 200 KiB buffer allocated with <c>stackalloc</c>.</summary>
        /// <param name="filename">Path of the file to read.</param>
        /// <exception cref="InvalidOperationException">
        /// The buffer was filled completely, so the file may not fit into it.
        /// </exception>
        public void ReadFileOnStack(string filename)
        {
            Span<byte> span = stackalloc byte[1024 * 200];
            using (var stream = File.OpenRead(filename))
            {
                // A single Stream.Read call may return fewer bytes than requested, so keep
                // reading until the buffer is full or the end of the stream is reached.
                // NOTE: this costs one extra Read call (the one that reports end of stream).
                int count = ReadUntilFullOrEnd(stream, span);
                if (count == span.Length)
                {
                    throw new InvalidOperationException($"Buffer size {span.Length} too small, use array pooling.");
                }

                span = span.Slice(0, count);
                // ....call parse
            }
        }

        /// <summary>
        /// Reads the file into an array rented from <see cref="ArrayPool{T}.Shared"/>
        /// and returns the array to the pool afterwards.
        /// </summary>
        /// <param name="filename">Path of the file to read.</param>
        /// <exception cref="IOException">The file is longer than <see cref="int.MaxValue"/> bytes.</exception>
        /// <exception cref="EndOfStreamException">
        /// The stream ended before the number of bytes reported by its length was read.
        /// </exception>
        public void ReadFileOnPool(string filename)
        {
            ArrayPool<byte> pool = ArrayPool<byte>.Shared;
            using (var stream = File.OpenRead(filename))
            {
                long len = stream.Length;
                // Guard the narrowing cast below; an unchecked (int) cast would silently truncate.
                if (len > int.MaxValue)
                {
                    throw new IOException($"File length {len} exceeds the maximum buffer size {int.MaxValue}.");
                }

                int size = (int)len;
                byte[] buffer = pool.Rent(size);
                try
                {
                    // Limit the destination to 'size' bytes; the read must not go past the file length.
                    int count = ReadUntilFullOrEnd(stream, new Span<byte>(buffer, 0, size));
                    if (count != len)
                    {
                        throw new EndOfStreamException($"{count} != {len}");
                    }

                    Span<byte> span = new Span<byte>(buffer, 0, count);
                    // ....call parse
                }
                finally
                {
                    pool.Return(buffer);
                }
            }
        }

        /// <summary>
        /// Reads from <paramref name="stream"/> until <paramref name="destination"/> is full
        /// or a read returns 0 bytes.
        /// </summary>
        /// <param name="stream">Stream to read from.</param>
        /// <param name="destination">Buffer that receives the bytes.</param>
        /// <returns>Total number of bytes written to <paramref name="destination"/>.</returns>
        private static int ReadUntilFullOrEnd(Stream stream, Span<byte> destination)
        {
            int total = 0;
            while (total < destination.Length)
            {
                int read = stream.Read(destination.Slice(total));
                if (read == 0)
                {
                    break;
                }

                total += read;
            }

            return total;
        }
    }
}
结果:
| Method | Mean | Gen 0/1k Op | Gen 2/1k Op |Al. memory/Op|
|---------------- |---------:|------------:|------------:|------------:|
| ReadFileOnPool | 109.9 us | 0.1221 | - | 480 B |
| ReadFileOnHeap | 506.0 us | 87.8906 | 58.5938 | 393440 B |
| ReadFileOnStack | 115.2 us | 0.1221 | - | 480 B |
Span<byte> span = stackalloc byte[1024 * 200]
由于 InitLocals,上面这个栈上缓冲区将被零初始化,而
byte[] buffer = pool.Rent((int)len);
根本不会被零初始化。
所以,在你的场景中,对本地数组进行零初始化的成本已经高于整个 Rent() 例程的成本。
我实际上在几个月前为此专门创建了一个 NuGet 包:https://github.com/josetr/InitLocals ,不过微软很快也会提供类似的功能:https://github.com/dotnet/corefx/issues/29026 。
我有以下基准测试,它使用堆栈分配、堆分配和 ArrayPool 分配从文件中读取字符串。
我预计 Stack 分配是最快的,因为它只是堆栈指针递增,但根据基准 ArrayPool 稍快一些。
怎么可能?
// Program entry point: passes the BenchmarkRead class to BenchmarkRunner.
static void Main(string[] args) => BenchmarkRunner.Run<BenchmarkRead>();
using BenchmarkDotNet.Attributes;
using System;
using System.Buffers;
using System.IO;
using System.Linq;
namespace RealTime.Benchmark
{
    /// <summary>
    /// Benchmarks three ways of loading the contents of a text file into memory:
    /// a buffer rented from <see cref="ArrayPool{T}"/>, a heap-allocated string
    /// and a stack-allocated buffer.
    /// </summary>
    [MemoryDiagnoser]
    public class BenchmarkRead
    {
        const string TestFile = "TestFiles/animals.txt";

        /// <summary>
        /// Creates the directory for <c>TestFile</c> and writes the test content to it.
        /// </summary>
        public BenchmarkRead()
        {
            Directory.CreateDirectory(Path.GetDirectoryName(TestFile));
            // 24 characters repeated 4000 times = 96,000 characters of text.
            string content = string.Concat(Enumerable.Repeat("dog,cat,spider,cat,bird,", 4000));
            File.WriteAllText(TestFile, content);
        }

        /// <summary>Benchmark entry: reads the test file into a pooled array.</summary>
        [Benchmark]
        public void ReadFileOnPool() => ReadFileOnPool(TestFile);

        /// <summary>Benchmark entry: reads the test file into a heap-allocated string.</summary>
        [Benchmark]
        public void ReadFileOnHeap() => ReadFileOnHeap(TestFile);

        /// <summary>Benchmark entry: reads the test file into a stack-allocated buffer.</summary>
        [Benchmark]
        public void ReadFileOnStack() => ReadFileOnStack(TestFile);

        /// <summary>Reads the whole file as a string via <see cref="File.ReadAllText(string)"/>.</summary>
        /// <param name="filename">Path of the file to read.</param>
        public void ReadFileOnHeap(string filename)
        {
            string text = File.ReadAllText(filename);
            // ....call parse
        }

        /// <summary>Reads the file into a 200 KiB buffer allocated with <c>stackalloc</c>.</summary>
        /// <param name="filename">Path of the file to read.</param>
        /// <exception cref="InvalidOperationException">
        /// The buffer was filled completely, so the file may not fit into it.
        /// </exception>
        public void ReadFileOnStack(string filename)
        {
            Span<byte> span = stackalloc byte[1024 * 200];
            using (var stream = File.OpenRead(filename))
            {
                // A single Stream.Read call may return fewer bytes than requested, so keep
                // reading until the buffer is full or the end of the stream is reached.
                // NOTE: this costs one extra Read call (the one that reports end of stream).
                int count = ReadUntilFullOrEnd(stream, span);
                if (count == span.Length)
                {
                    throw new InvalidOperationException($"Buffer size {span.Length} too small, use array pooling.");
                }

                span = span.Slice(0, count);
                // ....call parse
            }
        }

        /// <summary>
        /// Reads the file into an array rented from <see cref="ArrayPool{T}.Shared"/>
        /// and returns the array to the pool afterwards.
        /// </summary>
        /// <param name="filename">Path of the file to read.</param>
        /// <exception cref="IOException">The file is longer than <see cref="int.MaxValue"/> bytes.</exception>
        /// <exception cref="EndOfStreamException">
        /// The stream ended before the number of bytes reported by its length was read.
        /// </exception>
        public void ReadFileOnPool(string filename)
        {
            ArrayPool<byte> pool = ArrayPool<byte>.Shared;
            using (var stream = File.OpenRead(filename))
            {
                long len = stream.Length;
                // Guard the narrowing cast below; an unchecked (int) cast would silently truncate.
                if (len > int.MaxValue)
                {
                    throw new IOException($"File length {len} exceeds the maximum buffer size {int.MaxValue}.");
                }

                int size = (int)len;
                byte[] buffer = pool.Rent(size);
                try
                {
                    // Limit the destination to 'size' bytes; the read must not go past the file length.
                    int count = ReadUntilFullOrEnd(stream, new Span<byte>(buffer, 0, size));
                    if (count != len)
                    {
                        throw new EndOfStreamException($"{count} != {len}");
                    }

                    Span<byte> span = new Span<byte>(buffer, 0, count);
                    // ....call parse
                }
                finally
                {
                    pool.Return(buffer);
                }
            }
        }

        /// <summary>
        /// Reads from <paramref name="stream"/> until <paramref name="destination"/> is full
        /// or a read returns 0 bytes.
        /// </summary>
        /// <param name="stream">Stream to read from.</param>
        /// <param name="destination">Buffer that receives the bytes.</param>
        /// <returns>Total number of bytes written to <paramref name="destination"/>.</returns>
        private static int ReadUntilFullOrEnd(Stream stream, Span<byte> destination)
        {
            int total = 0;
            while (total < destination.Length)
            {
                int read = stream.Read(destination.Slice(total));
                if (read == 0)
                {
                    break;
                }

                total += read;
            }

            return total;
        }
    }
}
结果:
| Method | Mean | Gen 0/1k Op | Gen 2/1k Op |Al. memory/Op|
|---------------- |---------:|------------:|------------:|------------:|
| ReadFileOnPool | 109.9 us | 0.1221 | - | 480 B |
| ReadFileOnHeap | 506.0 us | 87.8906 | 58.5938 | 393440 B |
| ReadFileOnStack | 115.2 us | 0.1221 | - | 480 B |
Span<byte> span = stackalloc byte[1024 * 200]
由于 InitLocals,上面这个栈上缓冲区将被零初始化,而
byte[] buffer = pool.Rent((int)len);
根本不会被零初始化。
所以,在你的场景中,对本地数组进行零初始化的成本已经高于整个 Rent() 例程的成本。
我实际上在几个月前为此专门创建了一个 NuGet 包:https://github.com/josetr/InitLocals ,不过微软很快也会提供类似的功能:https://github.com/dotnet/corefx/issues/29026 。