不安全 直接从数组创建字符串
Unsafe Create string directly from Array
目前我知道的将字符串首字母大写的最快方法如下:
// Copy the characters of str into a new char[] (first allocation).
var array = str.ToCharArray();
// Upper-case only the first character, in place in the array.
array[0] = char.ToUpper(array[0]);
// Build the result from the array; the string constructor copies the characters again (second allocation).
return new string(array);
这涉及 2 次数组分配:一次是 str.ToCharArray() 创建字符数组,另一次是 new string(array) 将其复制到字符串的内部缓冲区。
既然我知道 array 不会逃逸出这个方法,有没有办法使用不安全的代码来避免第二次分配?
感谢@SomeBody 为我指明了正确的方向:
/// <summary>
/// Returns a copy of <paramref name="str"/> whose first character has been
/// upper-cased with <see cref="char.ToUpper(char)"/>.
/// </summary>
/// <param name="str">The text to capitalize; must contain at least one character.</param>
/// <returns>A new string with the same length as <paramref name="str"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="str"/> is empty.</exception>
public static string ToUpperFirstLetter(string str)
{
    // Name the offending parameter so the caller gets a meaningful message
    // instead of a null or empty one.
    if (str is null)
    {
        throw new ArgumentNullException(nameof(str));
    }

    if (str.Length == 0)
    {
        throw new ArgumentException("The string must not be empty.", nameof(str));
    }

    // string.Create hands the callback the new string's own buffer, so the
    // characters are copied once and no intermediate char[] is needed.
    // The lambda parameters are named differently from the method parameter
    // to avoid shadowing it.
    return string.Create(str.Length, str, (destination, source) =>
    {
        source.AsSpan().CopyTo(destination);
        destination[0] = char.ToUpper(destination[0]);
    });
}
既然您要求的是“最快”方式,那么让我们对一些事情进行基准测试。毕竟我们作为程序员喜欢使用经验证据*令人信服地点头*
老派
/// <summary>
/// Upper-cases the first character of <paramref name="value"/> by concatenating
/// the converted character with the rest of the string.
/// </summary>
/// <param name="value">The text to capitalize.</param>
/// <returns>The capitalized text as a new string.</returns>
public string OldSchool(string value)
{
    char head = char.ToUpper(value[0]);
    string tail = value[1..];
    return head + tail;
}
跨度
/// <summary>
/// Upper-cases the first character of <paramref name="value"/> using
/// <c>string.Create</c>: the source characters are copied into the new
/// string's buffer and the first one is converted there.
/// </summary>
/// <param name="value">The text to capitalize.</param>
/// <returns>A new string with the same length as <paramref name="value"/>.</returns>
public string TestSpan(string value)
{
    return string.Create(value.Length, value, (destination, source) =>
    {
        source.AsSpan().CopyTo(destination);
        destination[0] = char.ToUpper(destination[0]);
    });
}
不安全
/// <summary>
/// Creates a new string with the same characters as <paramref name="value"/>
/// and upper-cases its first character in place through a pinned pointer.
/// </summary>
/// <param name="value">The text to capitalize; it is not modified.</param>
/// <returns>The newly created, capitalized string.</returns>
public unsafe string TestUnsafe(string value)
{
    string copy = new string(value);

    // Pin the copy and overwrite its first character directly.
    fixed (char* first = copy)
    {
        *first = char.ToUpper(*first);
    }

    return copy;
}
不安全(仅限 ASCII)
/// <summary>
/// Creates a new string with the same characters as <paramref name="value"/>
/// and, when the first character is an ASCII lower-case letter ('a'..'z'),
/// replaces it in place with the matching upper-case letter.
/// Any other first character is left unchanged.
/// </summary>
/// <param name="value">The text to capitalize; it is not modified.</param>
/// <returns>The newly created string.</returns>
public unsafe string TestUnsafeAscii(string value)
{
    string copy = new string(value);

    fixed (char* first = copy)
    {
        if (*first >= 'a' && *first <= 'z')
        {
            // Upper-case ASCII letters sit 32 code points BELOW their
            // lower-case forms ('a' - 'A' == 32), so the offset is subtracted.
            *first = (char)(*first - ('a' - 'A'));
        }
    }

    return copy;
}
超级不安全
注意:这只适合超级勇敢的人,并且前提是字符串没有被驻留(interned),且原地修改字符串不会造成问题
/// <summary>
/// Upper-cases the first character of <paramref name="value"/> by writing
/// through a pinned pointer into the string that was passed in.
/// No new string is created: the argument itself is modified and returned.
/// </summary>
/// <param name="value">The string to modify in place.</param>
/// <returns>The same instance as <paramref name="value"/>.</returns>
public unsafe string SuperUnsafe(string value)
{
    fixed (char* first = value)
    {
        *first = char.ToUpper(*first);
    }

    return value;
}
基准
配置
BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.804 (2004/?/20H1)
AMD Ryzen 9 3900X, 1 CPU, 24 logical and 12 physical cores
.NET Core SDK=5.0.201
[Host] : .NET Core 5.0.4 (CoreCLR 5.0.421.11614, CoreFX 5.0.421.11614), X64 RyuJIT
.NET Core 5.0 : .NET Core 5.0.4 (CoreCLR 5.0.421.11614, CoreFX 5.0.421.11614), X64 RyuJIT
Job=.NET Core 5.0 Runtime=.NET Core 5.0
结果
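| Method | N | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
|---|---|---|---|---|---|---|---|---|---|
| OldSchool | 5 | 44.89 ns | 0.389 ns | 0.364 ns | 1.00 | 0.0105 | - | - | 88 B |
| Span | 5 | 26.37 ns | 0.170 ns | 0.159 ns | 1.00 | 0.0038 | - | - | 32 B |
| Unsafe | 5 | 25.15 ns | 0.128 ns | 0.119 ns | 1.00 | 0.0038 | - | - | 32 B |
| UnsafeAscii | 5 | 11.92 ns | 0.093 ns | 0.073 ns | 1.00 | 0.0038 | - | - | 32 B |
| SuperUnsafe | 5 | 10.22 ns | 0.051 ns | 0.045 ns | 1.00 | - | - | - | - |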
| Method | N | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
|---|---|---|---|---|---|---|---|---|---|
| OldSchool | 10 | 49.31 ns | 0.595 ns | 0.527 ns | 1.00 | 0.0134 | - | - | 112 B |
| Span | 10 | 27.71 ns | 0.548 ns | 0.512 ns | 1.00 | 0.0057 | - | - | 48 B |
| Unsafe | 10 | 26.76 ns | 0.142 ns | 0.126 ns | 1.00 | 0.0057 | - | - | 48 B |
| UnsafeAscii | 10 | 13.40 ns | 0.103 ns | 0.096 ns | 1.00 | 0.0057 | - | - | 48 B |
| SuperUnsafe | 10 | 10.28 ns | 0.106 ns | 0.094 ns | 1.00 | - | - | - | - |
| Method | N | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
|---|---|---|---|---|---|---|---|---|---|
| OldSchool | 100 | 83.52 ns | 0.966 ns | 0.903 ns | 1.00 | 0.0564 | - | - | 472 B |
| Span | 100 | 45.77 ns | 0.441 ns | 0.412 ns | 1.00 | 0.0268 | - | - | 224 B |
| Unsafe | 100 | 44.07 ns | 0.511 ns | 0.453 ns | 1.00 | 0.0268 | - | - | 224 B |
| UnsafeAscii | 100 | 31.45 ns | 0.382 ns | 0.357 ns | 1.00 | 0.0268 | - | - | 224 B |
| SuperUnsafe | 100 | 10.26 ns | 0.078 ns | 0.073 ns | 1.00 | - | - | - | - |
| Method | N | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
|---|---|---|---|---|---|---|---|---|---|
| OldSchool | 1000 | 512.05 ns | 2.909 ns | 2.578 ns | 1.00 | 0.4864 | 0.0052 | - | 4072 B |
| Span | 1000 | 260.35 ns | 2.593 ns | 2.425 ns | 1.00 | 0.2418 | 0.0017 | - | 2024 B |
| Unsafe | 1000 | 255.06 ns | 1.587 ns | 1.407 ns | 1.00 | 0.2418 | 0.0017 | - | 2024 B |
| UnsafeAscii | 1000 | 247.60 ns | 2.500 ns | 2.338 ns | 1.00 | 0.2418 | 0.0017 | - | 2024 B |
| SuperUnsafe | 1000 | 10.21 ns | 0.060 ns | 0.056 ns | 1.00 | - | - | - | - |
完整测试代码
/// <summary>
/// Benchmark harness comparing several ways of upper-casing the first
/// character of a string. Each [Benchmark] method forwards the shared input
/// to the implementation being measured.
/// </summary>
public class Test
{
    // Input handed to every benchmark; assigned in Setup().
    private string _data;

    // Seeded with a constant so the generated inputs are reproducible.
    private static readonly Random random = new Random(42);

    /// <summary>
    /// Builds a string of <paramref name="length"/> characters picked at
    /// random from the upper-case letters A-Z and the digits 0-9.
    /// </summary>
    public static string RandomString(int length)
    {
        // The alphabet contains no lower-case letters, so the generated input
        // never starts with a character that the conversions would change.
        const string chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        return new string(Enumerable.Repeat(chars, length)
            .Select(s => s[random.Next(s.Length)]).ToArray());
    }

    // Length of the generated input string.
    [Params(5, 10, 100, 1000)] public int N;

    [GlobalSetup]
    public void Setup()
    {
        _data = RandomString(N);
    }

    [Benchmark]
    public string OldSchool() => OldSchool(_data);

    /// <summary>Concatenates the upper-cased first character with the rest of the string.</summary>
    public string OldSchool(string value)
        => char.ToUpper(value[0]) + value[1..];

    [Benchmark]
    public string Span() => TestSpan(_data);

    /// <summary>Copies the input into a new string via string.Create and upper-cases the first character there.</summary>
    public string TestSpan(string value)
        => string.Create(value.Length, value, (span, str) =>
        {
            str.AsSpan().CopyTo(span);
            span[0] = char.ToUpper(span[0]);
        });

    [Benchmark]
    public string Unsafe() => TestUnsafe(_data);

    /// <summary>Creates a new string from the input and upper-cases its first character through a pinned pointer.</summary>
    public unsafe string TestUnsafe(string value)
    {
        var result = new string(value);
        fixed (char* p = result) p[0] = char.ToUpper(p[0]);
        return result;
    }

    [Benchmark]
    public unsafe string SuperUnsafe() => SuperUnsafe(_data);

    /// <summary>
    /// Upper-cases the first character by writing directly into the string
    /// that was passed in, and returns that same instance.
    /// </summary>
    public unsafe string SuperUnsafe(string value)
    {
        fixed (char* p = value) p[0] = char.ToUpper(p[0]);
        return value;
    }

    [Benchmark]
    public string UnsafeAscii() => TestUnsafeAscii(_data);

    /// <summary>
    /// Creates a new string from the input and, when its first character is an
    /// ASCII lower-case letter ('a'..'z'), replaces it with the upper-case letter.
    /// </summary>
    public unsafe string TestUnsafeAscii(string value)
    {
        var result = new string(value);
        fixed (char* p = result)
        {
            if (p[0] >= 'a' && p[0] <= 'z')
            {
                // Upper-case ASCII letters sit 32 code points BELOW their
                // lower-case forms ('a' - 'A' == 32), so the offset is subtracted.
                *p = (char)(*p - ('a' - 'A'));
            }
        }
        return result;
    }
}
目前我知道的将字符串首字母大写的最快方法如下:
// Copy the characters of str into a new char[] (first allocation).
var array = str.ToCharArray();
// Upper-case only the first character, in place in the array.
array[0] = char.ToUpper(array[0]);
// Build the result from the array; the string constructor copies the characters again (second allocation).
return new string(array);
这涉及 2 次数组分配:一次是 str.ToCharArray() 创建字符数组,另一次是 new string(array) 将其复制到字符串的内部缓冲区。
既然我知道 array 不会逃逸出这个方法,有没有办法使用不安全的代码来避免第二次分配?
感谢@SomeBody 为我指明了正确的方向:
/// <summary>
/// Returns a copy of <paramref name="str"/> whose first character has been
/// upper-cased with <see cref="char.ToUpper(char)"/>.
/// </summary>
/// <param name="str">The text to capitalize; must contain at least one character.</param>
/// <returns>A new string with the same length as <paramref name="str"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is <c>null</c>.</exception>
/// <exception cref="ArgumentException"><paramref name="str"/> is empty.</exception>
public static string ToUpperFirstLetter(string str)
{
    // Name the offending parameter so the caller gets a meaningful message
    // instead of a null or empty one.
    if (str is null)
    {
        throw new ArgumentNullException(nameof(str));
    }

    if (str.Length == 0)
    {
        throw new ArgumentException("The string must not be empty.", nameof(str));
    }

    // string.Create hands the callback the new string's own buffer, so the
    // characters are copied once and no intermediate char[] is needed.
    // The lambda parameters are named differently from the method parameter
    // to avoid shadowing it.
    return string.Create(str.Length, str, (destination, source) =>
    {
        source.AsSpan().CopyTo(destination);
        destination[0] = char.ToUpper(destination[0]);
    });
}
既然您要求的是“最快”方式,那么让我们对一些事情进行基准测试。毕竟我们作为程序员喜欢使用经验证据*令人信服地点头*
老派
/// <summary>
/// Upper-cases the first character of <paramref name="value"/> by concatenating
/// the converted character with the rest of the string.
/// </summary>
/// <param name="value">The text to capitalize.</param>
/// <returns>The capitalized text as a new string.</returns>
public string OldSchool(string value)
{
    char head = char.ToUpper(value[0]);
    string tail = value[1..];
    return head + tail;
}
跨度
/// <summary>
/// Upper-cases the first character of <paramref name="value"/> using
/// <c>string.Create</c>: the source characters are copied into the new
/// string's buffer and the first one is converted there.
/// </summary>
/// <param name="value">The text to capitalize.</param>
/// <returns>A new string with the same length as <paramref name="value"/>.</returns>
public string TestSpan(string value)
{
    return string.Create(value.Length, value, (destination, source) =>
    {
        source.AsSpan().CopyTo(destination);
        destination[0] = char.ToUpper(destination[0]);
    });
}
不安全
/// <summary>
/// Creates a new string with the same characters as <paramref name="value"/>
/// and upper-cases its first character in place through a pinned pointer.
/// </summary>
/// <param name="value">The text to capitalize; it is not modified.</param>
/// <returns>The newly created, capitalized string.</returns>
public unsafe string TestUnsafe(string value)
{
    string copy = new string(value);

    // Pin the copy and overwrite its first character directly.
    fixed (char* first = copy)
    {
        *first = char.ToUpper(*first);
    }

    return copy;
}
不安全(仅限 ASCII)
/// <summary>
/// Creates a new string with the same characters as <paramref name="value"/>
/// and, when the first character is an ASCII lower-case letter ('a'..'z'),
/// replaces it in place with the matching upper-case letter.
/// Any other first character is left unchanged.
/// </summary>
/// <param name="value">The text to capitalize; it is not modified.</param>
/// <returns>The newly created string.</returns>
public unsafe string TestUnsafeAscii(string value)
{
    string copy = new string(value);

    fixed (char* first = copy)
    {
        if (*first >= 'a' && *first <= 'z')
        {
            // Upper-case ASCII letters sit 32 code points BELOW their
            // lower-case forms ('a' - 'A' == 32), so the offset is subtracted.
            *first = (char)(*first - ('a' - 'A'));
        }
    }

    return copy;
}
超级不安全
注意:这只适合超级勇敢的人,并且前提是字符串没有被驻留(interned),且原地修改字符串不会造成问题
/// <summary>
/// Upper-cases the first character of <paramref name="value"/> by writing
/// through a pinned pointer into the string that was passed in.
/// No new string is created: the argument itself is modified and returned.
/// </summary>
/// <param name="value">The string to modify in place.</param>
/// <returns>The same instance as <paramref name="value"/>.</returns>
public unsafe string SuperUnsafe(string value)
{
    fixed (char* first = value)
    {
        *first = char.ToUpper(*first);
    }

    return value;
}
基准
配置
BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.804 (2004/?/20H1)
AMD Ryzen 9 3900X, 1 CPU, 24 logical and 12 physical cores
.NET Core SDK=5.0.201
[Host] : .NET Core 5.0.4 (CoreCLR 5.0.421.11614, CoreFX 5.0.421.11614), X64 RyuJIT
.NET Core 5.0 : .NET Core 5.0.4 (CoreCLR 5.0.421.11614, CoreFX 5.0.421.11614), X64 RyuJIT
Job=.NET Core 5.0 Runtime=.NET Core 5.0
结果
Method | N | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
---|---|---|---|---|---|---|---|---|---|
OldSchool | 5 | 44.89 ns | 0.389 ns | 0.364 ns | 1.00 | 0.0105 | - | - | 88 B |
Span | 5 | 26.37 ns | 0.170 ns | 0.159 ns | 1.00 | 0.0038 | - | - | 32 B |
Unsafe | 5 | 25.15 ns | 0.128 ns | 0.119 ns | 1.00 | 0.0038 | - | - | 32 B |
UnsafeAscii | 5 | 11.92 ns | 0.093 ns | 0.073 ns | 1.00 | 0.0038 | - | - | 32 B |
SuperUnsafe | 5 | 10.22 ns | 0.051 ns | 0.045 ns | 1.00 | - | - | - | - |
Method | N | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
---|---|---|---|---|---|---|---|---|---|
OldSchool | 10 | 49.31 ns | 0.595 ns | 0.527 ns | 1.00 | 0.0134 | - | - | 112 B |
Span | 10 | 27.71 ns | 0.548 ns | 0.512 ns | 1.00 | 0.0057 | - | - | 48 B |
Unsafe | 10 | 26.76 ns | 0.142 ns | 0.126 ns | 1.00 | 0.0057 | - | - | 48 B |
UnsafeAscii | 10 | 13.40 ns | 0.103 ns | 0.096 ns | 1.00 | 0.0057 | - | - | 48 B |
SuperUnsafe | 10 | 10.28 ns | 0.106 ns | 0.094 ns | 1.00 | - | - | - | - |
Method | N | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
---|---|---|---|---|---|---|---|---|---|
OldSchool | 100 | 83.52 ns | 0.966 ns | 0.903 ns | 1.00 | 0.0564 | - | - | 472 B |
Span | 100 | 45.77 ns | 0.441 ns | 0.412 ns | 1.00 | 0.0268 | - | - | 224 B |
Unsafe | 100 | 44.07 ns | 0.511 ns | 0.453 ns | 1.00 | 0.0268 | - | - | 224 B |
UnsafeAscii | 100 | 31.45 ns | 0.382 ns | 0.357 ns | 1.00 | 0.0268 | - | - | 224 B |
SuperUnsafe | 100 | 10.26 ns | 0.078 ns | 0.073 ns | 1.00 | - | - | - | - |
Method | N | Mean | Error | StdDev | Ratio | Gen 0 | Gen 1 | Gen 2 | Allocated |
---|---|---|---|---|---|---|---|---|---|
OldSchool | 1000 | 512.05 ns | 2.909 ns | 2.578 ns | 1.00 | 0.4864 | 0.0052 | - | 4072 B |
Span | 1000 | 260.35 ns | 2.593 ns | 2.425 ns | 1.00 | 0.2418 | 0.0017 | - | 2024 B |
Unsafe | 1000 | 255.06 ns | 1.587 ns | 1.407 ns | 1.00 | 0.2418 | 0.0017 | - | 2024 B |
UnsafeAscii | 1000 | 247.60 ns | 2.500 ns | 2.338 ns | 1.00 | 0.2418 | 0.0017 | - | 2024 B |
SuperUnsafe | 1000 | 10.21 ns | 0.060 ns | 0.056 ns | 1.00 | - | - | - | - |
完整测试代码
/// <summary>
/// Benchmark harness comparing several ways of upper-casing the first
/// character of a string. Each [Benchmark] method forwards the shared input
/// to the implementation being measured.
/// </summary>
public class Test
{
    // Input handed to every benchmark; assigned in Setup().
    private string _data;

    // Seeded with a constant so the generated inputs are reproducible.
    private static readonly Random random = new Random(42);

    /// <summary>
    /// Builds a string of <paramref name="length"/> characters picked at
    /// random from the upper-case letters A-Z and the digits 0-9.
    /// </summary>
    public static string RandomString(int length)
    {
        // The alphabet contains no lower-case letters, so the generated input
        // never starts with a character that the conversions would change.
        const string chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
        return new string(Enumerable.Repeat(chars, length)
            .Select(s => s[random.Next(s.Length)]).ToArray());
    }

    // Length of the generated input string.
    [Params(5, 10, 100, 1000)] public int N;

    [GlobalSetup]
    public void Setup()
    {
        _data = RandomString(N);
    }

    [Benchmark]
    public string OldSchool() => OldSchool(_data);

    /// <summary>Concatenates the upper-cased first character with the rest of the string.</summary>
    public string OldSchool(string value)
        => char.ToUpper(value[0]) + value[1..];

    [Benchmark]
    public string Span() => TestSpan(_data);

    /// <summary>Copies the input into a new string via string.Create and upper-cases the first character there.</summary>
    public string TestSpan(string value)
        => string.Create(value.Length, value, (span, str) =>
        {
            str.AsSpan().CopyTo(span);
            span[0] = char.ToUpper(span[0]);
        });

    [Benchmark]
    public string Unsafe() => TestUnsafe(_data);

    /// <summary>Creates a new string from the input and upper-cases its first character through a pinned pointer.</summary>
    public unsafe string TestUnsafe(string value)
    {
        var result = new string(value);
        fixed (char* p = result) p[0] = char.ToUpper(p[0]);
        return result;
    }

    [Benchmark]
    public unsafe string SuperUnsafe() => SuperUnsafe(_data);

    /// <summary>
    /// Upper-cases the first character by writing directly into the string
    /// that was passed in, and returns that same instance.
    /// </summary>
    public unsafe string SuperUnsafe(string value)
    {
        fixed (char* p = value) p[0] = char.ToUpper(p[0]);
        return value;
    }

    [Benchmark]
    public string UnsafeAscii() => TestUnsafeAscii(_data);

    /// <summary>
    /// Creates a new string from the input and, when its first character is an
    /// ASCII lower-case letter ('a'..'z'), replaces it with the upper-case letter.
    /// </summary>
    public unsafe string TestUnsafeAscii(string value)
    {
        var result = new string(value);
        fixed (char* p = result)
        {
            if (p[0] >= 'a' && p[0] <= 'z')
            {
                // Upper-case ASCII letters sit 32 code points BELOW their
                // lower-case forms ('a' - 'A' == 32), so the offset is subtracted.
                *p = (char)(*p - ('a' - 'A'));
            }
        }
        return result;
    }
}