使用不安全代码直接从数组创建字符串

Unsafe Create string directly from Array

目前我知道的将字符串首字母大写的最快方法如下:

// Copy the characters of str into a new, mutable char[] (first allocation).
var array = str.ToCharArray();
// Upper-case the first character in place.
array[0] = char.ToUpper(array[0]);
// Build the result; the string constructor copies the array contents (second allocation).
return new string(array);

这涉及 2 次数组分配:一次是 str.ToCharArray() 创建的字符数组,另一次是 new string(array) 将该数组复制到字符串内部缓冲区时的分配。

既然我知道 array 不会逃逸出这个方法,有没有办法使用不安全代码来避免第二次分配?

感谢@SomeBody 为我指明了正确的方向:

/// <summary>
/// Returns a copy of <paramref name="str"/> whose first character has been
/// passed through <see cref="char.ToUpper(char)"/>.
/// </summary>
/// <param name="str">The text to capitalize; must be neither null nor empty.</param>
/// <returns>A new string of the same length as <paramref name="str"/>.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="str"/> is null or empty.
/// </exception>
public static string ToUpperFirstLetter(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        // Report a real message and the parameter name; passing the (null or
        // empty) argument itself as the message tells the caller nothing.
        throw new ArgumentException("Value cannot be null or empty.", nameof(str));
    }

    // string.Create exposes the new string's own buffer, so the characters are
    // written exactly once and no intermediate char[] is needed.
    // The lambda parameters are named distinctly so they do not shadow 'str'.
    return string.Create(str.Length, str, (destination, source) =>
    {
        source.AsSpan().CopyTo(destination);
        destination[0] = char.ToUpper(destination[0]);
    });
}

既然您要求的是“最快”方式,那么让我们对一些事情进行基准测试。毕竟我们作为程序员喜欢使用经验证据*令人信服地点头*

老派

/// <summary>
/// Capitalizes the first character by concatenating the upper-cased first
/// character with the remainder of <paramref name="value"/>.
/// </summary>
/// <param name="value">The text to capitalize; indexing position 0 requires at least one character.</param>
/// <returns>A new string with the first character upper-cased.</returns>
public string OldSchool(string value)
{
   char head = char.ToUpper(value[0]);
   string tail = value.Substring(1);
   return string.Concat(head.ToString(), tail);
}

跨度

/// <summary>
/// Capitalizes the first character by writing directly into the buffer of a
/// string created with <c>string.Create</c>.
/// </summary>
/// <param name="value">The text to capitalize; position 0 of the result is accessed, so it must have at least one character.</param>
/// <returns>A new string of the same length with the first character upper-cased.</returns>
public string TestSpan(string value)
{
   return string.Create(value.Length, value, (destination, source) =>
   {
      // Fill the new string's buffer with the source characters first...
      source.AsSpan().CopyTo(destination);

      // ...then overwrite the first slot with its upper-case form.
      ref char first = ref destination[0];
      first = char.ToUpper(first);
   });
}

不安全

/// <summary>
/// Capitalizes the first character by copying <paramref name="value"/> into a
/// new string and then overwriting the copy's first character through a pinned pointer.
/// </summary>
/// <param name="value">The text to capitalize.</param>
/// <returns>The copy, with its first character passed through <see cref="char.ToUpper(char)"/>.</returns>
public unsafe string TestUnsafe(string value)
{
   string copy = new string(value);

   // Pin the copy so its character buffer can be written through a raw pointer.
   fixed (char* first = copy)
   {
      *first = char.ToUpper(*first);
   }

   return copy;
}

不安全(仅限 ASCII)

/// <summary>
/// Capitalizes the first character of a copy of <paramref name="value"/>,
/// handling only the ASCII letters 'a'..'z'; any other first character is left as is.
/// </summary>
/// <param name="value">The text to capitalize.</param>
/// <returns>The copy, with an ASCII lowercase first letter converted to upper case.</returns>
public unsafe string TestUnsafeAscii(string value)
{
   // Distance between an ASCII lowercase letter and its upper-case form ('a' - 'A' == 32).
   const int AsciiCaseOffset = 'a' - 'A';

   var result = new string(value);
   fixed (char* p = result)
   {
      if (*p >= 'a' && *p <= 'z')
      {
         // Upper case lies BELOW lower case in ASCII, so the offset must be
         // subtracted; adding 32 would turn 'a' into U+0081 rather than 'A'.
         *p = (char)(*p - AsciiCaseOffset);
      }
   }
   return result;
}

超级不安全

注意:这种写法只适合超级勇敢的人,并且仅适用于字符串没有被驻留(intern)、原地修改字符串不会造成问题的场景

/// <summary>
/// Upper-cases the first character of <paramref name="value"/> IN PLACE by
/// writing through a pinned pointer, and returns that same instance.
/// </summary>
/// <remarks>
/// No copy is made: the caller's string object itself is modified, so every
/// other reference to that instance observes the change. Only use this when
/// the string is not interned and in-place mutation is acceptable.
/// </remarks>
/// <param name="value">The string to mutate.</param>
/// <returns>The same instance that was passed in.</returns>
public unsafe string SuperUnsafe(string value)
{
   fixed (char* first = value)
   {
      *first = char.ToUpper(*first);
   }

   return value;
}

基准

配置

BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19041.804 (2004/?/20H1)
AMD Ryzen 9 3900X, 1 CPU, 24 logical and 12 physical cores
.NET Core SDK=5.0.201
  [Host]        : .NET Core 5.0.4 (CoreCLR 5.0.421.11614, CoreFX 5.0.421.11614), X64 RyuJIT
  .NET Core 5.0 : .NET Core 5.0.4 (CoreCLR 5.0.421.11614, CoreFX 5.0.421.11614), X64 RyuJIT

Job=.NET Core 5.0  Runtime=.NET Core 5.0

结果

Method N Mean Error StdDev Ratio Gen 0 Gen 1 Gen 2 Allocated
OldSchool 5 44.89 ns 0.389 ns 0.364 ns 1.00 0.0105 - - 88 B
Span 5 26.37 ns 0.170 ns 0.159 ns 1.00 0.0038 - - 32 B
Unsafe 5 25.15 ns 0.128 ns 0.119 ns 1.00 0.0038 - - 32 B
UnsafeAscii 5 11.92 ns 0.093 ns 0.073 ns 1.00 0.0038 - - 32 B
SuperUnsafe 5 10.22 ns 0.051 ns 0.045 ns 1.00 - - - -
Method N Mean Error StdDev Ratio Gen 0 Gen 1 Gen 2 Allocated
OldSchool 10 49.31 ns 0.595 ns 0.527 ns 1.00 0.0134 - - 112 B
Span 10 27.71 ns 0.548 ns 0.512 ns 1.00 0.0057 - - 48 B
Unsafe 10 26.76 ns 0.142 ns 0.126 ns 1.00 0.0057 - - 48 B
UnsafeAscii 10 13.40 ns 0.103 ns 0.096 ns 1.00 0.0057 - - 48 B
SuperUnsafe 10 10.28 ns 0.106 ns 0.094 ns 1.00 - - - -
Method N Mean Error StdDev Ratio Gen 0 Gen 1 Gen 2 Allocated
OldSchool 100 83.52 ns 0.966 ns 0.903 ns 1.00 0.0564 - - 472 B
Span 100 45.77 ns 0.441 ns 0.412 ns 1.00 0.0268 - - 224 B
Unsafe 100 44.07 ns 0.511 ns 0.453 ns 1.00 0.0268 - - 224 B
UnsafeAscii 100 31.45 ns 0.382 ns 0.357 ns 1.00 0.0268 - - 224 B
SuperUnsafe 100 10.26 ns 0.078 ns 0.073 ns 1.00 - - - -
Method N Mean Error StdDev Ratio Gen 0 Gen 1 Gen 2 Allocated
OldSchool 1000 512.05 ns 2.909 ns 2.578 ns 1.00 0.4864 0.0052 - 4072 B
Span 1000 260.35 ns 2.593 ns 2.425 ns 1.00 0.2418 0.0017 - 2024 B
Unsafe 1000 255.06 ns 1.587 ns 1.407 ns 1.00 0.2418 0.0017 - 2024 B
UnsafeAscii 1000 247.60 ns 2.500 ns 2.338 ns 1.00 0.2418 0.0017 - 2024 B
SuperUnsafe 1000 10.21 ns 0.060 ns 0.056 ns 1.00 - - - -

完整测试代码

/// <summary>
/// BenchmarkDotNet harness comparing several ways of upper-casing the first
/// character of a string. Each parameterless [Benchmark] method forwards the
/// shared input to the implementation under test.
/// </summary>
public class Test
{

   // Input shared by all benchmarks; regenerated in Setup for the current N.
   private string _data;

   // Fixed seed so the generated input is reproducible between runs.
   private static readonly Random random = new Random(42);

   /// <summary>
   /// Builds a string of <paramref name="length"/> characters drawn from the
   /// upper-case ASCII letters and digits.
   /// </summary>
   /// <param name="length">Number of characters to generate.</param>
   /// <returns>The generated string.</returns>
   public static string RandomString(int length)
   {
      // NOTE(review): the alphabet has no lower-case letters, so the first
      // character of the benchmark input never actually changes case.
      const string chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
      return new string(Enumerable.Repeat(chars, length)
         .Select(s => s[random.Next(s.Length)]).ToArray());
   }

   /// <summary>Length of the benchmark input string.</summary>
   [Params(5, 10, 100, 1000)] public int N;

   /// <summary>Generates the input string for the current <see cref="N"/>.</summary>
   [GlobalSetup]
   public void Setup()
   {
      _data = RandomString(N);
   }

   [Benchmark]
   public string OldSchool() => OldSchool(_data);

   /// <summary>
   /// Concatenates the upper-cased first character with the rest of the string.
   /// </summary>
   public string OldSchool(string value)
      => char.ToUpper(value[0]) + value[1..];

   [Benchmark]
   public string Span() => TestSpan(_data);

   /// <summary>
   /// Writes the characters straight into the buffer of a string created with
   /// <c>string.Create</c>, then upper-cases the first slot.
   /// </summary>
   public string TestSpan(string value)
      => string.Create(value.Length, value, (destination, source) =>
      {
         source.AsSpan().CopyTo(destination);
         destination[0] = char.ToUpper(destination[0]);
      });

   [Benchmark]
   public string Unsafe() => TestUnsafe(_data);

   /// <summary>
   /// Copies the string, then upper-cases the copy's first character through
   /// a pinned pointer.
   /// </summary>
   public unsafe string TestUnsafe(string value)
   {
      var result = new string(value);
      fixed (char* p = result)
      {
         p[0] = char.ToUpper(p[0]);
      }
      return result;
   }

   [Benchmark]
   public unsafe string SuperUnsafe() => SuperUnsafe(_data);

   /// <summary>
   /// Upper-cases the first character of <paramref name="value"/> IN PLACE and
   /// returns the same instance. The caller's string object is modified, so
   /// this is only appropriate when in-place mutation is acceptable.
   /// </summary>
   public unsafe string SuperUnsafe(string value)
   {
      fixed (char* p = value)
      {
         p[0] = char.ToUpper(p[0]);
      }
      return value;
   }

   [Benchmark]
   public string UnsafeAscii() => TestUnsafeAscii(_data);

   /// <summary>
   /// Copies the string and converts an ASCII 'a'..'z' first character to
   /// upper case; any other first character is left as is.
   /// </summary>
   public unsafe string TestUnsafeAscii(string value)
   {
      // Distance between an ASCII lowercase letter and its upper-case form ('a' - 'A' == 32).
      const int AsciiCaseOffset = 'a' - 'A';

      var result = new string(value);
      fixed (char* p = result)
      {
         if (p[0] >= 'a' && p[0] <= 'z')
         {
            // Upper case lies BELOW lower case in ASCII, so the offset is
            // subtracted; adding 32 would produce U+0081.. instead of 'A'..'Z'.
            p[0] = (char)(p[0] - AsciiCaseOffset);
         }
      }
      return result;
   }
}