为什么 JSON 反序列化 System.Text.Json 这么慢?

Why is JSON deserialisation with System.Text.Json so slow?

我有一个用 C# 和 Go 编写的反序列化 json 100,000 次相同的最小项目。性能差异很大。虽然很高兴知道使用 Go 可以实现性能目标,但我更愿意在 C# 中实现类似的结果。鉴于 C# 慢了 193 倍,我认为错误在我这边,但我不明白为什么。

性能

$ dotnet run .
real    1m37.555s
user    1m39.552s
sys     0m0.729s

$ ./jsonperf
real    0m0.478s
user    0m0.500s
sys     0m0.011s

源代码 C#

using System;

namespace jsonperf
{
    class Program
    {
        static void Main(string[] args)
        {
            var json = "{\"e\":\"trade\",\"E\":1633046399882,\"s\":\"BTCBUSD\",\"t\":243216662,\"p\":\"43818.22000000\",\"q\":\"0.00452000\",\"b\":3422298876,\"a\":3422298789,\"T\":1633046399882,\"m\":false,\"M\":true}";

            for (int i = 0; i < 100000; i++)
            {
                if (0 == i % 1000)
                {
                    Console.WriteLine($"Completed: {i}");
                }

                var obj = BinanceTradeUpdate.FromJson(json);
            }

            Console.WriteLine("Done");
        }
    }
}

using System;
using System.Text.Json;
using System.Text.Json.Serialization;

namespace jsonperf
{
    public class BinanceTradeUpdate
    {
        [JsonPropertyName("e")]
        public string EventType
        {
            get;
            set;
        }

        [JsonPropertyName("E")]
        public long EventUnixTimestamp
        {
            get;
            set;
        }

        [JsonIgnore]
        public DateTime EventTime
        {
            get
            {
                return new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddMilliseconds(EventUnixTimestamp);
            }
        }

        [JsonPropertyName("s")]
        public string MarketSymbol
        {
            get;
            set;
        }

        [JsonPropertyName("t")]
        public long TradeId
        {
            get;
            set;
        }

        [JsonPropertyName("p")]
        public double Price
        {
            get;
            set;
        }

        [JsonPropertyName("q")]

        public double Quantity
        {
            get;
            set;
        }

        [JsonPropertyName("b")]
        public long BuyerOrderId
        {
            get;
            set;
        }

        [JsonPropertyName("a")]
        public long SellerOrderId
        {
            get;
            set;
        }

        [JsonPropertyName("T")]
        public long TradeUnixTimestamp
        {
            get;
            set;
        }

        [JsonIgnore]
        public DateTime TradeTime
        {
            get
            {
                return new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddMilliseconds(TradeUnixTimestamp);
            }
        }

        [JsonPropertyName("m")]
        public bool BuyerIsMarketMaker
        {
            get;
            set;
        }

        [JsonPropertyName("M")]
        public bool UndocumentedFlag
        {
            get;
            set;
        }

        public static BinanceTradeUpdate FromJson(string json)
        {
            return JsonSerializer.Deserialize<BinanceTradeUpdate>(
                json,
                new JsonSerializerOptions()
                {
                    NumberHandling = JsonNumberHandling.AllowReadingFromString
                });
        }
    }
}

源码Go

package main

import (
  "encoding/csv"
  "encoding/json"
  "fmt"
  "os"
  "strconv"
)

type Float64Str float64

func (f *Float64Str) UnmarshalJSON(b []byte) error {
  var s string

  // Try to unmarshal string first
  if err := json.Unmarshal(b, &s); err == nil {

    value, err := strconv.ParseFloat(s, 64)

    if err != nil {
      return err
    }

    *f = Float64Str(value)
    return nil
  }

  // If unsuccessful, unmarshal as float64
  return json.Unmarshal(b, (*float64)(f))
}

// Trade represents an exchange of assets in a given market
type Trade struct {
  EventType     string     json:"e"
  EventTime     int64      json:"E"
  MarketSymbol  string     json:"s"
  TradeID       int64      json:"t"
  Price         Float64Str json:"p"
  Quantity      Float64Str json:"q"
  BuyerOrderID  int64      json:"b"
  SellerOrderID int64      json:"a"
  TradeTime     int64      json:"T"
  IsBuyerMaker  bool       json:"m"
  Flag          bool       json:"M"
}

func main() {

  jsonString := "{\"e\":\"trade\",\"E\":1633046399882,\"s\":\"BTCBUSD\",\"t\":243216662,\"p\":\"43818.22000000\",\"q\":\"0.00452000\",\"b\":3422298876,\"a\":3422298789,\"T\":1633046399882,\"m\":false,\"M\":true}"

  // open stdout
  var stdwrite = csv.NewWriter(os.Stdout)

  // convert string several times into obj
  var trade = Trade{}
  counter := 0

  for i := 0; i < 100000; i++ {
    if err := json.Unmarshal([]byte(jsonString), &trade); err != nil {
      stdwrite.Flush()
      panic(err)
    } else {
      counter++

      if counter%1000 == 0 {
        fmt.Printf("%d elements read\n", counter)
      }
    }
  }
}

这需要这么长时间的原因是你每次都在初始化一个新的 JsonSerializerOptions 对象。

初始化序列化器一次,你会看到巨大的性能提升(对我来说是 70%+)。