提高将字节转换为 UInt32 的性能
Improving performance converting bytes into UInt32
我正在开发一段处理 2GB 数据的代码,这些数据代表 60 秒的网络流量。总处理时间约为 40 秒。我正在尽可能优化代码以提高性能,力求将总处理时间控制在 30 秒以内。
我目前在 dotTrace 中的分析表明,在我的代码进行的 330 万次调用中,有 7.62% 的时间花费在我的 Timestamp 结构的构造函数中。
具体来说,我正在努力改进以下两条语句:
// Decode the two 32-bit halves of the buffer: bytes 0-3 become TimestampHigh, bytes 4-7 become TimestampLow.
TimestampHigh = BitConverter.ToUInt32(timestampBytes, 0);
TimestampLow = BitConverter.ToUInt32(timestampBytes, 4);
这是完整的结构:
/// <summary>
/// A 64-bit microsecond count exposed both as two 32-bit halves
/// (<see cref="TimestampHigh"/> / <see cref="TimestampLow"/>) and as whole
/// seconds plus remaining microseconds.
/// </summary>
public readonly struct Timestamp : IEquatable<Timestamp>
{
    /// <summary>Upper 32 bits of the 64-bit microsecond count.</summary>
    public uint TimestampHigh { get; }

    /// <summary>Lower 32 bits of the 64-bit microsecond count.</summary>
    public uint TimestampLow { get; }

    /// <summary>Whole seconds contained in the microsecond count (truncated to 32 bits).</summary>
    public uint Seconds { get; }

    /// <summary>Microseconds left over after removing the whole seconds.</summary>
    public uint Microseconds { get; }

    /// <summary>
    /// The timestamp interpreted as an offset from <see cref="EpochTicks"/> in UTC,
    /// converted to the local time zone.
    /// </summary>
    public DateTime LocalTime => new DateTime(EpochTicks + _ticks, DateTimeKind.Utc).ToLocalTime();

    private const ulong MicrosecondsPerSecond = 1000000UL;

    // 2^32: multiplying by this moves a 32-bit value into the upper half of a 64-bit value.
    private const ulong HighFactor = 4294967296UL;

    private readonly ulong _timestamp;

    // DateTime tick count of 1970-01-01T00:00:00 (same value as DateTime.UnixEpoch.Ticks).
    private const long EpochTicks = 621355968000000000L;

    // One DateTime tick is 100 ns, so one microsecond is 10 ticks.
    private const long TicksPerMicrosecond = 10L;

    private readonly long _ticks;

    /// <summary>
    /// Builds a timestamp from exactly 8 bytes: bytes 0-3 are the high half,
    /// bytes 4-7 the low half.
    /// </summary>
    /// <param name="timestampBytes">The 8 source bytes.</param>
    /// <param name="reverseByteOrder">
    /// When <c>true</c>, each 32-bit half is byte-swapped after being read in the
    /// machine's native byte order.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="timestampBytes"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="timestampBytes"/> is not 8 bytes long.</exception>
    public Timestamp(byte[] timestampBytes, bool reverseByteOrder)
    {
        if (timestampBytes == null)
        {
            // The single-string overload takes the parameter NAME, not a message.
            throw new ArgumentNullException(nameof(timestampBytes));
        }

        if (timestampBytes.Length != 8)
        {
            throw new ArgumentException(
                $"{nameof(timestampBytes)} must have a length of 8.",
                nameof(timestampBytes));
        }

        TimestampHigh = SwapIf(BitConverter.ToUInt32(timestampBytes, 0), reverseByteOrder);
        TimestampLow = SwapIf(BitConverter.ToUInt32(timestampBytes, 4), reverseByteOrder);
        _timestamp = ((ulong)TimestampHigh * HighFactor) + (ulong)TimestampLow;
        _ticks = (long)_timestamp * TicksPerMicrosecond;
        Seconds = (uint)(_timestamp / MicrosecondsPerSecond);
        Microseconds = (uint)(_timestamp % MicrosecondsPerSecond);
    }

    /// <summary>
    /// Builds a timestamp from a seconds / microseconds pair.
    /// </summary>
    /// <param name="seconds">Whole seconds.</param>
    /// <param name="microseconds">Additional microseconds; stored as given, not normalized.</param>
    public Timestamp(uint seconds, uint microseconds)
    {
        Seconds = seconds;
        Microseconds = microseconds;
        _timestamp = seconds * MicrosecondsPerSecond + microseconds;
        _ticks = (long)_timestamp * TicksPerMicrosecond;
        TimestampHigh = (uint)(_timestamp / HighFactor);
        TimestampLow = (uint)(_timestamp % HighFactor);
    }

    /// <summary>
    /// Serializes the timestamp to 8 bytes: high half first, then low half.
    /// </summary>
    /// <param name="reverseByteOrder">
    /// When <c>true</c>, each 32-bit half is byte-swapped before being written in the
    /// machine's native byte order.
    /// </param>
    /// <returns>A new 8-byte array.</returns>
    public byte[] ConvertToBytes(bool reverseByteOrder)
    {
        // The result is always 8 bytes, so write straight into a fixed-size array
        // instead of growing a List<byte> and copying it out again.
        byte[] bytes = new byte[8];
        BitConverter.GetBytes(SwapIf(TimestampHigh, reverseByteOrder)).CopyTo(bytes, 0);
        BitConverter.GetBytes(SwapIf(TimestampLow, reverseByteOrder)).CopyTo(bytes, 4);
        return bytes;
    }

    /// <summary>Two timestamps are equal when both 32-bit halves match.</summary>
    public bool Equals(Timestamp other)
    {
        return TimestampLow == other.TimestampLow && TimestampHigh == other.TimestampHigh;
    }

    public static bool operator ==(Timestamp left, Timestamp right)
    {
        return left.Equals(right);
    }

    public static bool operator !=(Timestamp left, Timestamp right)
    {
        return !left.Equals(right);
    }

    /// <inheritdoc/>
    public override bool Equals(object obj)
    {
        return obj is Timestamp other && Equals(other);
    }

    /// <inheritdoc/>
    public override int GetHashCode()
    {
        return _timestamp.GetHashCode();
    }

    // Byte-swaps value when requested. ReverseEndianness works on the integer directly,
    // so no temporary array is allocated on the hot path.
    private static uint SwapIf(uint value, bool reverseByteOrder)
    {
        return reverseByteOrder
            ? System.Buffers.Binary.BinaryPrimitives.ReverseEndianness(value)
            : value;
    }
}
根据 dotTrace,方法 ReverseByteOrder 似乎不会产生太大的性能损失,因为它代表不到 0.5% 的时间,但这里仅供参考:
/// <summary>
/// Returns <paramref name="value"/> with its four bytes in reverse order when
/// <paramref name="reverseByteOrder"/> is <c>true</c>; otherwise returns it unchanged.
/// </summary>
/// <param name="value">The 32-bit value to convert.</param>
/// <param name="reverseByteOrder">Whether to swap the byte order.</param>
/// <returns>The (possibly byte-swapped) value.</returns>
public static UInt32 ReverseByteOrder(this UInt32 value, bool reverseByteOrder)
{
    // ReverseEndianness swaps the bytes of the integer directly, avoiding the
    // byte[] allocation and Array.Reverse round trip on every call.
    return reverseByteOrder
        ? System.Buffers.Binary.BinaryPrimitives.ReverseEndianness(value)
        : value;
}
看来您为了应对字节序问题做了大量工作。老实说,这正是 BitConverter 的短板。好消息是,现代运行时提供了 BinaryPrimitives,它具有可感知字节序的操作,并且会被 JIT 优化。也就是说:它的实现中包含对 CPU 字节序的检查,但该检查会在 JIT 编译期间被移除,只保留与当前 CPU 相关的代码。因此:请尽可能避免使用 BitConverter。您的代码需要做一些修改,因为 reverseByteOrder 不再是输入参数,但请考虑:
(注意:您可以将 byte[] 作为 Span<byte>/ReadOnlySpan<byte> 传入——这种转换是隐式的)
/// <summary>
/// Builds a timestamp from an 8-byte span, reading each 32-bit half as big-endian.
/// </summary>
/// <param name="timestampBytes">Source bytes; any length other than 8 is rejected.</param>
/// <exception cref="ArgumentException">The span is not exactly 8 bytes long.</exception>
public Timestamp(ReadOnlySpan<byte> timestampBytes)
{
// The throw lives in its own static local function rather than inline in the constructor body.
static void ThrowInvalidLength() // can help inlining in some useful cases
=> throw new ArgumentException($"{nameof(timestampBytes)} must have a length of 8.");
if (timestampBytes.Length != 8) ThrowInvalidLength();
// Bytes 0-3 form the high half.
TimestampHigh = BinaryPrimitives.ReadUInt32BigEndian(timestampBytes);
// Slice(4) skips the first four bytes, so bytes 4-7 form the low half.
TimestampLow = BinaryPrimitives.ReadUInt32BigEndian(timestampBytes.Slice(4));
// NOTE(review): the remaining member initialization is elided in this excerpt.
// ...
}
和
/// <summary>
/// Writes the timestamp into <paramref name="destination"/> as two big-endian
/// 32-bit values: the high half at offset 0, the low half at offset 4.
/// </summary>
/// <param name="destination">Buffer that receives the bytes.</param>
public void ConvertToBytes(Span<byte> destination)
{
    uint highHalf = TimestampHigh;
    uint lowHalf = TimestampLow;

    // High half goes first, at the start of the buffer.
    BinaryPrimitives.WriteUInt32BigEndian(destination, highHalf);

    // Low half follows immediately after the first four bytes.
    Span<byte> tail = destination.Slice(4);
    BinaryPrimitives.WriteUInt32BigEndian(tail, lowHalf);
}
我正在开发一段处理 2GB 数据的代码,这些数据代表 60 秒的网络流量。总处理时间约为 40 秒。我正在尽可能优化代码以提高性能,力求将总处理时间控制在 30 秒以内。
我目前在 dotTrace 中的分析表明,在我的代码进行的 330 万次调用中,有 7.62% 的时间花费在我的 Timestamp 结构的构造函数中。
具体来说,我正在努力改进以下两条语句:
// Decode the two 32-bit halves of the buffer: bytes 0-3 become TimestampHigh, bytes 4-7 become TimestampLow.
TimestampHigh = BitConverter.ToUInt32(timestampBytes, 0);
TimestampLow = BitConverter.ToUInt32(timestampBytes, 4);
这是完整的结构:
/// <summary>
/// A 64-bit microsecond count exposed both as two 32-bit halves
/// (<see cref="TimestampHigh"/> / <see cref="TimestampLow"/>) and as whole
/// seconds plus remaining microseconds.
/// </summary>
public readonly struct Timestamp : IEquatable<Timestamp>
{
    /// <summary>Upper 32 bits of the 64-bit microsecond count.</summary>
    public uint TimestampHigh { get; }

    /// <summary>Lower 32 bits of the 64-bit microsecond count.</summary>
    public uint TimestampLow { get; }

    /// <summary>Whole seconds contained in the microsecond count (truncated to 32 bits).</summary>
    public uint Seconds { get; }

    /// <summary>Microseconds left over after removing the whole seconds.</summary>
    public uint Microseconds { get; }

    /// <summary>
    /// The timestamp interpreted as an offset from <see cref="EpochTicks"/> in UTC,
    /// converted to the local time zone.
    /// </summary>
    public DateTime LocalTime => new DateTime(EpochTicks + _ticks, DateTimeKind.Utc).ToLocalTime();

    private const ulong MicrosecondsPerSecond = 1000000UL;

    // 2^32: multiplying by this moves a 32-bit value into the upper half of a 64-bit value.
    private const ulong HighFactor = 4294967296UL;

    private readonly ulong _timestamp;

    // DateTime tick count of 1970-01-01T00:00:00 (same value as DateTime.UnixEpoch.Ticks).
    private const long EpochTicks = 621355968000000000L;

    // One DateTime tick is 100 ns, so one microsecond is 10 ticks.
    private const long TicksPerMicrosecond = 10L;

    private readonly long _ticks;

    /// <summary>
    /// Builds a timestamp from exactly 8 bytes: bytes 0-3 are the high half,
    /// bytes 4-7 the low half.
    /// </summary>
    /// <param name="timestampBytes">The 8 source bytes.</param>
    /// <param name="reverseByteOrder">
    /// When <c>true</c>, each 32-bit half is byte-swapped after being read in the
    /// machine's native byte order.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="timestampBytes"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="timestampBytes"/> is not 8 bytes long.</exception>
    public Timestamp(byte[] timestampBytes, bool reverseByteOrder)
    {
        if (timestampBytes == null)
        {
            // The single-string overload takes the parameter NAME, not a message.
            throw new ArgumentNullException(nameof(timestampBytes));
        }

        if (timestampBytes.Length != 8)
        {
            throw new ArgumentException(
                $"{nameof(timestampBytes)} must have a length of 8.",
                nameof(timestampBytes));
        }

        TimestampHigh = SwapIf(BitConverter.ToUInt32(timestampBytes, 0), reverseByteOrder);
        TimestampLow = SwapIf(BitConverter.ToUInt32(timestampBytes, 4), reverseByteOrder);
        _timestamp = ((ulong)TimestampHigh * HighFactor) + (ulong)TimestampLow;
        _ticks = (long)_timestamp * TicksPerMicrosecond;
        Seconds = (uint)(_timestamp / MicrosecondsPerSecond);
        Microseconds = (uint)(_timestamp % MicrosecondsPerSecond);
    }

    /// <summary>
    /// Builds a timestamp from a seconds / microseconds pair.
    /// </summary>
    /// <param name="seconds">Whole seconds.</param>
    /// <param name="microseconds">Additional microseconds; stored as given, not normalized.</param>
    public Timestamp(uint seconds, uint microseconds)
    {
        Seconds = seconds;
        Microseconds = microseconds;
        _timestamp = seconds * MicrosecondsPerSecond + microseconds;
        _ticks = (long)_timestamp * TicksPerMicrosecond;
        TimestampHigh = (uint)(_timestamp / HighFactor);
        TimestampLow = (uint)(_timestamp % HighFactor);
    }

    /// <summary>
    /// Serializes the timestamp to 8 bytes: high half first, then low half.
    /// </summary>
    /// <param name="reverseByteOrder">
    /// When <c>true</c>, each 32-bit half is byte-swapped before being written in the
    /// machine's native byte order.
    /// </param>
    /// <returns>A new 8-byte array.</returns>
    public byte[] ConvertToBytes(bool reverseByteOrder)
    {
        // The result is always 8 bytes, so write straight into a fixed-size array
        // instead of growing a List<byte> and copying it out again.
        byte[] bytes = new byte[8];
        BitConverter.GetBytes(SwapIf(TimestampHigh, reverseByteOrder)).CopyTo(bytes, 0);
        BitConverter.GetBytes(SwapIf(TimestampLow, reverseByteOrder)).CopyTo(bytes, 4);
        return bytes;
    }

    /// <summary>Two timestamps are equal when both 32-bit halves match.</summary>
    public bool Equals(Timestamp other)
    {
        return TimestampLow == other.TimestampLow && TimestampHigh == other.TimestampHigh;
    }

    public static bool operator ==(Timestamp left, Timestamp right)
    {
        return left.Equals(right);
    }

    public static bool operator !=(Timestamp left, Timestamp right)
    {
        return !left.Equals(right);
    }

    /// <inheritdoc/>
    public override bool Equals(object obj)
    {
        return obj is Timestamp other && Equals(other);
    }

    /// <inheritdoc/>
    public override int GetHashCode()
    {
        return _timestamp.GetHashCode();
    }

    // Byte-swaps value when requested. ReverseEndianness works on the integer directly,
    // so no temporary array is allocated on the hot path.
    private static uint SwapIf(uint value, bool reverseByteOrder)
    {
        return reverseByteOrder
            ? System.Buffers.Binary.BinaryPrimitives.ReverseEndianness(value)
            : value;
    }
}
根据 dotTrace,方法 ReverseByteOrder 似乎不会产生太大的性能损失,因为它代表不到 0.5% 的时间,但这里仅供参考:
/// <summary>
/// Returns <paramref name="value"/> with its four bytes in reverse order when
/// <paramref name="reverseByteOrder"/> is <c>true</c>; otherwise returns it unchanged.
/// </summary>
/// <param name="value">The 32-bit value to convert.</param>
/// <param name="reverseByteOrder">Whether to swap the byte order.</param>
/// <returns>The (possibly byte-swapped) value.</returns>
public static UInt32 ReverseByteOrder(this UInt32 value, bool reverseByteOrder)
{
    // ReverseEndianness swaps the bytes of the integer directly, avoiding the
    // byte[] allocation and Array.Reverse round trip on every call.
    return reverseByteOrder
        ? System.Buffers.Binary.BinaryPrimitives.ReverseEndianness(value)
        : value;
}
看来您为了应对字节序问题做了大量工作。老实说,这正是 BitConverter 的短板。好消息是,现代运行时提供了 BinaryPrimitives,它具有可感知字节序的操作,并且会被 JIT 优化。也就是说:它的实现中包含对 CPU 字节序的检查,但该检查会在 JIT 编译期间被移除,只保留与当前 CPU 相关的代码。因此:请尽可能避免使用 BitConverter。您的代码需要做一些修改,因为 reverseByteOrder 不再是输入参数,但请考虑:
(注意:您可以将 byte[] 作为 Span<byte>/ReadOnlySpan<byte> 传入——这种转换是隐式的)
/// <summary>
/// Builds a timestamp from an 8-byte span, reading each 32-bit half as big-endian.
/// </summary>
/// <param name="timestampBytes">Source bytes; any length other than 8 is rejected.</param>
/// <exception cref="ArgumentException">The span is not exactly 8 bytes long.</exception>
public Timestamp(ReadOnlySpan<byte> timestampBytes)
{
// The throw lives in its own static local function rather than inline in the constructor body.
static void ThrowInvalidLength() // can help inlining in some useful cases
=> throw new ArgumentException($"{nameof(timestampBytes)} must have a length of 8.");
if (timestampBytes.Length != 8) ThrowInvalidLength();
// Bytes 0-3 form the high half.
TimestampHigh = BinaryPrimitives.ReadUInt32BigEndian(timestampBytes);
// Slice(4) skips the first four bytes, so bytes 4-7 form the low half.
TimestampLow = BinaryPrimitives.ReadUInt32BigEndian(timestampBytes.Slice(4));
// NOTE(review): the remaining member initialization is elided in this excerpt.
// ...
}
和
/// <summary>
/// Writes the timestamp into <paramref name="destination"/> as two big-endian
/// 32-bit values: the high half at offset 0, the low half at offset 4.
/// </summary>
/// <param name="destination">Buffer that receives the bytes.</param>
public void ConvertToBytes(Span<byte> destination)
{
    uint highHalf = TimestampHigh;
    uint lowHalf = TimestampLow;

    // High half goes first, at the start of the buffer.
    BinaryPrimitives.WriteUInt32BigEndian(destination, highHalf);

    // Low half follows immediately after the first four bytes.
    Span<byte> tail = destination.Slice(4);
    BinaryPrimitives.WriteUInt32BigEndian(tail, lowHalf);
}