如何提高数据转换 to/from 二进制的 Dart 性能?
How to improve Dart performance of data conversion to/from binary?
为一家更大的德国公司 Future Technologies Group 做一些咨询工作,我已经将大约 6000 行 Java 服务器端软件移植到 Dart。这应该有助于回答 Dart 是否可以在服务器上高效使用的问题。 (由于寻找一种语言用于客户端和服务器端编程的优势,这本身会为 Dart 开绿灯。)
学习 Dart(我真的很喜欢用它工作)让我期望相对于 Java 有 30-50% 的性能损失,但在任何情况下都不差于 100%(慢两倍)这是上述决策过程的截止点。
移植很顺利。我学到了很多。单元测试很好。但结果证明性能非常糟糕......与 Java 程序相比,整体速度慢了七倍。
分析代码揭示了两个罪魁祸首:数据转换和文件 I/O。也许我做错了什么?在我回到我的客户那里并且他们取消了他们的 Dart 研究之前,我想搜索一些关于如何改进的建议。让我们从数据转换开始,将 Dart 原生数据类型转换成各种二进制格式,可用于数据的有效传输和存储。
通常这些转换很简单而且非常快,因为没有什么真正需要从使用的内部格式转换,但大部分都存储在缓冲区中。我创建了一个基准程序,它以某种方式反映了我程序中这些转换的典型用法:
import 'dart:typed_data';
import 'package:benchmark_harness/benchmark_harness.dart';
/// Benchmark that fills a 256 KiB [ByteData] buffer with a repeating 64-byte
/// record (1 float64, 8 float32, 2 int32, 2 int16, 2 int8 and a 10-character
/// string) using the `ByteData.setXxx` methods.
class ConversionBenchmark extends BenchmarkBase {
  /// Byte view of the buffer filled by the most recent call to [run].
  Uint8List result;
  ConversionBenchmark() : super("Conversion");
  // The benchmark code.
  void run() {
    const int BufSize = 262144; // 256kBytes
    const int SetSize = 64; // one "typical" set of data, gets repeated
    ByteData buffer = new ByteData(BufSize);
    double doubleContent = 0.0; // used to simulate double content
    int intContent = 0; // used to simulate int content
    int offset = 0; // write cursor, advanced manually after every set call
    // `/` yields a double here; the loop still runs BufSize / SetSize times.
    for (int j = 0; j < buffer.lengthInBytes / SetSize; j++) {
      // The following represents some "typical" conversion mix:
      buffer.setFloat64(offset, doubleContent); offset += 8; doubleContent += 0.123;
      for (int k = 0; k < 8; k++) { // main use case
        buffer.setFloat32(offset, doubleContent); offset += 4; doubleContent += 0.123;
      }
      buffer.setInt32(offset, intContent); offset += 4; intContent++;
      buffer.setInt32(offset, intContent); offset += 4; intContent++;
      buffer.setInt16(offset, intContent); offset += 2; intContent++;
      buffer.setInt16(offset, intContent); offset += 2; intContent++;
      buffer.setInt8(offset, intContent); offset += 1; intContent++;
      buffer.setInt8(offset, intContent); offset += 1; intContent++;
      // A Uint8List view starting at [offset] is created on every iteration
      // so the string's code units can be copied in with setAll.
      buffer.buffer.asUint8List(offset).setAll(0, "AsciiStrng".codeUnits); offset += 10;
      // [ByteData] knows no other mechanism to transfer ASCII strings in
      assert((offset % SetSize) == 0); // ensure the example content fits [SetSize] bytes
    }
    result = buffer.buffer.asUint8List(); // only this can be used for further processing
  }
}
/// Entry point: builds the benchmark and lets the harness run and report it.
main() {
  final benchmark = new ConversionBenchmark();
  benchmark.report();
}
它基于 https://github.com/dart-lang/benchmark_harness 提供的基准测试框架。为了进行比较,我使用了下面的 Java 程序,它基于该 Dart 基准测试框架的 Java 移植版(https://github.com/bono8106/benchmark_harness_java):
package ylib.tools;
import java.nio.ByteBuffer;
/**
 * Fills a 256 KiB {@link ByteBuffer} with a repeating 64-byte record
 * (1 double, 8 floats, 2 ints, 2 shorts, 2 bytes and a 10-character ASCII string)
 * using the relative {@code put} methods.
 */
public class ConversionBenchmark extends BenchmarkBase {
  /** Buffer produced by the most recent {@link #run()}. */
  public ByteBuffer result;

  public ConversionBenchmark() {
    super("Conversion");
  }

  /** The benchmark code: one full pass over the buffer. */
  @Override
  protected void run() {
    final int bufSize = 262144; // 256kBytes
    final int setSize = 64; // one "typical" set of data, gets repeated
    ByteBuffer buffer = ByteBuffer.allocate(bufSize);
    double doubleContent = 0.0; // used to simulate double content
    int intContent = 0; // used to simulate int content
    for (int j = 0; j < (buffer.capacity() / setSize); j++) {
      // The following represents some "typical" conversion mix:
      buffer.putDouble(doubleContent);
      doubleContent += 0.123;
      for (int k = 0; k < 8; k++) { // main use case
        buffer.putFloat((float) doubleContent);
        doubleContent += 0.123;
      }
      buffer.putInt(intContent);
      intContent++;
      buffer.putInt(intContent);
      intContent++;
      buffer.putShort((short) intContent);
      intContent++;
      buffer.putShort((short) intContent);
      intContent++;
      buffer.put((byte) intContent);
      intContent++;
      buffer.put((byte) intContent);
      intContent++;
      // Explicit charset: the no-argument getBytes() uses the platform default,
      // which is not guaranteed to encode this string as 10 single bytes.
      buffer.put("AsciiStrng".getBytes(java.nio.charset.StandardCharsets.US_ASCII));
      //assert((buffer.position() % setSize) == 0); // ensure the example content fits [setSize] bytes
    }
    buffer.flip(); // needed for further processing
    result = buffer; // to avoid the compiler optimizing away everything
  }

  public static void main(String[] args) {
    new ConversionBenchmark().report();
  }
}
在我的 Intel Windows 7 机器上,Java 代码的运行速度几乎是 Dart 代码的 10 倍。两者都在各自的 VM 上以生产模式运行。
代码中是否存在明显错误?或者是否有不同的 Dart 类 可用于完成这项工作?关于为什么 Dart 在这些简单的转换中慢得多的解释?还是我对 Dart VM 性能的期望完全错误?
与直接访问类型化数组相比,ByteData 的方法(ByteData.setXYZ 和 ByteData.getXYZ)在 Dart VM 上的性能确实很差。我们已经开始着手解决这个问题,初步结果令人鼓舞[1]。
与此同时,您可以使用类型化数组自行实现大端字节序的转换(完整代码位于 [2]),以绕过这个令人遗憾的性能问题:
/// Writer wraps a fixed size Uint8List and writes values into it using
/// big-endian byte order.
///
/// The class is abstract because [writeFloat64] has no body here. Instances
/// are obtained through the factory constructor, which picks the
/// implementation matching the host byte order.
abstract class Writer {
  /// Output buffer.
  final Uint8List out;

  /// Current position within [out].
  var position = 0;

  Writer._create(this.out);

  /// Allocates a [Uint8List] of [size] bytes and returns a writer over it.
  factory Writer(size) {
    final out = new Uint8List(size);
    if (Endianness.HOST_ENDIAN == Endianness.LITTLE_ENDIAN) {
      return new _WriterForLEHost._create(out);
    } else {
      return new _WriterForBEHost._create(out);
    }
  }

  /// Writes [v] into [out]; implemented by the host-specific subclasses.
  writeFloat64(double v);
}
/// Scratch lists used for data conversion. [_convF32] and [_convF64] are views
/// over the buffer of [_convU8], so a value stored through one of them can be
/// read back byte by byte through [_convU8] (and vice versa).
final Uint8List _convU8 = new Uint8List(8);
final Float32List _convF32 = new Float32List.view(_convU8.buffer);
final Float64List _convF64 = new Float64List.view(_convU8.buffer);
/// [Writer] implementation selected when the host is little-endian.
class _WriterForLEHost extends Writer {
  _WriterForLEHost._create(out) : super._create(out);

  /// Stores [v] at [position] as eight bytes in reversed host order and
  /// advances [position] by 8.
  writeFloat64(double v) {
    // Expose the raw bytes of [v] through the aliased scratch list.
    _convF64[0] = v;
    final start = position;
    // Scratch byte 0 goes to the highest offset, byte 7 to the lowest.
    out[start + 7] = _convU8[0];
    out[start + 6] = _convU8[1];
    out[start + 5] = _convU8[2];
    out[start + 4] = _convU8[3];
    out[start + 3] = _convU8[4];
    out[start + 2] = _convU8[5];
    out[start + 1] = _convU8[6];
    out[start] = _convU8[7];
    position = start + 8;
  }
}
在您的测试中对这种手动转换进行基准测试可获得大约 6 倍的改进:
import 'dart:typed_data';
import 'package:benchmark_harness/benchmark_harness.dart';
import 'writer.dart';
/// Benchmark that fills a 256 KiB buffer with a repeating 64-byte record
/// through the hand-written [Writer] instead of `ByteData.setXxx`.
class ConversionBenchmarkManual extends BenchmarkBase {
  /// Output buffer of the writer used by the most recent call to [run].
  Uint8List result;

  ConversionBenchmarkManual() : super("Conversion (MANUAL)");

  // The benchmark code.
  void run() {
    const int BufSize = 262144; // 256kBytes
    const int SetSize = 64; // one "typical" set of data, gets repeated
    final w = new Writer(BufSize);
    double doubleContent = 0.0; // used to simulate double content
    int intContent = 0; // used to simulate int content
    for (int j = 0; j < (BufSize ~/ SetSize); j++) {
      // The following represents some "typical" conversion mix:
      w.writeFloat64(doubleContent); doubleContent += 0.123;
      for (int k = 0; k < 8; k++) { // main use case
        w.writeFloat32(doubleContent); doubleContent += 0.123;
      }
      w.writeInt32(intContent); intContent++;
      w.writeInt32(intContent); intContent++;
      w.writeInt16(intContent); intContent++;
      w.writeInt16(intContent); intContent++;
      w.writeInt8(intContent); intContent++;
      w.writeInt8(intContent); intContent++;
      w.writeString("AsciiStrng");
      // Check the writer's own cursor: a separate local offset that is never
      // advanced would make this assertion trivially true.
      assert((w.position % SetSize) == 0); // ensure the example content fits [SetSize] bytes
    }
    result = w.out; // only this can be used for further processing
  }
}
我想补充一些关于我最终如何解决性能问题以及结果如何的细节。
首先,我采用了 Vyacheslav Egorov 发布的方法,并在此基础上开发了自己的数据转换器类,它提供双向转换。它还不是生产级代码,但在我移植的服务器软件中运行良好,因此我将其附在下面。我有意将 [buffer] 保留为公共变量。这样的封装也许不够完美,但便于直接对缓冲区进行读写,例如通过 [RandomAccessFile.readInto] 和 [RandomAccessFile.writeFrom]。一切都简单而高效!
事实证明,这些数据转换是初始性能缓慢的罪魁祸首,比 Java 版本慢七倍。随着变化,性能差距大大缩小。 6000 行服务器应用程序的 Dart 版本现在仅落后 Java 版本约 30%。比我对具有如此灵活的打字概念的语言的预期要好。这将使 Dart 在我的客户未来的技术决策中处于有利地位。
在我看来,为客户端和服务器应用程序使用一种语言可能是 Dart 的一个很好的论据。
下面是用于该项目的数据转换器的代码:
part of ylib;
/// [DataConverter] wraps a fixed size [Uint8List] and converts values from and
/// into it using big-endian byte order.
///
/// Every `get…`/`put…` method works at the current [position] and advances it
/// by the number of bytes read or written.
abstract class DataConverter {
  /// Buffer. Public so that callers can read from and write into it directly.
  final Uint8List buffer;

  /// Current position within [buffer].
  int _position = 0;

  DataConverter._create(this.buffer);

  /// Creates the converter with a newly allocated [buffer] of [size] bytes,
  /// choosing the implementation that matches the host byte order.
  factory DataConverter(size) {
    final out = new Uint8List(size);
    if (Endianness.HOST_ENDIAN == Endianness.LITTLE_ENDIAN) {
      return new _ConverterForLEHost._create(out);
    } else {
      return new _ConverterForBEHost._create(out);
    }
  }

  /// Number of bytes in [buffer].
  int get length => buffer.length;

  /// Offset at which the next read or write takes place.
  int get position => _position;

  /// Moves the cursor. Throws [ArgumentError] unless
  /// `0 <= position <= buffer.lengthInBytes`.
  set position(int position) {
    if ((position < 0) || (position > buffer.lengthInBytes)) throw new ArgumentError(position);
    _position = position;
  }

  // Floating point conversions depend on the host byte order and are
  // implemented by the subclasses.
  double getFloat64();
  putFloat64(double v);
  double getFloat32();
  putFloat32(double v);

  // 2^(bits-1) for each width: the first value that no longer fits the signed
  // range, used for range asserts and for sign conversion on read.
  static const int _MaxSignedInt64plus1 = 9223372036854775808;
  static const int _MaxSignedInt32plus1 = 2147483648;
  static const int _MaxSignedInt16plus1 = 32768;
  static const int _MaxSignedInt8plus1 = 128;

  /// Reads eight bytes, most significant first, as a signed 64-bit integer.
  int getInt64() {
    int v =
        buffer[_position + 7] | (buffer[_position + 6] << 8) | (buffer[_position + 5] << 16) |
        (buffer[_position + 4] << 24) | (buffer[_position + 3] << 32) |
        (buffer[_position + 2] << 40) | (buffer[_position + 1] << 48) | (buffer[_position] << 56);
    _position += 8;
    // The bytes were combined as an unsigned value; map the upper half of the
    // range back to negative numbers.
    if (v >= _MaxSignedInt64plus1) v -= 2 * _MaxSignedInt64plus1;
    return v;
  }

  /// Writes [v] as eight bytes, most significant first.
  putInt64(int v) {
    assert((v < _MaxSignedInt64plus1) && (v >= -_MaxSignedInt64plus1));
    // Each store into the Uint8List keeps only the low eight bits of the
    // shifted value, so no explicit masking is done.
    buffer[_position + 7] = v;
    buffer[_position + 6] = (v >> 8);
    buffer[_position + 5] = (v >> 16);
    buffer[_position + 4] = (v >> 24);
    buffer[_position + 3] = (v >> 32);
    buffer[_position + 2] = (v >> 40);
    buffer[_position + 1] = (v >> 48);
    buffer[_position + 0] = (v >> 56);
    _position += 8;
  }

  /// Reads four bytes, most significant first, as a signed 32-bit integer.
  int getInt32() {
    int v = buffer[_position + 3] | (buffer[_position + 2] << 8) | (buffer[_position + 1] << 16) |
        (buffer[_position] << 24);
    _position += 4;
    if (v >= _MaxSignedInt32plus1) v -= 2 * _MaxSignedInt32plus1;
    return v;
  }

  /// Writes [v] as four bytes, most significant first.
  putInt32(int v) {
    assert((v < _MaxSignedInt32plus1) && (v >= -_MaxSignedInt32plus1));
    buffer[_position + 3] = v;
    buffer[_position + 2] = (v >> 8);
    buffer[_position + 1] = (v >> 16);
    buffer[_position + 0] = (v >> 24);
    _position += 4;
  }

  // NOTE: an alternative getInt32/putInt32 that copies the bytes through an
  // Int32List view of the shared scratch buffer (the technique used for the
  // floating point conversions) was measured by the original author to be
  // about 50% slower than the shift-based code above.

  /// Reads two bytes, most significant first, as a signed 16-bit integer.
  int getInt16() {
    int v = buffer[_position + 1] | (buffer[_position] << 8);
    _position += 2;
    if (v >= _MaxSignedInt16plus1) v -= 2 * _MaxSignedInt16plus1;
    return v;
  }

  /// Writes [v] as two bytes, most significant first.
  putInt16(int v) {
    assert((v < _MaxSignedInt16plus1) && (v >= -_MaxSignedInt16plus1));
    buffer[_position + 1] = v;
    buffer[_position + 0] = (v >> 8);
    _position += 2;
  }

  /// Reads one byte as a signed 8-bit integer.
  int getInt8() {
    int v = buffer[_position++];
    if (v >= _MaxSignedInt8plus1) v -= 2 * _MaxSignedInt8plus1;
    return v;
  }

  /// Writes [v] as a single byte.
  putInt8(int v) {
    assert((v < _MaxSignedInt8plus1) && (v >= -_MaxSignedInt8plus1));
    buffer[_position] = v;
    _position++;
  }

  /// Reads [length] bytes and returns them as a string, one character per byte.
  String getString(int length) {
    String s = new String.fromCharCodes(buffer, _position, _position + length);
    _position += length;
    return s;
  }

  /// Writes [str] as one byte per UTF-16 code unit.
  ///
  /// Only code units up to 0xFF can be stored; like the integer writers, this
  /// is checked with an assert rather than at runtime in production mode.
  putString(String str) {
    final units = str.codeUnits;
    // A larger code unit would be silently cut down to its low byte and
    // [getString] would return a different string.
    assert(units.every((c) => c <= 0xFF));
    buffer.setAll(_position, units);
    _position += units.length;
  }
}
/// Scratch lists used for data conversion. [_convF32] and [_convF64] are views
/// over the buffer of [_convU8], so a value stored through one of them can be
/// read back byte by byte through [_convU8] (and vice versa).
final Uint8List _convU8 = new Uint8List(8);
final Float32List _convF32 = new Float32List.view(_convU8.buffer);
final Float64List _convF64 = new Float64List.view(_convU8.buffer);
/// [DataConverter] implementation used on a little-endian host: bytes are
/// copied between [buffer] and the scratch lists in reversed order.
class _ConverterForLEHost extends DataConverter {
  _ConverterForLEHost._create(out) : super._create(out);

  /// Reads eight bytes at the current position as a double.
  double getFloat64() {
    final p = _position;
    _convU8[0] = buffer[p + 7];
    _convU8[1] = buffer[p + 6];
    _convU8[2] = buffer[p + 5];
    _convU8[3] = buffer[p + 4];
    _convU8[4] = buffer[p + 3];
    _convU8[5] = buffer[p + 2];
    _convU8[6] = buffer[p + 1];
    _convU8[7] = buffer[p];
    _position = p + 8;
    return _convF64[0];
  }

  /// Writes [v] as eight bytes at the current position.
  putFloat64(double v) {
    _convF64[0] = v;
    final p = _position;
    buffer[p + 7] = _convU8[0];
    buffer[p + 6] = _convU8[1];
    buffer[p + 5] = _convU8[2];
    buffer[p + 4] = _convU8[3];
    buffer[p + 3] = _convU8[4];
    buffer[p + 2] = _convU8[5];
    buffer[p + 1] = _convU8[6];
    buffer[p] = _convU8[7];
    _position = p + 8;
  }

  /// Reads four bytes at the current position as a single-precision value.
  double getFloat32() {
    final p = _position;
    _convU8[0] = buffer[p + 3];
    _convU8[1] = buffer[p + 2];
    _convU8[2] = buffer[p + 1];
    _convU8[3] = buffer[p];
    _position = p + 4;
    return _convF32[0];
  }

  /// Writes [v], narrowed to single precision, as four bytes.
  putFloat32(double v) {
    _convF32[0] = v;
    // Overflow check: a finite input must not have become non-finite.
    assert(_convF32[0].isFinite || !v.isFinite);
    final p = _position;
    buffer[p + 3] = _convU8[0];
    buffer[p + 2] = _convU8[1];
    buffer[p + 1] = _convU8[2];
    buffer[p] = _convU8[3];
    _position = p + 4;
  }
}
/// [DataConverter] implementation used on a big-endian host: bytes are copied
/// between [buffer] and the scratch lists in the same order.
class _ConverterForBEHost extends DataConverter {
  _ConverterForBEHost._create(out) : super._create(out);

  /// Reads eight bytes at the current position as a double.
  double getFloat64() {
    final p = _position;
    _convU8[0] = buffer[p];
    _convU8[1] = buffer[p + 1];
    _convU8[2] = buffer[p + 2];
    _convU8[3] = buffer[p + 3];
    _convU8[4] = buffer[p + 4];
    _convU8[5] = buffer[p + 5];
    _convU8[6] = buffer[p + 6];
    _convU8[7] = buffer[p + 7];
    _position = p + 8;
    return _convF64[0];
  }

  /// Writes [v] as eight bytes at the current position.
  putFloat64(double v) {
    _convF64[0] = v;
    final p = _position;
    buffer[p] = _convU8[0];
    buffer[p + 1] = _convU8[1];
    buffer[p + 2] = _convU8[2];
    buffer[p + 3] = _convU8[3];
    buffer[p + 4] = _convU8[4];
    buffer[p + 5] = _convU8[5];
    buffer[p + 6] = _convU8[6];
    buffer[p + 7] = _convU8[7];
    _position = p + 8;
  }

  /// Reads four bytes at the current position as a single-precision value.
  double getFloat32() {
    final p = _position;
    _convU8[0] = buffer[p];
    _convU8[1] = buffer[p + 1];
    _convU8[2] = buffer[p + 2];
    _convU8[3] = buffer[p + 3];
    _position = p + 4;
    return _convF32[0];
  }

  /// Writes [v], narrowed to single precision, as four bytes.
  putFloat32(double v) {
    _convF32[0] = v;
    // Overflow check: a finite input must not have become non-finite.
    assert(_convF32[0].isFinite || !v.isFinite);
    final p = _position;
    buffer[p] = _convU8[0];
    buffer[p + 1] = _convU8[1];
    buffer[p + 2] = _convU8[2];
    buffer[p + 3] = _convU8[3];
    _position = p + 4;
  }
}
还有一个非常小的基本测试单元:
import 'package:ylib/ylib.dart';
import 'package:unittest/unittest.dart';
// -------- Test program for [DataConverter]: --------
/// Round-trip tests: each case writes values with the `put…` methods, rewinds
/// the position and reads them back with the matching `get…` methods.
void main() {
  // One converter is shared by all tests; every test sets the position itself.
  DataConverter dc = new DataConverter(100);

  test('Float64', () {
    // d1 has to be a finite double (max is about 1.8e308). A literal beyond
    // that range evaluates to infinity, which would not exercise the mantissa
    // bytes of the conversion.
    double d1 = 1.246e307, d2 = -0.0000745687436849437;
    dc.position = 0;
    dc..putFloat64(d1)..putFloat64(d2);
    dc.position = 0; // reset it
    expect(dc.getFloat64(), d1);
    expect(dc.getFloat64(), d2);
  });

  test('Float32', () {
    double d1 = -0.43478e32, d2 = -0.0;
    dc.position = 0;
    dc..putFloat32(d1)..putFloat32(d2);
    dc.position = 0; // reset it
    // d1 is not exactly representable in single precision, hence the tolerance.
    expect(dc.getFloat32(), closeTo(d1, 1.7e24));
    expect(dc.getFloat32(), d2);
  });

  test('Int64', () {
    int i1 = 9223372036854775807, i2 = -22337203685477580;
    dc.position = 3; // deliberately unaligned
    dc..putInt64(i1)..putInt64(i2);
    dc.position = 3; // reset it
    expect(dc.getInt64(), i1);
    expect(dc.getInt64(), i2);
  });

  test('Int32_16_8', () {
    int i1 = 192233720, i2 = -7233, i3 = 32, i4 = -17;
    dc.position = 0;
    dc..putInt32(i1)..putInt16(i2)..putInt8(i3)..putInt32(i4);
    dc.position = 0; // reset it
    expect(dc.getInt32(), i1);
    expect(dc.getInt16(), i2);
    expect(dc.getInt8(), i3);
    expect(dc.getInt32(), i4);
  });

  test('String', () {
    String s1 = r"922337203!§$%&()=?68547/807", s2 = "-22337203685477580Anton";
    int i1 = -33;
    dc.position = 33;
    dc..putString(s1)..putInt8(i1)..putString(s2);
    dc.position = 33; // reset it
    expect(dc.getString(s1.length), s1);
    expect(dc.getInt8(), i1);
    expect(dc.getString(s2.length), s2);
  });
}
为一家更大的德国公司 Future Technologies Group 做一些咨询工作,我已经将大约 6000 行 Java 服务器端软件移植到 Dart。这应该有助于回答 Dart 是否可以在服务器上高效使用的问题。 (由于寻找一种语言用于客户端和服务器端编程的优势,这本身会为 Dart 开绿灯。)
学习 Dart(我真的很喜欢用它工作)让我期望相对于 Java 有 30-50% 的性能损失,但在任何情况下都不差于 100%(慢两倍)这是上述决策过程的截止点。
移植很顺利。我学到了很多。单元测试很好。但结果证明性能非常糟糕......与 Java 程序相比,整体速度慢了七倍。
分析代码揭示了两个罪魁祸首:数据转换和文件 I/O。也许我做错了什么?在我回到我的客户那里并且他们取消了他们的 Dart 研究之前,我想搜索一些关于如何改进的建议。让我们从数据转换开始,将 Dart 原生数据类型转换成各种二进制格式,可用于数据的有效传输和存储。
通常这些转换很简单而且非常快,因为没有什么真正需要从使用的内部格式转换,但大部分都存储在缓冲区中。我创建了一个基准程序,它以某种方式反映了我程序中这些转换的典型用法:
import 'dart:typed_data';
import 'package:benchmark_harness/benchmark_harness.dart';
/// Benchmark that fills a 256 KiB [ByteData] buffer with a repeating 64-byte
/// record (1 float64, 8 float32, 2 int32, 2 int16, 2 int8 and a 10-character
/// string) using the `ByteData.setXxx` methods.
class ConversionBenchmark extends BenchmarkBase {
  /// Byte view of the buffer filled by the most recent call to [run].
  Uint8List result;
  ConversionBenchmark() : super("Conversion");
  // The benchmark code.
  void run() {
    const int BufSize = 262144; // 256kBytes
    const int SetSize = 64; // one "typical" set of data, gets repeated
    ByteData buffer = new ByteData(BufSize);
    double doubleContent = 0.0; // used to simulate double content
    int intContent = 0; // used to simulate int content
    int offset = 0; // write cursor, advanced manually after every set call
    // `/` yields a double here; the loop still runs BufSize / SetSize times.
    for (int j = 0; j < buffer.lengthInBytes / SetSize; j++) {
      // The following represents some "typical" conversion mix:
      buffer.setFloat64(offset, doubleContent); offset += 8; doubleContent += 0.123;
      for (int k = 0; k < 8; k++) { // main use case
        buffer.setFloat32(offset, doubleContent); offset += 4; doubleContent += 0.123;
      }
      buffer.setInt32(offset, intContent); offset += 4; intContent++;
      buffer.setInt32(offset, intContent); offset += 4; intContent++;
      buffer.setInt16(offset, intContent); offset += 2; intContent++;
      buffer.setInt16(offset, intContent); offset += 2; intContent++;
      buffer.setInt8(offset, intContent); offset += 1; intContent++;
      buffer.setInt8(offset, intContent); offset += 1; intContent++;
      // A Uint8List view starting at [offset] is created on every iteration
      // so the string's code units can be copied in with setAll.
      buffer.buffer.asUint8List(offset).setAll(0, "AsciiStrng".codeUnits); offset += 10;
      // [ByteData] knows no other mechanism to transfer ASCII strings in
      assert((offset % SetSize) == 0); // ensure the example content fits [SetSize] bytes
    }
    result = buffer.buffer.asUint8List(); // only this can be used for further processing
  }
}
/// Entry point: builds the benchmark and lets the harness run and report it.
main() {
  final benchmark = new ConversionBenchmark();
  benchmark.report();
}
它基于 https://github.com/dart-lang/benchmark_harness 提供的基准测试框架。为了进行比较,我使用了下面的 Java 程序,它基于该 Dart 基准测试框架的 Java 移植版(https://github.com/bono8106/benchmark_harness_java):
package ylib.tools;
import java.nio.ByteBuffer;
/**
 * Fills a 256 KiB {@link ByteBuffer} with a repeating 64-byte record
 * (1 double, 8 floats, 2 ints, 2 shorts, 2 bytes and a 10-character ASCII string)
 * using the relative {@code put} methods.
 */
public class ConversionBenchmark extends BenchmarkBase {
  /** Buffer produced by the most recent {@link #run()}. */
  public ByteBuffer result;

  public ConversionBenchmark() {
    super("Conversion");
  }

  /** The benchmark code: one full pass over the buffer. */
  @Override
  protected void run() {
    final int bufSize = 262144; // 256kBytes
    final int setSize = 64; // one "typical" set of data, gets repeated
    ByteBuffer buffer = ByteBuffer.allocate(bufSize);
    double doubleContent = 0.0; // used to simulate double content
    int intContent = 0; // used to simulate int content
    for (int j = 0; j < (buffer.capacity() / setSize); j++) {
      // The following represents some "typical" conversion mix:
      buffer.putDouble(doubleContent);
      doubleContent += 0.123;
      for (int k = 0; k < 8; k++) { // main use case
        buffer.putFloat((float) doubleContent);
        doubleContent += 0.123;
      }
      buffer.putInt(intContent);
      intContent++;
      buffer.putInt(intContent);
      intContent++;
      buffer.putShort((short) intContent);
      intContent++;
      buffer.putShort((short) intContent);
      intContent++;
      buffer.put((byte) intContent);
      intContent++;
      buffer.put((byte) intContent);
      intContent++;
      // Explicit charset: the no-argument getBytes() uses the platform default,
      // which is not guaranteed to encode this string as 10 single bytes.
      buffer.put("AsciiStrng".getBytes(java.nio.charset.StandardCharsets.US_ASCII));
      //assert((buffer.position() % setSize) == 0); // ensure the example content fits [setSize] bytes
    }
    buffer.flip(); // needed for further processing
    result = buffer; // to avoid the compiler optimizing away everything
  }

  public static void main(String[] args) {
    new ConversionBenchmark().report();
  }
}
在我的 Intel Windows 7 机器上,Java 代码的运行速度几乎是 Dart 代码的 10 倍。两者都在各自的 VM 上以生产模式运行。
代码中是否存在明显错误?或者是否有不同的 Dart 类 可用于完成这项工作?关于为什么 Dart 在这些简单的转换中慢得多的解释?还是我对 Dart VM 性能的期望完全错误?
与直接访问类型化数组相比,ByteData 的方法(ByteData.setXYZ 和 ByteData.getXYZ)在 Dart VM 上的性能确实很差。我们已经开始着手解决这个问题,初步结果令人鼓舞[1]。
与此同时,您可以使用类型化数组自行实现大端字节序的转换(完整代码位于 [2]),以绕过这个令人遗憾的性能问题:
/// Writer wraps a fixed size Uint8List and writes values into it using
/// big-endian byte order.
///
/// The class is abstract because [writeFloat64] has no body here. Instances
/// are obtained through the factory constructor, which picks the
/// implementation matching the host byte order.
abstract class Writer {
  /// Output buffer.
  final Uint8List out;

  /// Current position within [out].
  var position = 0;

  Writer._create(this.out);

  /// Allocates a [Uint8List] of [size] bytes and returns a writer over it.
  factory Writer(size) {
    final out = new Uint8List(size);
    if (Endianness.HOST_ENDIAN == Endianness.LITTLE_ENDIAN) {
      return new _WriterForLEHost._create(out);
    } else {
      return new _WriterForBEHost._create(out);
    }
  }

  /// Writes [v] into [out]; implemented by the host-specific subclasses.
  writeFloat64(double v);
}
/// Scratch lists used for data conversion. [_convF32] and [_convF64] are views
/// over the buffer of [_convU8], so a value stored through one of them can be
/// read back byte by byte through [_convU8] (and vice versa).
final Uint8List _convU8 = new Uint8List(8);
final Float32List _convF32 = new Float32List.view(_convU8.buffer);
final Float64List _convF64 = new Float64List.view(_convU8.buffer);
/// [Writer] implementation selected when the host is little-endian.
class _WriterForLEHost extends Writer {
  _WriterForLEHost._create(out) : super._create(out);

  /// Stores [v] at [position] as eight bytes in reversed host order and
  /// advances [position] by 8.
  writeFloat64(double v) {
    // Expose the raw bytes of [v] through the aliased scratch list.
    _convF64[0] = v;
    final start = position;
    // Scratch byte 0 goes to the highest offset, byte 7 to the lowest.
    out[start + 7] = _convU8[0];
    out[start + 6] = _convU8[1];
    out[start + 5] = _convU8[2];
    out[start + 4] = _convU8[3];
    out[start + 3] = _convU8[4];
    out[start + 2] = _convU8[5];
    out[start + 1] = _convU8[6];
    out[start] = _convU8[7];
    position = start + 8;
  }
}
在您的测试中对这种手动转换进行基准测试可获得大约 6 倍的改进:
import 'dart:typed_data';
import 'package:benchmark_harness/benchmark_harness.dart';
import 'writer.dart';
/// Benchmark that fills a 256 KiB buffer with a repeating 64-byte record
/// through the hand-written [Writer] instead of `ByteData.setXxx`.
class ConversionBenchmarkManual extends BenchmarkBase {
  /// Output buffer of the writer used by the most recent call to [run].
  Uint8List result;

  ConversionBenchmarkManual() : super("Conversion (MANUAL)");

  // The benchmark code.
  void run() {
    const int BufSize = 262144; // 256kBytes
    const int SetSize = 64; // one "typical" set of data, gets repeated
    final w = new Writer(BufSize);
    double doubleContent = 0.0; // used to simulate double content
    int intContent = 0; // used to simulate int content
    for (int j = 0; j < (BufSize ~/ SetSize); j++) {
      // The following represents some "typical" conversion mix:
      w.writeFloat64(doubleContent); doubleContent += 0.123;
      for (int k = 0; k < 8; k++) { // main use case
        w.writeFloat32(doubleContent); doubleContent += 0.123;
      }
      w.writeInt32(intContent); intContent++;
      w.writeInt32(intContent); intContent++;
      w.writeInt16(intContent); intContent++;
      w.writeInt16(intContent); intContent++;
      w.writeInt8(intContent); intContent++;
      w.writeInt8(intContent); intContent++;
      w.writeString("AsciiStrng");
      // Check the writer's own cursor: a separate local offset that is never
      // advanced would make this assertion trivially true.
      assert((w.position % SetSize) == 0); // ensure the example content fits [SetSize] bytes
    }
    result = w.out; // only this can be used for further processing
  }
}
我想补充一些关于我最终如何解决性能问题以及结果如何的细节。
首先,我采用了 Vyacheslav Egorov 发布的方法,并在此基础上开发了自己的数据转换器类,它提供双向转换。它还不是生产级代码,但在我移植的服务器软件中运行良好,因此我将其附在下面。我有意将 [buffer] 保留为公共变量。这样的封装也许不够完美,但便于直接对缓冲区进行读写,例如通过 [RandomAccessFile.readInto] 和 [RandomAccessFile.writeFrom]。一切都简单而高效!
事实证明,这些数据转换是初始性能缓慢的罪魁祸首,比 Java 版本慢七倍。随着变化,性能差距大大缩小。 6000 行服务器应用程序的 Dart 版本现在仅落后 Java 版本约 30%。比我对具有如此灵活的打字概念的语言的预期要好。这将使 Dart 在我的客户未来的技术决策中处于有利地位。
在我看来,为客户端和服务器应用程序使用一种语言可能是 Dart 的一个很好的论据。
下面是用于该项目的数据转换器的代码:
part of ylib;
/// [DataConverter] wraps a fixed size [Uint8List] and converts values from and
/// into it using big-endian byte order.
///
/// Every `get…`/`put…` method works at the current [position] and advances it
/// by the number of bytes read or written.
abstract class DataConverter {
  /// Buffer. Public so that callers can read from and write into it directly.
  final Uint8List buffer;

  /// Current position within [buffer].
  int _position = 0;

  DataConverter._create(this.buffer);

  /// Creates the converter with a newly allocated [buffer] of [size] bytes,
  /// choosing the implementation that matches the host byte order.
  factory DataConverter(size) {
    final out = new Uint8List(size);
    if (Endianness.HOST_ENDIAN == Endianness.LITTLE_ENDIAN) {
      return new _ConverterForLEHost._create(out);
    } else {
      return new _ConverterForBEHost._create(out);
    }
  }

  /// Number of bytes in [buffer].
  int get length => buffer.length;

  /// Offset at which the next read or write takes place.
  int get position => _position;

  /// Moves the cursor. Throws [ArgumentError] unless
  /// `0 <= position <= buffer.lengthInBytes`.
  set position(int position) {
    if ((position < 0) || (position > buffer.lengthInBytes)) throw new ArgumentError(position);
    _position = position;
  }

  // Floating point conversions depend on the host byte order and are
  // implemented by the subclasses.
  double getFloat64();
  putFloat64(double v);
  double getFloat32();
  putFloat32(double v);

  // 2^(bits-1) for each width: the first value that no longer fits the signed
  // range, used for range asserts and for sign conversion on read.
  static const int _MaxSignedInt64plus1 = 9223372036854775808;
  static const int _MaxSignedInt32plus1 = 2147483648;
  static const int _MaxSignedInt16plus1 = 32768;
  static const int _MaxSignedInt8plus1 = 128;

  /// Reads eight bytes, most significant first, as a signed 64-bit integer.
  int getInt64() {
    int v =
        buffer[_position + 7] | (buffer[_position + 6] << 8) | (buffer[_position + 5] << 16) |
        (buffer[_position + 4] << 24) | (buffer[_position + 3] << 32) |
        (buffer[_position + 2] << 40) | (buffer[_position + 1] << 48) | (buffer[_position] << 56);
    _position += 8;
    // The bytes were combined as an unsigned value; map the upper half of the
    // range back to negative numbers.
    if (v >= _MaxSignedInt64plus1) v -= 2 * _MaxSignedInt64plus1;
    return v;
  }

  /// Writes [v] as eight bytes, most significant first.
  putInt64(int v) {
    assert((v < _MaxSignedInt64plus1) && (v >= -_MaxSignedInt64plus1));
    // Each store into the Uint8List keeps only the low eight bits of the
    // shifted value, so no explicit masking is done.
    buffer[_position + 7] = v;
    buffer[_position + 6] = (v >> 8);
    buffer[_position + 5] = (v >> 16);
    buffer[_position + 4] = (v >> 24);
    buffer[_position + 3] = (v >> 32);
    buffer[_position + 2] = (v >> 40);
    buffer[_position + 1] = (v >> 48);
    buffer[_position + 0] = (v >> 56);
    _position += 8;
  }

  /// Reads four bytes, most significant first, as a signed 32-bit integer.
  int getInt32() {
    int v = buffer[_position + 3] | (buffer[_position + 2] << 8) | (buffer[_position + 1] << 16) |
        (buffer[_position] << 24);
    _position += 4;
    if (v >= _MaxSignedInt32plus1) v -= 2 * _MaxSignedInt32plus1;
    return v;
  }

  /// Writes [v] as four bytes, most significant first.
  putInt32(int v) {
    assert((v < _MaxSignedInt32plus1) && (v >= -_MaxSignedInt32plus1));
    buffer[_position + 3] = v;
    buffer[_position + 2] = (v >> 8);
    buffer[_position + 1] = (v >> 16);
    buffer[_position + 0] = (v >> 24);
    _position += 4;
  }

  // NOTE: an alternative getInt32/putInt32 that copies the bytes through an
  // Int32List view of the shared scratch buffer (the technique used for the
  // floating point conversions) was measured by the original author to be
  // about 50% slower than the shift-based code above.

  /// Reads two bytes, most significant first, as a signed 16-bit integer.
  int getInt16() {
    int v = buffer[_position + 1] | (buffer[_position] << 8);
    _position += 2;
    if (v >= _MaxSignedInt16plus1) v -= 2 * _MaxSignedInt16plus1;
    return v;
  }

  /// Writes [v] as two bytes, most significant first.
  putInt16(int v) {
    assert((v < _MaxSignedInt16plus1) && (v >= -_MaxSignedInt16plus1));
    buffer[_position + 1] = v;
    buffer[_position + 0] = (v >> 8);
    _position += 2;
  }

  /// Reads one byte as a signed 8-bit integer.
  int getInt8() {
    int v = buffer[_position++];
    if (v >= _MaxSignedInt8plus1) v -= 2 * _MaxSignedInt8plus1;
    return v;
  }

  /// Writes [v] as a single byte.
  putInt8(int v) {
    assert((v < _MaxSignedInt8plus1) && (v >= -_MaxSignedInt8plus1));
    buffer[_position] = v;
    _position++;
  }

  /// Reads [length] bytes and returns them as a string, one character per byte.
  String getString(int length) {
    String s = new String.fromCharCodes(buffer, _position, _position + length);
    _position += length;
    return s;
  }

  /// Writes [str] as one byte per UTF-16 code unit.
  ///
  /// Only code units up to 0xFF can be stored; like the integer writers, this
  /// is checked with an assert rather than at runtime in production mode.
  putString(String str) {
    final units = str.codeUnits;
    // A larger code unit would be silently cut down to its low byte and
    // [getString] would return a different string.
    assert(units.every((c) => c <= 0xFF));
    buffer.setAll(_position, units);
    _position += units.length;
  }
}
/// Scratch lists used for data conversion. [_convF32] and [_convF64] are views
/// over the buffer of [_convU8], so a value stored through one of them can be
/// read back byte by byte through [_convU8] (and vice versa).
final Uint8List _convU8 = new Uint8List(8);
final Float32List _convF32 = new Float32List.view(_convU8.buffer);
final Float64List _convF64 = new Float64List.view(_convU8.buffer);
/// [DataConverter] implementation used on a little-endian host: bytes are
/// copied between [buffer] and the scratch lists in reversed order.
class _ConverterForLEHost extends DataConverter {
  _ConverterForLEHost._create(out) : super._create(out);

  /// Reads eight bytes at the current position as a double.
  double getFloat64() {
    final p = _position;
    _convU8[0] = buffer[p + 7];
    _convU8[1] = buffer[p + 6];
    _convU8[2] = buffer[p + 5];
    _convU8[3] = buffer[p + 4];
    _convU8[4] = buffer[p + 3];
    _convU8[5] = buffer[p + 2];
    _convU8[6] = buffer[p + 1];
    _convU8[7] = buffer[p];
    _position = p + 8;
    return _convF64[0];
  }

  /// Writes [v] as eight bytes at the current position.
  putFloat64(double v) {
    _convF64[0] = v;
    final p = _position;
    buffer[p + 7] = _convU8[0];
    buffer[p + 6] = _convU8[1];
    buffer[p + 5] = _convU8[2];
    buffer[p + 4] = _convU8[3];
    buffer[p + 3] = _convU8[4];
    buffer[p + 2] = _convU8[5];
    buffer[p + 1] = _convU8[6];
    buffer[p] = _convU8[7];
    _position = p + 8;
  }

  /// Reads four bytes at the current position as a single-precision value.
  double getFloat32() {
    final p = _position;
    _convU8[0] = buffer[p + 3];
    _convU8[1] = buffer[p + 2];
    _convU8[2] = buffer[p + 1];
    _convU8[3] = buffer[p];
    _position = p + 4;
    return _convF32[0];
  }

  /// Writes [v], narrowed to single precision, as four bytes.
  putFloat32(double v) {
    _convF32[0] = v;
    // Overflow check: a finite input must not have become non-finite.
    assert(_convF32[0].isFinite || !v.isFinite);
    final p = _position;
    buffer[p + 3] = _convU8[0];
    buffer[p + 2] = _convU8[1];
    buffer[p + 1] = _convU8[2];
    buffer[p] = _convU8[3];
    _position = p + 4;
  }
}
/// [DataConverter] implementation used on a big-endian host: bytes are copied
/// between [buffer] and the scratch lists in the same order.
class _ConverterForBEHost extends DataConverter {
  _ConverterForBEHost._create(out) : super._create(out);

  /// Reads eight bytes at the current position as a double.
  double getFloat64() {
    final p = _position;
    _convU8[0] = buffer[p];
    _convU8[1] = buffer[p + 1];
    _convU8[2] = buffer[p + 2];
    _convU8[3] = buffer[p + 3];
    _convU8[4] = buffer[p + 4];
    _convU8[5] = buffer[p + 5];
    _convU8[6] = buffer[p + 6];
    _convU8[7] = buffer[p + 7];
    _position = p + 8;
    return _convF64[0];
  }

  /// Writes [v] as eight bytes at the current position.
  putFloat64(double v) {
    _convF64[0] = v;
    final p = _position;
    buffer[p] = _convU8[0];
    buffer[p + 1] = _convU8[1];
    buffer[p + 2] = _convU8[2];
    buffer[p + 3] = _convU8[3];
    buffer[p + 4] = _convU8[4];
    buffer[p + 5] = _convU8[5];
    buffer[p + 6] = _convU8[6];
    buffer[p + 7] = _convU8[7];
    _position = p + 8;
  }

  /// Reads four bytes at the current position as a single-precision value.
  double getFloat32() {
    final p = _position;
    _convU8[0] = buffer[p];
    _convU8[1] = buffer[p + 1];
    _convU8[2] = buffer[p + 2];
    _convU8[3] = buffer[p + 3];
    _position = p + 4;
    return _convF32[0];
  }

  /// Writes [v], narrowed to single precision, as four bytes.
  putFloat32(double v) {
    _convF32[0] = v;
    // Overflow check: a finite input must not have become non-finite.
    assert(_convF32[0].isFinite || !v.isFinite);
    final p = _position;
    buffer[p] = _convU8[0];
    buffer[p + 1] = _convU8[1];
    buffer[p + 2] = _convU8[2];
    buffer[p + 3] = _convU8[3];
    _position = p + 4;
  }
}
还有一个非常小的基本测试单元:
import 'package:ylib/ylib.dart';
import 'package:unittest/unittest.dart';
// -------- Test program for [DataConverter]: --------
/// Round-trip tests: each case writes values with the `put…` methods, rewinds
/// the position and reads them back with the matching `get…` methods.
void main() {
  // One converter is shared by all tests; every test sets the position itself.
  DataConverter dc = new DataConverter(100);

  test('Float64', () {
    // d1 has to be a finite double (max is about 1.8e308). A literal beyond
    // that range evaluates to infinity, which would not exercise the mantissa
    // bytes of the conversion.
    double d1 = 1.246e307, d2 = -0.0000745687436849437;
    dc.position = 0;
    dc..putFloat64(d1)..putFloat64(d2);
    dc.position = 0; // reset it
    expect(dc.getFloat64(), d1);
    expect(dc.getFloat64(), d2);
  });

  test('Float32', () {
    double d1 = -0.43478e32, d2 = -0.0;
    dc.position = 0;
    dc..putFloat32(d1)..putFloat32(d2);
    dc.position = 0; // reset it
    // d1 is not exactly representable in single precision, hence the tolerance.
    expect(dc.getFloat32(), closeTo(d1, 1.7e24));
    expect(dc.getFloat32(), d2);
  });

  test('Int64', () {
    int i1 = 9223372036854775807, i2 = -22337203685477580;
    dc.position = 3; // deliberately unaligned
    dc..putInt64(i1)..putInt64(i2);
    dc.position = 3; // reset it
    expect(dc.getInt64(), i1);
    expect(dc.getInt64(), i2);
  });

  test('Int32_16_8', () {
    int i1 = 192233720, i2 = -7233, i3 = 32, i4 = -17;
    dc.position = 0;
    dc..putInt32(i1)..putInt16(i2)..putInt8(i3)..putInt32(i4);
    dc.position = 0; // reset it
    expect(dc.getInt32(), i1);
    expect(dc.getInt16(), i2);
    expect(dc.getInt8(), i3);
    expect(dc.getInt32(), i4);
  });

  test('String', () {
    String s1 = r"922337203!§$%&()=?68547/807", s2 = "-22337203685477580Anton";
    int i1 = -33;
    dc.position = 33;
    dc..putString(s1)..putInt8(i1)..putString(s2);
    dc.position = 33; // reset it
    expect(dc.getString(s1.length), s1);
    expect(dc.getInt8(), i1);
    expect(dc.getString(s2.length), s2);
  });
}