在 F# 中创建一个可序列化的固定大小的字符数组
Creating a serializable fixed size char array in F#
我正在处理需要加载/保存到磁盘的大量数据,其中速度是关键。
我写了这段代码:
// load from cache
/// Reads the whole file at `filespec` and reinterprets its raw bytes as an array of 'a.
///
/// filespec: path of the cache file to read.
/// Returns an async computation that yields the decoded 'a array.
/// NOTE: MemoryMarshal.Cast only yields whole elements, so trailing bytes that do
/// not fill a complete 'a are silently dropped.
let loadFromCacheAsync<'a when 'a: (new: unit -> 'a) and 'a: struct and 'a :> ValueType> filespec =
    async {
        let! bytes = File.ReadAllBytesAsync(filespec) |> Async.AwaitTask
        // A Span can wrap the managed array directly, so neither `fixed` pinning
        // nor a native pointer is needed to reinterpret the bytes.
        return MemoryMarshal.Cast<byte, 'a>(Span<byte>(bytes)).ToArray()
    }
// save to cache
/// Writes `data` to `filespec` as one raw byte image of the array's memory.
///
/// filespec: destination file path.
/// data: the unmanaged elements to persist.
/// Returns the async computation wrapping File.WriteAllBytesAsync.
let saveToCacheAsync<'a when 'a: unmanaged> filespec (data: 'a array) =
    // Create the cache folder before writing into it.
    Directory.CreateDirectory cacheFolder |> ignore
    let byteCount = data.Length * sizeof<'a>
    // Pin the array so its address stays valid while the bytes are copied out.
    use pinned = fixed data
    let image = Span<byte>(NativePtr.toVoidPtr pinned, byteCount).ToArray()
    File.WriteAllBytesAsync(filespec, image) |> Async.AwaitTask
并且它要求数据结构是非托管的。
例如,我有:
/// Trade record stored as an explicit-layout struct (fields at byte offsets 0, 8, 16 and 24).
[<Struct; StructLayout(LayoutKind.Explicit)>]
type ShortTradeData =
    { [<FieldOffset(0)>] Timestamp: DateTime
      [<FieldOffset(8)>] Price: float
      [<FieldOffset(16)>] Quantity: float
      [<FieldOffset(24)>] Direction: int }
或
/// Candle record stored as an explicit-layout struct: a timestamp followed by
/// four doubles at 8-byte steps (offsets 8, 16, 24, 32).
[<Struct; StructLayout(LayoutKind.Explicit)>]
type ShortCandleData =
    { [<FieldOffset(0)>] Timestamp: DateTime
      [<FieldOffset(8)>] Open: float
      [<FieldOffset(16)>] High: float
      [<FieldOffset(24)>] Low: float
      [<FieldOffset(32)>] Close: float }
等...
我现在面临一个需要存储字符串的情况。我知道字符串的最大长度,但我正在尝试找出如何使用非托管类型执行此操作。
我想知道我是否可以做这样的事情(256 字节):
/// Proposed layout for a record carrying text: Text is declared as a single char
/// at offset 8 and Dummy sits at offset 264, leaving bytes 8..263 between them.
/// NOTE(review): presumably that 256-byte gap is the intended string buffer; a
/// .NET char is 2 bytes, so it would hold 128 chars — confirm the intended capacity.
[<Struct; StructLayout(LayoutKind.Explicit)>]
type TestData =
    { [<FieldOffset(0)>] Timestamp: DateTime
      [<FieldOffset(8)>] Text: char
      [<FieldOffset(264)>] Dummy: int }
获取指向文本的指针,将其转换为字符数组,在其中读取/写入我想要的内容,然后根据需要保存/加载是否安全?
还是我在某个时候自找麻烦?
作为附带问题,任何加速 loadFromCache 函数的方法也非常受欢迎:)
编辑:
我暂时想到了这个。它将复杂事件对象列表转换为可序列化的对象。
该行:
let bytes = Pipeline.serializeBinary event
将原始事件数据转换为字节数组。
然后我创建将保存二进制流的结构,写入长度,创建一个表示结构的跨度并复制字节。然后我将跨度编组为结构类型 (ShortEventData)。
我无法使用 Marshal.Copy,因为它不能指定目标偏移量,所以只能用循环逐字节复制。但一定有更好的方法。
而且我认为,对于其中的所有其他内容,也必须有更好的方法 :D 任何建议都会有所帮助,我只是不太喜欢这个解决方案。
/// Event record with an explicit layout: Timestamp at offset 0, a single Event
/// byte declared at offset 8, and Length at offset 1032. The 1024 bytes between
/// offset 8 and Length are where the accompanying code copies the serialized payload.
[<Struct; StructLayout(LayoutKind.Explicit)>]
type ShortEventData =
    { [<FieldOffset(0)>] Timestamp: DateTime
      [<FieldOffset(8)>] Event: byte
      [<FieldOffset(1032)>] Length: int }
// Converts each event into a ShortEventData whose Event region (offsets 8..1031)
// holds the event's serialized bytes and whose Length field records their count.
events
|> List.map (fun event ->
    let bytes = Pipeline.serializeBinary event
    // The Event region starts at offset 8 and ends where Length begins (offset 1032).
    let eventOffset = 8
    let eventCapacity = 1032 - eventOffset
    // Refuse payloads that would spill over the Length field or past the struct.
    if bytes.Length > eventCapacity then
        invalidArg "events" (sprintf "serialized event is %d bytes; at most %d fit" bytes.Length eventCapacity)
    let serializableEvent : DataCache.ShortEventData =
        {
            Timestamp = event.GetTimestamp()
            Event = byte 0
            Length = bytes.Length
        }
    // Keep a reference to the one-element array so the patched struct can be
    // read back from it without another copy.
    let holder = [| serializableEvent |]
    use ptr = fixed holder
    // The span must cover exactly one struct; sizing it by the payload length
    // would reach beyond the pinned array.
    let nativeSpan = Span<byte>(NativePtr.toVoidPtr ptr, sizeof<DataCache.ShortEventData>)
    Span<byte>(bytes).CopyTo(nativeSpan.Slice(eventOffset, bytes.Length))
    holder[0]
)
编辑:
为不同的序列化模型添加基准:
open System
open System.IO
open System.Runtime.InteropServices
open BenchmarkDotNet.Attributes
open BenchmarkDotNet.Running
open MBrace.FsPickler
open Microsoft.FSharp.NativeInterop
open Newtonsoft.Json
#nowarn "9"
/// Sample payload for the serialization benchmarks: two ints followed by a double,
/// laid out explicitly at offsets 0, 4 and 8.
[<Struct; StructLayout(LayoutKind.Explicit)>]
type TestStruct =
    { [<FieldOffset(0)>] SomeValue: int
      [<FieldOffset(4)>] AnotherValue: int
      [<FieldOffset(8)>] YetAnotherValue: float }

    /// Builds a TestStruct from three successive draws on `r`
    /// (Next, Next, NextDouble — in that order).
    static member MakeOne(r: Random) =
        let first = r.Next()
        let second = r.Next()
        let third = r.NextDouble()
        { SomeValue = first
          AnotherValue = second
          YetAnotherValue = third }
/// Serialization benchmarks that all process the same 1000-element TestStruct array.
[<MemoryDiagnoser>]
type Benchmarks () =
    // Built once from a fixed seed (1000) and shared by every benchmark method.
    let testData =
        let random = Random(1000)
        Array.init 1000 (fun _ -> TestStruct.MakeOne(random))

    // inits, outside of the benchmarks
    // FSPickler
    let FSPicklerSerializer = FsPickler.CreateBinarySerializer()
    // APEX
    let ApexSettings = Apex.Serialization.Settings().MarkSerializable(typeof<TestStruct>)
    let ApexBinarySerializer = Apex.Serialization.Binary.Create(ApexSettings)

    /// Raw memory copy: pins the array and copies its bytes into a new byte array.
    [<Benchmark>]
    member _.Thomas() = // thomas' save to disk
        let sizeStruct = sizeof<TestStruct>
        use ptr = fixed testData
        Span<byte>(NativePtr.toVoidPtr ptr, testData.Length * sizeStruct).ToArray()

    /// JSON text serialization through Newtonsoft.Json.
    [<Benchmark>]
    member _.Newtonsoft() =
        JsonConvert.SerializeObject(testData)

    /// Binary pickling through FsPickler.
    [<Benchmark>]
    member _.FSPickler() =
        FSPicklerSerializer.Pickle testData

    /// Binary serialization through Apex into an in-memory stream.
    [<Benchmark>]
    member _.Apex() =
        // `use` disposes the stream when the benchmark iteration ends.
        use outputStream = new MemoryStream()
        ApexBinarySerializer.Write(testData, outputStream)
/// Program entry point: runs the `Benchmarks` class through BenchmarkRunner and returns exit code 0.
[<EntryPoint>]
let main _ =
    BenchmarkRunner.Run<Benchmarks>() |> ignore
    0
| Method | Mean | Error | StdDev | Gen 0 | Gen 1 | Gen 2 | Allocated |
|----------- |-------------:|-------------:|-------------:|---------:|--------:|--------:|----------:|
| Thomas | 878.4 ns | 11.74 ns | 10.41 ns | 2.5444 | 0.1411 | - | 16 KB |
| Newtonsoft | 880,641.2 ns | 16,346.50 ns | 15,290.52 ns | 103.5156 | 79.1016 | 48.8281 | 508 KB |
| FSPickler | 71,786.6 ns | 1,373.89 ns | 1,349.35 ns | 13.6719 | 2.0752 | - | 84 KB |
| Apex | 1,088.8 ns | 20.59 ns | 22.03 ns | 2.6093 | 0.0725 | - | 16 KB |
看起来 Apex 与我所做的非常接近,但它可能更灵活且更优化,因此切换到它可能是有意义的,除非我拥有的可以更优化。
还得看看@JL0PD的精彩评论如何提高速度
出于兴趣,我把你问题末尾的 lambda 拿出来,测试了三种类似的实现,并用 BenchmarkDotNet 运行了它们。
- Reference:你原来的写法
- Mutable Struct:我可能会采用的写法,使用可变结构
- Record:使用普通的、简单的记录
请自己查看结果。普通的简单记录是最快的(不过只比我的尝试快一点,比你的示例快 10 倍)。先写简单直白的代码,对其进行基准测试,然后再尝试改进。
#nowarn "9"
open System
open System.Runtime.InteropServices
open BenchmarkDotNet.Attributes
open BenchmarkDotNet.Running
open Microsoft.FSharp.NativeInterop
/// Plain record version of the event data: the payload is held in an ordinary
/// byte array instead of an explicit-layout struct.
type ShortEventDataRec =
    { Timestamp: DateTime
      Event: byte array
      Length: int }
/// Event record with an explicit layout: Timestamp at offset 0, a single Event
/// byte declared at offset 8, and Length at offset 1032. The benchmark code
/// copies the 1024-byte payload into the region that starts at offset 8.
[<Struct; StructLayout(LayoutKind.Explicit)>]
type ShortEventData =
    { [<FieldOffset(0)>] Timestamp: DateTime
      [<FieldOffset(8)>] Event: byte
      [<FieldOffset(1032)>] Length: int }
/// Mutable struct with the same explicit offsets as ShortEventData (0, 8, 1032);
/// every field can be assigned in place.
[<Struct; StructLayout(LayoutKind.Explicit)>]
type MutableShortEventData =
    [<FieldOffset(0)>]
    val mutable Timestamp: DateTime

    [<FieldOffset(8)>]
    val mutable Event: byte

    [<FieldOffset(1032)>]
    val mutable Length: int
/// Compares three ways of packaging a timestamp plus a 1024-byte payload into one event value.
[<MemoryDiagnoser>]
type Benchmarks () =
    // 1024 payload bytes cycling through 0..255.
    let event = Array.init 1024 (fun idx -> byte (idx % 256))
    let time = DateTime.Now
    let sizeStruct = sizeof<ShortEventData>

    /// Pins a one-element array holding the struct and copies the payload into it byte by byte.
    [<Benchmark>]
    member _.Reference() =
        let payload = event
        let seed =
            { ShortEventData.Timestamp = time
              Event = byte 0
              Length = payload.Length }
        use pinned = fixed [| seed |]
        let raw = Span<byte>(NativePtr.toVoidPtr pinned, sizeStruct)
        // The payload is written starting at byte offset 8 (the Event field).
        for i = 0 to payload.Length - 1 do
            raw.[8 + i] <- payload.[i]
        MemoryMarshal.Cast<byte, ShortEventData>(raw).[0]

    /// Allocates an uninitialized byte buffer, views it as the mutable struct,
    /// and fills the fields and the payload region in place.
    [<Benchmark>]
    member _.MutableStruct() =
        let payload = event
        let buffer = GC.AllocateUninitializedArray<byte>(sizeStruct)
        let bufferSpan = Span<byte>(buffer)
        let asStruct = MemoryMarshal.Cast<byte, MutableShortEventData>(bufferSpan)
        asStruct.[0].Timestamp <- time
        payload.CopyTo(bufferSpan.Slice(8, 1024))
        asStruct.[0].Length <- event.Length
        asStruct.[0]

    /// Builds the plain record, copying the payload into a fresh byte array.
    [<Benchmark>]
    member _.Record() =
        let payload = event
        let copy = GC.AllocateUninitializedArray<byte>(payload.Length)
        System.Array.Copy(payload, copy, payload.Length)
        { ShortEventDataRec.Timestamp = time
          Event = copy
          Length = payload.Length }
/// Program entry point: runs the `Benchmarks` class through BenchmarkRunner and returns exit code 0.
[<EntryPoint>]
let main _ =
    BenchmarkRunner.Run<Benchmarks>() |> ignore
    0
| Method | Mean | Error | StdDev | Gen 0 | Gen 1 | Allocated |
|---|---:|---:|---:|---:|---:|---:|
| Reference | 526.88 ns | 6.318 ns | 5.909 ns | 0.0629 | - | 1 KB |
| MutableStruct | 49.50 ns | 0.966 ns | 1.074 ns | 0.0636 | - | 1 KB |
| Record | 42.73 ns | 0.672 ns | 0.628 ns | 0.0650 | 0.0002 | 1 KB |
我正在处理需要加载/保存到磁盘的大量数据,其中速度是关键。
我写了这段代码:
// load from cache
/// Reads the whole file at `filespec` and reinterprets its raw bytes as an array of 'a.
///
/// filespec: path of the cache file to read.
/// Returns an async computation that yields the decoded 'a array.
/// NOTE: MemoryMarshal.Cast only yields whole elements, so trailing bytes that do
/// not fill a complete 'a are silently dropped.
let loadFromCacheAsync<'a when 'a: (new: unit -> 'a) and 'a: struct and 'a :> ValueType> filespec =
    async {
        let! bytes = File.ReadAllBytesAsync(filespec) |> Async.AwaitTask
        // A Span can wrap the managed array directly, so neither `fixed` pinning
        // nor a native pointer is needed to reinterpret the bytes.
        return MemoryMarshal.Cast<byte, 'a>(Span<byte>(bytes)).ToArray()
    }
// save to cache
/// Writes `data` to `filespec` as one raw byte image of the array's memory.
///
/// filespec: destination file path.
/// data: the unmanaged elements to persist.
/// Returns the async computation wrapping File.WriteAllBytesAsync.
let saveToCacheAsync<'a when 'a: unmanaged> filespec (data: 'a array) =
    // Create the cache folder before writing into it.
    Directory.CreateDirectory cacheFolder |> ignore
    let byteCount = data.Length * sizeof<'a>
    // Pin the array so its address stays valid while the bytes are copied out.
    use pinned = fixed data
    let image = Span<byte>(NativePtr.toVoidPtr pinned, byteCount).ToArray()
    File.WriteAllBytesAsync(filespec, image) |> Async.AwaitTask
并且它要求数据结构是非托管的。 例如,我有:
/// Trade record stored as an explicit-layout struct (fields at byte offsets 0, 8, 16 and 24).
[<Struct; StructLayout(LayoutKind.Explicit)>]
type ShortTradeData =
    { [<FieldOffset(0)>] Timestamp: DateTime
      [<FieldOffset(8)>] Price: float
      [<FieldOffset(16)>] Quantity: float
      [<FieldOffset(24)>] Direction: int }
或
/// Candle record stored as an explicit-layout struct: a timestamp followed by
/// four doubles at 8-byte steps (offsets 8, 16, 24, 32).
[<Struct; StructLayout(LayoutKind.Explicit)>]
type ShortCandleData =
    { [<FieldOffset(0)>] Timestamp: DateTime
      [<FieldOffset(8)>] Open: float
      [<FieldOffset(16)>] High: float
      [<FieldOffset(24)>] Low: float
      [<FieldOffset(32)>] Close: float }
等...
我现在面临一个需要存储字符串的情况。我知道字符串的最大长度,但我正在尝试找出如何使用非托管类型执行此操作。
我想知道我是否可以做这样的事情(256 字节):
/// Proposed layout for a record carrying text: Text is declared as a single char
/// at offset 8 and Dummy sits at offset 264, leaving bytes 8..263 between them.
/// NOTE(review): presumably that 256-byte gap is the intended string buffer; a
/// .NET char is 2 bytes, so it would hold 128 chars — confirm the intended capacity.
[<Struct; StructLayout(LayoutKind.Explicit)>]
type TestData =
    { [<FieldOffset(0)>] Timestamp: DateTime
      [<FieldOffset(8)>] Text: char
      [<FieldOffset(264)>] Dummy: int }
获取指向文本的指针,将其转换为字符数组,在其中读取/写入我想要的内容,然后根据需要保存/加载是否安全?
还是我在某个时候自找麻烦?
作为附带问题,任何加速 loadFromCache 函数的方法也非常受欢迎:)
编辑:
我暂时想到了这个。它将复杂事件对象列表转换为可序列化的对象。 该行:
let bytes = Pipeline.serializeBinary event
将原始事件数据转换为字节数组。
然后我创建将保存二进制流的结构,写入长度,创建一个表示结构的跨度并复制字节。然后我将跨度编组为结构类型 (ShortEventData)。
我无法使用 Marshal.Copy,因为它不能指定目标偏移量,所以只能用循环逐字节复制。但一定有更好的方法。
而且我认为,对于其中的所有其他内容,也必须有更好的方法 :D 任何建议都会有所帮助,我只是不太喜欢这个解决方案。
/// Event record with an explicit layout: Timestamp at offset 0, a single Event
/// byte declared at offset 8, and Length at offset 1032. The 1024 bytes between
/// offset 8 and Length are where the accompanying code copies the serialized payload.
[<Struct; StructLayout(LayoutKind.Explicit)>]
type ShortEventData =
    { [<FieldOffset(0)>] Timestamp: DateTime
      [<FieldOffset(8)>] Event: byte
      [<FieldOffset(1032)>] Length: int }
// Converts each event into a ShortEventData whose Event region (offsets 8..1031)
// holds the event's serialized bytes and whose Length field records their count.
events
|> List.map (fun event ->
    let bytes = Pipeline.serializeBinary event
    // The Event region starts at offset 8 and ends where Length begins (offset 1032).
    let eventOffset = 8
    let eventCapacity = 1032 - eventOffset
    // Refuse payloads that would spill over the Length field or past the struct.
    if bytes.Length > eventCapacity then
        invalidArg "events" (sprintf "serialized event is %d bytes; at most %d fit" bytes.Length eventCapacity)
    let serializableEvent : DataCache.ShortEventData =
        {
            Timestamp = event.GetTimestamp()
            Event = byte 0
            Length = bytes.Length
        }
    // Keep a reference to the one-element array so the patched struct can be
    // read back from it without another copy.
    let holder = [| serializableEvent |]
    use ptr = fixed holder
    // The span must cover exactly one struct; sizing it by the payload length
    // would reach beyond the pinned array.
    let nativeSpan = Span<byte>(NativePtr.toVoidPtr ptr, sizeof<DataCache.ShortEventData>)
    Span<byte>(bytes).CopyTo(nativeSpan.Slice(eventOffset, bytes.Length))
    holder[0]
)
编辑:
为不同的序列化模型添加基准:
open System
open System.IO
open System.Runtime.InteropServices
open BenchmarkDotNet.Attributes
open BenchmarkDotNet.Running
open MBrace.FsPickler
open Microsoft.FSharp.NativeInterop
open Newtonsoft.Json
#nowarn "9"
/// Sample payload for the serialization benchmarks: two ints followed by a double,
/// laid out explicitly at offsets 0, 4 and 8.
[<Struct; StructLayout(LayoutKind.Explicit)>]
type TestStruct =
    { [<FieldOffset(0)>] SomeValue: int
      [<FieldOffset(4)>] AnotherValue: int
      [<FieldOffset(8)>] YetAnotherValue: float }

    /// Builds a TestStruct from three successive draws on `r`
    /// (Next, Next, NextDouble — in that order).
    static member MakeOne(r: Random) =
        let first = r.Next()
        let second = r.Next()
        let third = r.NextDouble()
        { SomeValue = first
          AnotherValue = second
          YetAnotherValue = third }
/// Serialization benchmarks that all process the same 1000-element TestStruct array.
[<MemoryDiagnoser>]
type Benchmarks () =
    // Built once from a fixed seed (1000) and shared by every benchmark method.
    let testData =
        let random = Random(1000)
        Array.init 1000 (fun _ -> TestStruct.MakeOne(random))

    // inits, outside of the benchmarks
    // FSPickler
    let FSPicklerSerializer = FsPickler.CreateBinarySerializer()
    // APEX
    let ApexSettings = Apex.Serialization.Settings().MarkSerializable(typeof<TestStruct>)
    let ApexBinarySerializer = Apex.Serialization.Binary.Create(ApexSettings)

    /// Raw memory copy: pins the array and copies its bytes into a new byte array.
    [<Benchmark>]
    member _.Thomas() = // thomas' save to disk
        let sizeStruct = sizeof<TestStruct>
        use ptr = fixed testData
        Span<byte>(NativePtr.toVoidPtr ptr, testData.Length * sizeStruct).ToArray()

    /// JSON text serialization through Newtonsoft.Json.
    [<Benchmark>]
    member _.Newtonsoft() =
        JsonConvert.SerializeObject(testData)

    /// Binary pickling through FsPickler.
    [<Benchmark>]
    member _.FSPickler() =
        FSPicklerSerializer.Pickle testData

    /// Binary serialization through Apex into an in-memory stream.
    [<Benchmark>]
    member _.Apex() =
        // `use` disposes the stream when the benchmark iteration ends.
        use outputStream = new MemoryStream()
        ApexBinarySerializer.Write(testData, outputStream)
/// Program entry point: runs the `Benchmarks` class through BenchmarkRunner and returns exit code 0.
[<EntryPoint>]
let main _ =
    BenchmarkRunner.Run<Benchmarks>() |> ignore
    0
| Method | Mean | Error | StdDev | Gen 0 | Gen 1 | Gen 2 | Allocated |
|----------- |-------------:|-------------:|-------------:|---------:|--------:|--------:|----------:|
| Thomas | 878.4 ns | 11.74 ns | 10.41 ns | 2.5444 | 0.1411 | - | 16 KB |
| Newtonsoft | 880,641.2 ns | 16,346.50 ns | 15,290.52 ns | 103.5156 | 79.1016 | 48.8281 | 508 KB |
| FSPickler | 71,786.6 ns | 1,373.89 ns | 1,349.35 ns | 13.6719 | 2.0752 | - | 84 KB |
| Apex | 1,088.8 ns | 20.59 ns | 22.03 ns | 2.6093 | 0.0725 | - | 16 KB |
看起来 Apex 与我所做的非常接近,但它可能更灵活且更优化,因此切换到它可能是有意义的,除非我拥有的可以更优化。
还得看看@JL0PD的精彩评论如何提高速度
出于兴趣,我把你问题末尾的 lambda 拿出来,测试了三种类似的实现,并用 BenchmarkDotNet 运行了它们。
- Reference:你原来的写法
- Mutable Struct:我可能会采用的写法,使用可变结构
- Record:使用普通的、简单的记录
请自己查看结果。普通的简单记录是最快的(不过只比我的尝试快一点,比你的示例快 10 倍)。先写简单直白的代码,对其进行基准测试,然后再尝试改进。
#nowarn "9"
open System
open System.Runtime.InteropServices
open BenchmarkDotNet.Attributes
open BenchmarkDotNet.Running
open Microsoft.FSharp.NativeInterop
/// Plain record version of the event data: the payload is held in an ordinary
/// byte array instead of an explicit-layout struct.
type ShortEventDataRec =
    { Timestamp: DateTime
      Event: byte array
      Length: int }
/// Event record with an explicit layout: Timestamp at offset 0, a single Event
/// byte declared at offset 8, and Length at offset 1032. The benchmark code
/// copies the 1024-byte payload into the region that starts at offset 8.
[<Struct; StructLayout(LayoutKind.Explicit)>]
type ShortEventData =
    { [<FieldOffset(0)>] Timestamp: DateTime
      [<FieldOffset(8)>] Event: byte
      [<FieldOffset(1032)>] Length: int }
/// Mutable struct with the same explicit offsets as ShortEventData (0, 8, 1032);
/// every field can be assigned in place.
[<Struct; StructLayout(LayoutKind.Explicit)>]
type MutableShortEventData =
    [<FieldOffset(0)>]
    val mutable Timestamp: DateTime

    [<FieldOffset(8)>]
    val mutable Event: byte

    [<FieldOffset(1032)>]
    val mutable Length: int
/// Compares three ways of packaging a timestamp plus a 1024-byte payload into one event value.
[<MemoryDiagnoser>]
type Benchmarks () =
    // 1024 payload bytes cycling through 0..255.
    let event = Array.init 1024 (fun idx -> byte (idx % 256))
    let time = DateTime.Now
    let sizeStruct = sizeof<ShortEventData>

    /// Pins a one-element array holding the struct and copies the payload into it byte by byte.
    [<Benchmark>]
    member _.Reference() =
        let payload = event
        let seed =
            { ShortEventData.Timestamp = time
              Event = byte 0
              Length = payload.Length }
        use pinned = fixed [| seed |]
        let raw = Span<byte>(NativePtr.toVoidPtr pinned, sizeStruct)
        // The payload is written starting at byte offset 8 (the Event field).
        for i = 0 to payload.Length - 1 do
            raw.[8 + i] <- payload.[i]
        MemoryMarshal.Cast<byte, ShortEventData>(raw).[0]

    /// Allocates an uninitialized byte buffer, views it as the mutable struct,
    /// and fills the fields and the payload region in place.
    [<Benchmark>]
    member _.MutableStruct() =
        let payload = event
        let buffer = GC.AllocateUninitializedArray<byte>(sizeStruct)
        let bufferSpan = Span<byte>(buffer)
        let asStruct = MemoryMarshal.Cast<byte, MutableShortEventData>(bufferSpan)
        asStruct.[0].Timestamp <- time
        payload.CopyTo(bufferSpan.Slice(8, 1024))
        asStruct.[0].Length <- event.Length
        asStruct.[0]

    /// Builds the plain record, copying the payload into a fresh byte array.
    [<Benchmark>]
    member _.Record() =
        let payload = event
        let copy = GC.AllocateUninitializedArray<byte>(payload.Length)
        System.Array.Copy(payload, copy, payload.Length)
        { ShortEventDataRec.Timestamp = time
          Event = copy
          Length = payload.Length }
/// Program entry point: runs the `Benchmarks` class through BenchmarkRunner and returns exit code 0.
[<EntryPoint>]
let main _ =
    BenchmarkRunner.Run<Benchmarks>() |> ignore
    0
Method | Mean | Error | StdDev | Gen 0 | Gen 1 | Allocated |
---|---|---|---|---|---|---|
Reference | 526.88 ns | 6.318 ns | 5.909 ns | 0.0629 | - | 1 KB |
MutableStruct | 49.50 ns | 0.966 ns | 1.074 ns | 0.0636 | - | 1 KB |
Record | 42.73 ns | 0.672 ns | 0.628 ns | 0.0650 | 0.0002 | 1 KB |