在 F# 中创建一个可序列化的固定大小的字符数组

Creating a serializable fixed size char array in F#

我正在处理需要加载/保存到磁盘的大量数据,其中速度是关键。

我写了这段代码:

// load from cache
/// Reads the whole file at `filespec` and reinterprets its raw bytes as an array of `'a`.
///
/// `'a` must be a value type (the constraints are the ones `MemoryMarshal.Cast` requires).
/// The bytes are reinterpreted in place and then copied into a fresh `'a[]`.
/// If the file length is not a multiple of the size of `'a`, `MemoryMarshal.Cast`
/// drops the trailing partial element; an empty file yields an empty array.
///
/// Returns an `Async` that produces the decoded array.
let loadFromCacheAsync<'a when 'a: (new: unit -> 'a) and 'a: struct and 'a :> ValueType> filespec =
    async {
        let! bytes = File.ReadAllBytesAsync(filespec) |> Async.AwaitTask
        // A span over the managed array needs no pinning and no native pointer;
        // the cast is a reinterpretation and ToArray performs the single copy.
        let result =
            MemoryMarshal.Cast<byte, 'a>(Span<byte>(bytes)).ToArray()
        return result
    }

// save to cache
/// Writes the raw in-memory bytes of `data` to the file `filespec`.
///
/// The cache folder (`cacheFolder`) is created first if it does not exist.
/// The array is pinned only while its bytes are copied into a managed `byte[]`;
/// the asynchronous write then operates on that copy.
///
/// Returns an `Async<unit>` that completes when the write has finished.
let saveToCacheAsync<'a when 'a: unmanaged> filespec (data: 'a array) =
    Directory.CreateDirectory cacheFolder |> ignore
    use pinned = fixed data
    // Total payload size: element count times the size of one element.
    let byteCount = data.Length * sizeof<'a>
    let payload = Span<byte>(NativePtr.toVoidPtr pinned, byteCount).ToArray()
    File.WriteAllBytesAsync(filespec, payload) |> Async.AwaitTask

并且它要求数据结构是非托管的。 例如,我有:

/// Struct record with an explicit memory layout: every field sits at the byte
/// offset given by its FieldOffset attribute, so the in-memory bytes can be
/// written to and read from disk as-is.
/// Fields: a timestamp, a price, a quantity and an integer direction.
// NOTE(review): the encoding of Direction (which int means which side) is not shown here.
[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
type ShortTradeData =
    {
        [<FieldOffset(00)>]    Timestamp: DateTime
        [<FieldOffset(08)>]    Price:     double
        [<FieldOffset(16)>]    Quantity:  double
        [<FieldOffset(24)>]    Direction: int
    }

/// Struct record with an explicit memory layout: every field sits at the byte
/// offset given by its FieldOffset attribute, in consecutive 8-byte slots.
/// Fields: a timestamp followed by open, high, low and close values.
[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
type ShortCandleData =
    {
        [<FieldOffset(00)>] Timestamp:  DateTime
        [<FieldOffset(08)>] Open:       double
        [<FieldOffset(16)>] High:       double
        [<FieldOffset(24)>] Low:        double
        [<FieldOffset(32)>] Close:      double
    }

等...

我现在面临一个需要存储字符串的情况。我知道字符串的最大长度,但我正在尝试找出如何使用非托管类型执行此操作。

我想知道我是否可以做这样的事情(256 字节):

/// Experimental explicit-layout struct that tries to reserve room for a
/// fixed-size text buffer between two ordinary fields.
[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
type TestData =
    {
        [<FieldOffset(00)>]    Timestamp: DateTime
        // Declared as a single char; it only marks where the text region starts.
        // The next field begins at offset 264, leaving a 256-byte gap after offset 8.
        // A .NET char is 2 bytes (UTF-16), so that gap holds 128 chars, not 256.
        [<FieldOffset(08)>]    Text:      char
        // Placed after the reserved region so the struct's size extends past it.
        [<FieldOffset(264)>]   Dummy:     int
    }

获取指向文本的指针,将其转换为字符数组,在其中读取/写入我想要的内容,然后根据需要保存/加载是否安全?

还是我在某个时候自找麻烦?

作为附带问题,任何加速 loadFromCache 函数的方法也非常受欢迎:)

编辑:

我暂时想到了这个。它将复杂事件对象列表转换为可序列化的对象。 该行:

let bytes = Pipeline.serializeBinary event

将原始事件数据转换为字节数组。

然后我创建将保存二进制流的结构,写入长度,创建一个表示结构的跨度并复制字节。然后我将跨度编组为结构类型 (ShortEventData)。

我无法使用 Marshal.Copy,因为它不允许指定目标偏移量,所以只能用循环逐字节复制。不过肯定有更好的方法。

而且我认为,对于其中的所有其他内容,也必须有更好的方法 :D 任何建议都会有所帮助,我只是不太喜欢这个解决方案。

/// Explicit-layout struct holding a timestamp, an inline binary payload region
/// and the payload length.
[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
type ShortEventData =
    {
        [<FieldOffset(00)>]    Timestamp: DateTime
        // Declared as a single byte; it marks the first byte of the payload region.
        // The next field starts at offset 1032, so 1024 bytes are reserved from offset 8.
        [<FieldOffset(08)>]    Event:     byte
        // Number of payload bytes actually used.
        [<FieldOffset(1032)>]  Length:    int
    }

// Converts every event into a fixed-size, blittable ShortEventData value:
// the event is serialized to bytes and those bytes are copied into the
// payload region that the struct reserves between its Event and Length fields.
events
|> List.map (fun event ->
        let bytes = Pipeline.serializeBinary event
        // The payload region starts at the Event field (offset 8) and ends where
        // the Length field begins (offset 1032).
        let payloadOffset = 8
        let payloadCapacity = 1032 - payloadOffset
        // Refuse oversized payloads instead of overwriting Length or running past the struct.
        if bytes.Length > payloadCapacity then
            invalidArg "event" (sprintf "serialized event is %d bytes; at most %d fit in ShortEventData" bytes.Length payloadCapacity)
        let serializableEvent : DataCache.ShortEventData =
            {
                Timestamp = event.GetTimestamp()
                Event     = byte 0
                Length    = bytes.Length
            }
        // A one-element array gives the struct an addressable home that can be viewed as bytes.
        let slot = [| serializableEvent |]
        // View exactly one struct's worth of bytes (no pinning or native pointer needed).
        let slotBytes = MemoryMarshal.AsBytes(Span<DataCache.ShortEventData>(slot))
        // Block-copy the payload to its offset inside the struct.
        Span<byte>(bytes).CopyTo(slotBytes.Slice(payloadOffset, bytes.Length))
        // The array element now carries the payload; return it by value.
        slot.[0]
    )

编辑:

为不同的序列化模型添加基准:

open System
open System.IO
open System.Runtime.InteropServices
open BenchmarkDotNet.Attributes
open BenchmarkDotNet.Running
open MBrace.FsPickler
open Microsoft.FSharp.NativeInterop
open Newtonsoft.Json

#nowarn "9"



/// Small explicit-layout struct used as the element type for the serialization benchmarks:
/// two ints at offsets 0 and 4, followed by a double at offset 8.
[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
type TestStruct =
    {
        [<FieldOffset(00)>] SomeValue:       int
        [<FieldOffset(04)>] AnotherValue:    int
        [<FieldOffset(08)>] YetAnotherValue: double
    }

    /// Builds one value from the given random source, drawing two ints and
    /// then one double, in that order.
    static member MakeOne(r: Random) =
        let first  = r.Next()
        let second = r.Next()
        let third  = r.NextDouble()
        { SomeValue = first; AnotherValue = second; YetAnotherValue = third }
        
        
        
/// BenchmarkDotNet suite comparing several ways of serializing the same
/// array of 1000 TestStruct values. Memory allocations are reported as well.
[<MemoryDiagnoser>]
type Benchmarks () =
    // Input shared by all benchmarks; the fixed seed keeps it identical across runs.
    let testData =
        let random = Random(1000)
        Array.init 1000 (fun _ -> TestStruct.MakeOne(random))


    // inits, outside of the benchmarks
    // FSPickler
    let FSPicklerSerializer = FsPickler.CreateBinarySerializer()

    // APEX
    let ApexSettings = Apex.Serialization.Settings().MarkSerializable(typeof<TestStruct>)
    let ApexBinarySerializer = Apex.Serialization.Binary.Create(ApexSettings)


    /// Raw memory copy: pins the array and copies its bytes into a new byte[].
    [<Benchmark>]
    member _.Thomas() =  // thomas' save to disk
        let sizeStruct = sizeof<TestStruct>
        use ptr = fixed testData
        Span<byte>(NativePtr.toVoidPtr ptr, testData.Length * sizeStruct).ToArray()

    /// JSON serialization through Newtonsoft.Json.
    [<Benchmark>]
    member _.Newtonsoft() =
        JsonConvert.SerializeObject(testData)

    /// Binary serialization through FsPickler.
    [<Benchmark>]
    member _.FSPickler() =
        FSPicklerSerializer.Pickle testData

    /// Binary serialization through Apex into an in-memory stream.
    [<Benchmark>]
    member _.Apex() =
        // `use` disposes the stream when the method returns.
        use outputStream = new MemoryStream()
        ApexBinarySerializer.Write(testData, outputStream)
        
    
/// Program entry point: runs every benchmark in `Benchmarks` and exits with code 0.
[<EntryPoint>]
let main _ =
    // The summary returned by the runner is not needed here.
    BenchmarkRunner.Run<Benchmarks>() |> ignore
    0
|     Method |         Mean |        Error |       StdDev |    Gen 0 |   Gen 1 |   Gen 2 | Allocated |
|----------- |-------------:|-------------:|-------------:|---------:|--------:|--------:|----------:|
|     Thomas |     878.4 ns |     11.74 ns |     10.41 ns |   2.5444 |  0.1411 |       - |     16 KB |
| Newtonsoft | 880,641.2 ns | 16,346.50 ns | 15,290.52 ns | 103.5156 | 79.1016 | 48.8281 |    508 KB |
|  FSPickler |  71,786.6 ns |  1,373.89 ns |  1,349.35 ns |  13.6719 |  2.0752 |       - |     84 KB |
|       Apex |   1,088.8 ns |     20.59 ns |     22.03 ns |   2.6093 |  0.0725 |       - |     16 KB |

看起来 Apex 与我所做的非常接近,但它可能更灵活且更优化,因此切换到它可能是有意义的,除非我拥有的可以更优化。

还得看看@JL0PD的精彩评论如何提高速度

出于兴趣,我取了你问题末尾的那个 lambda,测试了三种类似的实现,并用 BenchmarkDotNet 运行了它们。

  • Reference - 与您原来的写法相同
  • Mutable Struct - 我自己可能会采用的写法,使用可变结构
  • Record - 使用普通的、朴素的记录类型

请自行查看结果。普通的朴素记录是最快的(尽管只比我的尝试快一点,但比你的例子快约 10 倍)。先写简单朴素的代码,对其进行基准测试,然后再尝试改进。

#nowarn "9"

open System
open System.Runtime.InteropServices
open BenchmarkDotNet.Attributes
open BenchmarkDotNet.Running
open Microsoft.FSharp.NativeInterop

/// Plain (reference-type) record variant of the event data: the payload is an
/// ordinary byte array held by reference, with no explicit layout.
type ShortEventDataRec =
    {
        Timestamp: DateTime
        Event:     byte[]
        Length:    int
    }

/// Immutable explicit-layout struct record: a timestamp, an inline payload
/// region and the payload length.
[<Struct>]
[<StructLayout(LayoutKind.Explicit)>]
type ShortEventData =
    {
        [<FieldOffset(00)>]    Timestamp: DateTime
        // Declared as a single byte; it marks the first byte of the payload region.
        // The next field starts at offset 1032, so 1024 bytes are reserved from offset 8.
        [<FieldOffset(08)>]    Event:     byte
        [<FieldOffset(1032)>]  Length:    int
    }

/// Mutable explicit-layout struct declaring the same field names, types and
/// offsets as ShortEventData, so its fields can be assigned in place.
[<StructLayout(LayoutKind.Explicit)>]
type MutableShortEventData =
    struct
        [<FieldOffset(00)>]    val mutable Timestamp: DateTime
        // First byte of the payload region (offsets 8 up to 1032).
        [<FieldOffset(08)>]    val mutable Event:     byte
        [<FieldOffset(1032)>]  val mutable Length:    int
    end 

/// BenchmarkDotNet suite comparing three ways of packaging a 1024-byte payload
/// together with a timestamp and a length. Memory allocations are reported as well.
[<MemoryDiagnoser>]
type Benchmarks () =

    // 1024-byte payload with the repeating pattern 0..255.
    let event  = 
        Array.init 1024 (fun i -> byte (i % 256))
    let time = DateTime.Now
    // Size of the struct record; MutableShortEventData declares the same field offsets.
    let sizeStruct = sizeof<ShortEventData>


    /// The approach from the question: build the struct, pin a one-element array,
    /// copy the payload byte by byte through a span, then read the struct back.
    [<Benchmark>]
    member __.Reference() =
        let bytes = event
        let serializableEvent =
            {
                ShortEventData.Timestamp = time
                Event     = byte 0
                Length    = bytes.Length
            }
        use ptr = fixed [|serializableEvent|]
        let nativeSpan = Span<byte>(NativePtr.toVoidPtr ptr, sizeStruct)
        for i = 0 to bytes.Length - 1 do
            nativeSpan.[8 + i] <- bytes.[i]

        MemoryMarshal.Cast<byte, ShortEventData>(nativeSpan).[0]


    /// Allocates an uninitialized byte buffer of `sizeStruct` bytes, views it as a
    /// MutableShortEventData, assigns the fields in place and block-copies the
    /// payload into bytes 8..1031. Returns the struct by value.
    [<Benchmark>]
    member __.MutableStruct() =
        let bytes = event

        let targetBytes = GC.AllocateUninitializedArray<byte>(sizeStruct)
        let targetSpan = Span(targetBytes)
        let targetStruct = MemoryMarshal.Cast<_, MutableShortEventData>(targetSpan)

        targetStruct.[0].Timestamp <- time
        // CopyTo returns unit, so there is nothing to bind.
        bytes.CopyTo(targetSpan.Slice(8, 1024))
        targetStruct.[0].Length <- event.Length

        targetStruct.[0]


    /// Builds a plain record whose Event field is a fresh copy of the payload array.
    [<Benchmark>]
    member __.Record() =
        let bytes = event
        let serializableEvent =
            {
                ShortEventDataRec.Timestamp = time
                Event     = 
                    let eventBytes = GC.AllocateUninitializedArray(bytes.Length)
                    System.Array.Copy(bytes, eventBytes, bytes.Length)
                    eventBytes
                Length    = bytes.Length
            }
        serializableEvent
    
/// Program entry point: runs every benchmark in `Benchmarks` and exits with code 0.
[<EntryPoint>]
let main _ =
    // The summary returned by the runner is not needed here.
    BenchmarkRunner.Run<Benchmarks>() |> ignore
    0
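|        Method |      Mean |    Error |   StdDev |  Gen 0 |  Gen 1 | Allocated |
|-------------- |----------:|---------:|---------:|-------:|-------:|----------:|
|     Reference | 526.88 ns | 6.318 ns | 5.909 ns | 0.0629 |      - |      1 KB |
| MutableStruct |  49.50 ns | 0.966 ns | 1.074 ns | 0.0636 |      - |      1 KB |
|        Record |  42.73 ns | 0.672 ns | 0.628 ns | 0.0650 | 0.0002 |      1 KB |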