我可以获得 F# 引用的标识吗?

Can I get an identity of an F# quotation?

F# 引用是一个很棒的功能,它允许我们将 F# 表达式视为普通的 F# 值。在我的上下文中,我使用 F# 引用对 Gpu 内核进行编码,并将其编译成 Gpu 位码模块。

有一个问题。我不想每次都编译Gpu内核,我想缓存编译好的Gpu bitcode模块。因此,我需要一个键,或者来自 F# 引用值的标识。我想要一个像这样的缓存系统:

// Desired compiler entry point: turns a quotation into a GPU module.
// Only the signature is given here; no implementation is shown.
let compile : Expr -> GpuModule

// Cache of already compiled modules, indexed by a key derived from the quotation.
let cache = ConcurrentDictionary<Key, GpuModule>()

// Looks up the module for `expr` through GetOrAdd, compiling `expr` when the
// key has no entry yet. `getQuotationKey` is the missing piece: it has to
// return the same key for quotations that should share one compiled module.
let jitCompile (expr:Expr) =
    let key = getQuotationKey(expr)
    cache.GetOrAdd(key, fun key -> compile expr)

有一种解决方案,使用引用表达式实例作为键。但是看看这段代码:

open Microsoft.FSharp.Quotations

/// Prints the given quotation to standard output using the "%O" format.
let foo (expr:Expr) =
    expr |> printfn "%O"

// Program entry point: passes the same quotation literal to `foo` ten times.
[<EntryPoint>]
let main argv = 

    // The quotation literal sits inside the loop body, so whatever code the
    // compiler emits to build it runs on every iteration.
    for i = 1 to 10 do
        foo <@ fun x y -> x + y @>

    // Process exit code.
    0

如果我检查编译后的 IL 代码,我得到了这些 IL 指令:

IL_0000: nop
IL_0001: ldc.i4.1
IL_0002: stloc.0
IL_0003: br IL_00a2
// loop start (head: IL_00a2)
    IL_0008: ldtoken '<StartupCode$ConsoleApplication2>.$Program'
    IL_000d: call class [mscorlib]System.Type [mscorlib]System.Type::GetTypeFromHandle(valuetype [mscorlib]System.RuntimeTypeHandle)
    IL_0012: ldc.i4.5
    IL_0013: newarr [mscorlib]System.Type
    IL_0018: dup
    IL_0019: ldc.i4.0
    IL_001a: ldtoken [mscorlib]System.Int32
    IL_001f: call class [mscorlib]System.Type [mscorlib]System.Type::GetTypeFromHandle(valuetype [mscorlib]System.RuntimeTypeHandle)
    IL_0024: stelem.any [mscorlib]System.Type
    IL_0029: dup
    IL_002a: ldc.i4.1
    IL_002b: ldtoken [FSharp.Core]Microsoft.FSharp.Core.Operators
    IL_0030: call class [mscorlib]System.Type [mscorlib]System.Type::GetTypeFromHandle(valuetype [mscorlib]System.RuntimeTypeHandle)
    IL_0035: stelem.any [mscorlib]System.Type
    IL_003a: dup
    IL_003b: ldc.i4.2
    IL_003c: ldtoken [mscorlib]System.Tuple`2
    IL_0041: call class [mscorlib]System.Type [mscorlib]System.Type::GetTypeFromHandle(valuetype [mscorlib]System.RuntimeTypeHandle)
    IL_0046: stelem.any [mscorlib]System.Type
    IL_004b: dup
    IL_004c: ldc.i4.3
    IL_004d: ldtoken [mscorlib]System.String
    IL_0052: call class [mscorlib]System.Type [mscorlib]System.Type::GetTypeFromHandle(valuetype [mscorlib]System.RuntimeTypeHandle)
    IL_0057: stelem.any [mscorlib]System.Type
    IL_005c: dup
    IL_005d: ldc.i4.4
    IL_005e: ldtoken [mscorlib]System.Tuple`5
    IL_0063: call class [mscorlib]System.Type [mscorlib]System.Type::GetTypeFromHandle(valuetype [mscorlib]System.RuntimeTypeHandle)
    IL_0068: stelem.any [mscorlib]System.Type
    IL_006d: ldc.i4.0
    IL_006e: newarr [mscorlib]System.Type
    IL_0073: ldc.i4.0
    IL_0074: newarr [FSharp.Core]Microsoft.FSharp.Quotations.FSharpExpr
    IL_0079: ldc.i4 372
    IL_007e: newarr [mscorlib]System.Byte
    IL_0083: dup
    IL_0084: ldtoken field valuetype '<PrivateImplementationDetails$ConsoleApplication2>'/T1805_372Bytes@ Program::field1806@
    IL_0089: call void [mscorlib]System.Runtime.CompilerServices.RuntimeHelpers::InitializeArray(class [mscorlib]System.Array, valuetype [mscorlib]System.RuntimeFieldHandle)
    IL_008e: call class [FSharp.Core]Microsoft.FSharp.Quotations.FSharpExpr [FSharp.Core]Microsoft.FSharp.Quotations.FSharpExpr::Deserialize40(class [mscorlib]System.Type, class [mscorlib]System.Type[], class [mscorlib]System.Type[], class [FSharp.Core]Microsoft.FSharp.Quotations.FSharpExpr[], uint8[])
    IL_0093: call class [FSharp.Core]Microsoft.FSharp.Quotations.FSharpExpr`1<!!0> [FSharp.Core]Microsoft.FSharp.Quotations.FSharpExpr::Cast<class [FSharp.Core]Microsoft.FSharp.Core.FSharpFunc`2<int32, class [FSharp.Core]Microsoft.FSharp.Core.FSharpFunc`2<int32, int32>>>(class [FSharp.Core]Microsoft.FSharp.Quotations.FSharpExpr)
    IL_0098: call void Program::foo(class [FSharp.Core]Microsoft.FSharp.Quotations.FSharpExpr)
    IL_009d: nop
    IL_009e: ldloc.0
    IL_009f: ldc.i4.1
    IL_00a0: add
    IL_00a1: stloc.0

    IL_00a2: ldloc.0
    IL_00a3: ldc.i4.s 11
    IL_00a5: blt IL_0008
// end loop

IL_00aa: ldc.i4.0
IL_00ab: ret

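这段 IL 代码很长,但循环体中基本上只做了以下几件事:构造类型信息数组;从静态字段加载保存该引用的 372 字节数组;调用 FSharpExpr.Deserialize40 重新创建引用对象,再通过 Cast 转换为带类型的 Expr 并传给 foo。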

所以根据这个观察,我的问题是:

  1. 虽然引用存储在一个静态字段中,但每次我们写 <@ ... @> 时,都会创建一个新的 Expr 实例,即使静态字段相同。所以我不能使用 Expr 实例作为键,最好能获取该静态字段的标记(token)并将其用作键。但我不知道如何获取该信息;
  2. 我们看到有很多 IL 指令只是重新创建一个引用实例,即使它们是相同的引用。这可能会有一些性能问题,F#编译器可以在这里优化吗?

此致, 翔.

@kvb 给出了精彩的回答。看起来我们只需要修复引用中的 Var 比较(当 var 有一个对应项并且具有相同的类型时即视为相等)。按照他的回答我做了以下测试,结果有效:

/// Equality comparer for quotations that treats two expressions as equal when
/// they differ only in the identity/names of their lambda-bound variables.
/// Variables that are not bound by an enclosing lambda (free variables) are
/// equal only to themselves.
let comparer =
    // `vs` maps each lambda-bound variable of the left expression to the
    // variable bound at the same position in the right expression.
    let rec compareQuots vs = function
        | ShapeLambda(v,e), ShapeLambda(v',e') ->
            // Parameters must agree on type even when the body never uses them;
            // otherwise `fun (x:int) -> 0` and `fun (x:string) -> 0` would be
            // reported as the same quotation.
            v.Type = v'.Type && compareQuots (vs |> Map.add v v') (e,e')
        | ShapeCombination(o,es), ShapeCombination(o',es') ->
            o = o' && (es.Length = es'.Length) && List.forall2 (fun q1 q2 -> compareQuots vs (q1, q2)) es es'
        | ShapeVar v, ShapeVar v' ->
            (match Map.tryFind v vs with
             // Bound variable: must correspond to the matching binder on the right.
             | Some bound -> bound = v' && v.Type = v'.Type
             // Free variable: only the very same variable matches.
             | None -> v = v')
        | _ -> false

    // `n` counts the enclosing lambdas; `vs` maps each bound variable to the
    // depth of its binder, so renamed variables contribute the same hash.
    let rec hashQuot n vs = function
        | ShapeLambda(v,e) ->
            hashQuot (n+1) (vs |> Map.add v n) e
        | ShapeCombination(o,es) ->
            es |> List.fold (fun h e -> 31 * h + hashQuot n vs e) (o.GetHashCode())
        | ShapeVar v ->
            // Map.find raised KeyNotFoundException for free variables; fall
            // back to the variable's own hash, consistent with `v = v'` above.
            (match Map.tryFind v vs with
             | Some depth -> depth
             | None -> v.GetHashCode())

    { new System.Collections.Generic.IEqualityComparer<_> with
        member __.Equals(q1,q2) = compareQuots Map.empty (q1,q2)
        member __.GetHashCode q = hashQuot 0 Map.empty q }

/// Stand-in for a compiled module: just the sequence number of the compilation.
type Module = int

/// Number of compilations performed so far.
let mutable counter = 0

/// Fake compiler used by the test: bumps `counter`, logs the compilation and
/// returns the new counter value. The quotation itself is not inspected.
let compile (expr:Expr) =
    counter <- counter + 1
    let moduleNumber = counter
    printfn "Compiling #.%d module..." moduleNumber
    moduleNumber

/// Compiled modules keyed by quotation; `comparer` decides which quotations
/// count as the same key.
let cache = ConcurrentDictionary<Expr, Module>(comparer)

/// Returns the module stored for `expr`, calling `compile` through GetOrAdd
/// when the cache holds no entry for it.
let jitCompile (expr:Expr) =
    cache.GetOrAdd(expr, fun quotation -> compile quotation)

// Checks that `jitCompile` reuses a cached module for quotations the comparer
// considers equal. The expected numbers are the sequence numbers returned by
// `compile`, so the assertions rely on running in this order against a fresh
// `cache` and `counter`.
[<Test>]
let testJITCompile() =
    // Same quotation twice: compiled once, then served from the cache.
    Assert.AreEqual(1, jitCompile <@ fun x y -> x + y @>)
    Assert.AreEqual(1, jitCompile <@ fun x y -> x + y @>)
    // Renaming the lambda variables still hits the first cached module.
    Assert.AreEqual(1, jitCompile <@ fun a b -> a + b @>)
    // A different body triggers a second compilation.
    Assert.AreEqual(2, jitCompile <@ fun a b -> a + b + 1 @>)

    // Builds a larger quotation by splicing `expr` into it.
    let combineExpr (expr:Expr<int -> int -> int>) =
        <@ fun (a:int) (b:int) -> ((%expr) a b) + 1 @> 

    // Although (combineExpr <@ (+) @>) computes the same function as
    // <@ fun a b -> a + b + 1 @>, the two are treated as different expressions.
    Assert.AreEqual(3, jitCompile (combineExpr <@ (+) @>))
    Assert.AreEqual(3, jitCompile (combineExpr <@ (+) @>))
    Assert.AreEqual(4, jitCompile (combineExpr <@ (-) @>))

每次通过循环创建一个新对象并不一定意味着该对象不能用作键,只要对象每次比较相等即可。

您遇到的真正问题是,"相同的"引用对您的含义与对 F# 编译器的含义不同,尤其是涉及到引用中的变量时。例如,您可以验证

<@ [1 + 1] @> = <@ [1 + 1] @>

计算结果为 true,并且

<@ fun x -> x @> = <@ fun y -> y @>

计算为 false(这很有道理,因为这两个 lambda 在变量重命名的意义下等价,但并不完全相同)。也许更令人惊讶的是,您会看到

<@ fun x -> x @> = <@ fun x -> x @>

的计算结果也为 false。这是因为每个引用中的变量都被视为不同的变量,而这些变量恰好共享相同的名称。您会在循环中看到相同的行为 - 每次迭代的变量 x 被认为是不同的。

不过,并非毫无办法;您需要做的就是使用自定义的 IEqualityComparer<Quotations.Expr>。我认为下面这样的实现应该可以把仅在变量重命名上有差异的引用识别为相同:

/// Equality comparer for quotations that treats two expressions as equal when
/// they differ only in the identity/names of their lambda-bound variables.
/// Variables that are not bound by an enclosing lambda (free variables) are
/// equal only to themselves.
let comparer =
    // `vs` maps each lambda-bound variable of the left expression to the
    // variable bound at the same position in the right expression.
    let rec compareQuots vs = function
    | Quotations.ExprShape.ShapeLambda(v,e), Quotations.ExprShape.ShapeLambda(v',e') ->
        // Parameters must agree on type even when the body never uses them;
        // otherwise `fun (x:int) -> 0` and `fun (x:string) -> 0` would be
        // reported as the same quotation.
        v.Type = v'.Type && compareQuots (vs |> Map.add v v') (e,e')
    | Quotations.ExprShape.ShapeCombination(o,es), Quotations.ExprShape.ShapeCombination(o',es') ->
        o = o' && (es.Length = es'.Length) && List.forall2 (fun q1 q2 -> compareQuots vs (q1, q2)) es es'
    | Quotations.ExprShape.ShapeVar v, Quotations.ExprShape.ShapeVar v' ->
        (match Map.tryFind v vs with
         // Bound variable: must correspond to the matching binder on the right.
         | Some bound -> bound = v' && v.Type = v'.Type
         // Free variable: only the very same variable matches.
         | None -> v = v')
    | _ -> false

    // `n` counts the enclosing lambdas; `vs` maps each bound variable to the
    // depth of its binder, so renamed variables contribute the same hash.
    let rec hashQuot n vs = function
    | Quotations.ExprShape.ShapeLambda(v,e) ->
        hashQuot (n+1) (vs |> Map.add v n) e
    | Quotations.ExprShape.ShapeCombination(o,es) ->
        es |> List.fold (fun h e -> 31 * h + hashQuot n vs e) (o.GetHashCode())
    | Quotations.ExprShape.ShapeVar v ->
        // Map.find raised KeyNotFoundException for free variables; fall back
        // to the variable's own hash, consistent with `v = v'` above.
        (match Map.tryFind v vs with
         | Some depth -> depth
         | None -> v.GetHashCode())

    { new System.Collections.Generic.IEqualityComparer<_> with
        member __.Equals(q1,q2) = compareQuots Map.empty (q1,q2)
        member __.GetHashCode q = hashQuot 0 Map.empty q }

// Cache of compiled modules; keys are compared and hashed with `comparer`.
let cache = new ConcurrentDictionary<Expr, Module>(comparer)