C# 调用英特尔 MKL cblas_dgemm_batch

C# calling Intel MKL cblas_dgemm_batch

我可以从 C# 成功调用英特尔 MKL 的 cblas_dgemm,请参见以下 P/Invoke 声明及其对应的 C 原型:

/// <summary>
/// P/Invoke binding for the native <c>cblas_dgemm</c> entry point exported by the
/// <c>custom_mkl</c> library (cdecl, exact name match, no last-error capture).
/// </summary>
/// <param name="Order">Native <c>Layout</c> argument, passed as a plain integer.</param>
/// <param name="TransA">Native <c>transa</c> argument, passed as a plain integer.</param>
/// <param name="TransB">Native <c>transb</c> argument, passed as a plain integer.</param>
/// <param name="M">Native <c>m</c> argument.</param>
/// <param name="N">Native <c>n</c> argument.</param>
/// <param name="K">Native <c>k</c> argument.</param>
/// <param name="alpha">Native <c>alpha</c> scalar.</param>
/// <param name="A">Input matrix, handed to native code as <c>const double*</c>.</param>
/// <param name="lda">Leading dimension passed for <paramref name="A"/>.</param>
/// <param name="B">Input matrix, handed to native code as <c>const double*</c>.</param>
/// <param name="ldb">Leading dimension passed for <paramref name="B"/>.</param>
/// <param name="beta">Native <c>beta</c> scalar.</param>
/// <param name="C">Matrix marshaled in both directions; native code receives <c>double*</c>.</param>
/// <param name="ldc">Leading dimension passed for <paramref name="C"/>.</param>
[DllImport(
    "custom_mkl",
    CallingConvention = CallingConvention.Cdecl,
    ExactSpelling = true,
    SetLastError = false)]
internal static extern void cblas_dgemm(
    int Order,
    int TransA,
    int TransB,
    MKL_INT M,
    MKL_INT N,
    MKL_INT K,
    double alpha,
    [In] double[,] A,
    MKL_INT lda,
    [In] double[,] B,
    MKL_INT ldb,
    double beta,
    [In, Out] double[,] C,
    MKL_INT ldc);

void cblas_dgemm (const CBLAS_LAYOUT Layout, const CBLAS_TRANSPOSE transa, const CBLAS_TRANSPOSE transb, const MKL_INT m, const MKL_INT n, const MKL_INT k, const double alpha, const double *a, const MKL_INT lda, const double *b, const MKL_INT ldb, const double beta, double *c, const MKL_INT ldc);

但我无法从 C# 调用 cblas_dgemm_batch,请参见以下代码:

// First attempt at binding the native cblas_dgemm_batch entry point of custom_mkl.
// The matrix batches (a_array, b_array, c_array) are declared as double[][,] and rely on
// the default marshaling; the native prototype expects double** for these parameters.
// NOTE(review): per the surrounding text this declaration fails when called, and the
// nested array parameters are the suspected cause.
[DllImport("custom_mkl", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, SetLastError = false)] // not working
internal static extern void cblas_dgemm_batch(
    int Layout, [In] int[] transa_array, [In] int[] transb_array, [In] MKL_INT[] m_array, [In] MKL_INT[] n_array, [In] MKL_INT[]  k_array, 
    [In] double[] alpha_array, [In] double[][,] a_array, [In] MKL_INT[] lda_array, [In] double[][,] b_array, [In] MKL_INT[] ldb_array,
    [In] double[] beta_array, [In, Out] double[][,] c_array, [In] MKL_INT[] ldc_array, MKL_INT group_count, [In] MKL_INT[] group_size);

void cblas_dgemm_batch (const CBLAS_LAYOUT Layout, const CBLAS_TRANSPOSE* transa_array, const CBLAS_TRANSPOSE* transb_array, const MKL_INT* m_array, const MKL_INT* n_array, const MKL_INT* k_array, const double* alpha_array, const double **a_array, const MKL_INT* lda_array, const double **b_array, const MKL_INT* ldb_array, const double* beta_array, double **c_array, const MKL_INT* ldc_array, const MKL_INT group_count, const MKL_INT* group_size);

我收到以下错误消息:

我知道问题出在嵌套数组参数上:这些参数在本机端应该是一个指针数组,其中每个指针指向一个矩阵的数据。但是,应该如何从 C# 中调用 cblas_dgemm_batch?

解决方案是为交错数组(jagged array)使用如下自定义封送处理器(custom marshaler):

/// <summary>
/// Custom marshaler that hands a managed array of arrays (for example <c>double[][,]</c>)
/// to native code as a pinned, contiguous array of pointers, one pointer per inner array.
/// </summary>
/// <remarks>
/// The inner arrays are pinned in place rather than copied, so anything native code writes
/// through the pointers lands directly in the managed arrays.
/// All bookkeeping is keyed by the native pointer returned from
/// <see cref="MarshalManagedToNative"/> instead of being stored in instance fields. The
/// marshaler is applied to several parameters of the same call, and the runtime may reuse a
/// single marshaler instance for them; per-instance fields would then be overwritten by the
/// later parameters, leaking the earlier pins and double-freeing the last ones.
/// </remarks>
class JaggedArrayMarshaler : ICustomMarshaler
{
    /// <summary>Everything that was pinned for one marshaled argument.</summary>
    private sealed class PinnedState
    {
        public Array[] Arrays;
        public GCHandle[] ElementHandles;
        public GCHandle Buffer;
    }

    // Guards LiveStates; marshaling may happen concurrently on several threads.
    private static readonly object Gate = new object();

    // Native pointer (address of the pinned pointer table) -> state to release later.
    private static readonly System.Collections.Generic.Dictionary<IntPtr, PinnedState> LiveStates =
        new System.Collections.Generic.Dictionary<IntPtr, PinnedState>();

    /// <summary>
    /// Factory method the runtime looks up by name to obtain the marshaler.
    /// </summary>
    /// <param name="cookie">Optional cookie from the <c>MarshalAs</c> attribute; unused.</param>
    /// <returns>A marshaler instance. Instances are interchangeable because state is static.</returns>
    public static ICustomMarshaler GetInstance(string cookie)
    {
        return new JaggedArrayMarshaler();
    }

    /// <summary>Nothing to do: no managed copy is ever created.</summary>
    /// <param name="ManagedObj">Ignored.</param>
    public void CleanUpManagedData(object ManagedObj)
    {
    }

    /// <summary>
    /// Unpins the pointer table and the inner arrays that belong to
    /// <paramref name="pNativeData"/>.
    /// </summary>
    /// <param name="pNativeData">
    /// Pointer previously returned by <see cref="MarshalManagedToNative"/>. Unknown pointers
    /// (including <see cref="IntPtr.Zero"/> and already cleaned-up ones) are ignored.
    /// </param>
    public void CleanUpNativeData(IntPtr pNativeData)
    {
        PinnedState state;
        lock (Gate)
        {
            if (!LiveStates.TryGetValue(pNativeData, out state))
            {
                return;
            }
            LiveStates.Remove(pNativeData);
        }
        Release(state);
    }

    /// <summary>Size of the native representation: a single pointer.</summary>
    /// <returns><see cref="IntPtr.Size"/>.</returns>
    public int GetNativeDataSize()
    {
        return IntPtr.Size;
    }

    /// <summary>
    /// Pins every inner array and builds a pinned table holding their addresses.
    /// </summary>
    /// <param name="ManagedObj">An array whose elements are arrays of a pinnable element type.</param>
    /// <returns>
    /// Address of the pinned pointer table, or <see cref="IntPtr.Zero"/> when
    /// <paramref name="ManagedObj"/> is null.
    /// </returns>
    /// <exception cref="InvalidCastException"><paramref name="ManagedObj"/> is not an array of arrays.</exception>
    /// <exception cref="ArgumentException">An inner array cannot be pinned.</exception>
    public IntPtr MarshalManagedToNative(object ManagedObj)
    {
        if (ManagedObj == null)
        {
            return IntPtr.Zero;
        }

        Array[] arrays = (Array[])ManagedObj;
        PinnedState state = new PinnedState();
        state.Arrays = arrays;
        state.ElementHandles = new GCHandle[arrays.Length];
        try
        {
            IntPtr[] pointers = new IntPtr[arrays.Length];
            for (int i = 0; i < arrays.Length; i++)
            {
                state.ElementHandles[i] = GCHandle.Alloc(arrays[i], GCHandleType.Pinned);
                pointers[i] = state.ElementHandles[i].AddrOfPinnedObject();
            }

            state.Buffer = GCHandle.Alloc(pointers, GCHandleType.Pinned);
            IntPtr address = state.Buffer.AddrOfPinnedObject();
            lock (Gate)
            {
                LiveStates[address] = state;
            }
            return address;
        }
        catch
        {
            // Do not leave arrays pinned when pinning fails half-way through.
            Release(state);
            throw;
        }
    }

    /// <summary>
    /// Returns the managed array that was marshaled to <paramref name="pNativeData"/>.
    /// No copy-back is needed because native code wrote into the pinned arrays directly.
    /// </summary>
    /// <param name="pNativeData">Pointer previously returned by <see cref="MarshalManagedToNative"/>.</param>
    /// <returns>The original managed array, or null when the pointer is not tracked.</returns>
    public object MarshalNativeToManaged(IntPtr pNativeData)
    {
        lock (Gate)
        {
            PinnedState state;
            return LiveStates.TryGetValue(pNativeData, out state) ? state.Arrays : null;
        }
    }

    /// <summary>Frees every handle in <paramref name="state"/> that is still allocated.</summary>
    private static void Release(PinnedState state)
    {
        if (state.Buffer.IsAllocated)
        {
            state.Buffer.Free();
        }
        for (int i = 0; i < state.ElementHandles.Length; i++)
        {
            if (state.ElementHandles[i].IsAllocated)
            {
                state.ElementHandles[i].Free();
            }
        }
    }
}

然后在 P/Invoke 声明中,对三个矩阵数组参数应用上面的封送处理器:

/// <summary>
/// P/Invoke binding for the native <c>cblas_dgemm_batch</c> entry point exported by the
/// <c>custom_mkl</c> library (cdecl, exact name match, no last-error capture).
/// The three matrix batches go through <see cref="JaggedArrayMarshaler"/>.
/// </summary>
/// <param name="Layout">Native <c>Layout</c> argument, passed as a plain integer.</param>
/// <param name="transa_array">Native <c>transa_array</c>, passed as an integer array.</param>
/// <param name="transb_array">Native <c>transb_array</c>, passed as an integer array.</param>
/// <param name="m_array">Native <c>m_array</c>.</param>
/// <param name="n_array">Native <c>n_array</c>.</param>
/// <param name="k_array">Native <c>k_array</c>.</param>
/// <param name="alpha_array">Native <c>alpha_array</c>.</param>
/// <param name="a_array">Input matrices; native code receives <c>const double**</c>.</param>
/// <param name="lda_array">Native <c>lda_array</c>.</param>
/// <param name="b_array">Input matrices; native code receives <c>const double**</c>.</param>
/// <param name="ldb_array">Native <c>ldb_array</c>.</param>
/// <param name="beta_array">Native <c>beta_array</c>.</param>
/// <param name="c_array">Matrices marshaled in both directions; native code receives <c>double**</c>.</param>
/// <param name="ldc_array">Native <c>ldc_array</c>.</param>
/// <param name="group_count">Native <c>group_count</c>.</param>
/// <param name="group_size">Native <c>group_size</c>.</param>
[DllImport(
    "custom_mkl",
    CallingConvention = CallingConvention.Cdecl,
    ExactSpelling = true,
    SetLastError = false)]
internal static extern void cblas_dgemm_batch(
    int Layout,
    [In] int[] transa_array,
    [In] int[] transb_array,
    [In] MKL_INT[] m_array,
    [In] MKL_INT[] n_array,
    [In] MKL_INT[] k_array,
    [In] double[] alpha_array,
    [In, MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(JaggedArrayMarshaler))] double[][,] a_array,
    [In] MKL_INT[] lda_array,
    [In, MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(JaggedArrayMarshaler))] double[][,] b_array,
    [In] MKL_INT[] ldb_array,
    [In] double[] beta_array,
    [In, Out, MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(JaggedArrayMarshaler))] double[][,] c_array,
    [In] MKL_INT[] ldc_array,
    MKL_INT group_count,
    [In] MKL_INT[] group_size);

我正在使用以下代码对其进行测试:

/// <summary>
/// Multiplies matching pairs of matrices through a single <c>cblas_dgemm_batch</c> call.
/// Every pair is submitted as its own group of size 1, with alpha = 1 and beta = 0.
/// </summary>
/// <param name="a">Left-hand matrices.</param>
/// <param name="b">Right-hand matrices; must contain as many matrices as <paramref name="a"/>.</param>
/// <returns>
/// One freshly allocated result matrix per input pair, sized rows-of-a by columns-of-b.
/// An empty array when the inputs are empty.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="a"/> or <paramref name="b"/> is null.</exception>
/// <exception cref="System.ArgumentException">
/// The batches differ in length, contain a null matrix, or a pair has mismatched inner dimensions.
/// </exception>
public static double[][,] Dot(double[][,] a, double[][,] b)
{
    const int RowMajor = 101; // row-major arrays
    const int NoTrans = 111;  // trans='N'

    if (a == null) throw new System.ArgumentNullException("a");
    if (b == null) throw new System.ArgumentNullException("b");

    int n0 = a.Length;
    if (b.Length != n0) throw new System.ArgumentException("Group size must be the same");

    // Nothing to multiply: skip the native call instead of passing a zero group count.
    if (n0 == 0) return new double[0][,];

    int[] TransA = new int[n0];
    int[] TransB = new int[n0];
    MKL_INT[] M = new MKL_INT[n0];
    MKL_INT[] N = new MKL_INT[n0];
    MKL_INT[] K = new MKL_INT[n0];
    double[] alpha = new double[n0];
    double[] beta = new double[n0];
    double[][,] c = new double[n0][,];
    MKL_INT GroupCount = n0;
    MKL_INT[] GroupSize = new MKL_INT[n0];
    for (int i0 = 0; i0 < n0; i0++)
    {
        if (a[i0] == null || b[i0] == null) throw new System.ArgumentException("Matrices must not be null");

        int n1 = a[i0].GetLength(0);
        int n2 = a[i0].GetLength(1);
        int n3 = b[i0].GetLength(0);
        int n4 = b[i0].GetLength(1);
        if (n2 != n3) throw new System.ArgumentException("Inner matrix dimensions must agree");
        TransA[i0] = NoTrans;
        TransB[i0] = NoTrans;
        M[i0] = n1; N[i0] = n4; K[i0] = n2;
        alpha[i0] = 1; beta[i0] = 0;
        c[i0] = new double[n1, n4];
        GroupSize[i0] = 1;
    }

    // The leading dimensions passed are the column counts: K for a, N for b and c.
    MKL_INT[] lda = K;
    MKL_INT[] ldb = N;
    MKL_INT[] ldc = N;
    _mkl.cblas_dgemm_batch(RowMajor, TransA, TransB, M, N, K, alpha, a, lda, b, ldb, beta, c, ldc, GroupCount, GroupSize);
    return c;
}

// Two 2x2 left-hand matrices and two 2x2 right-hand matrices, paired by index.
double[,] A0 = new double[,] { { 1, 2 }, { 3, 4 } };
double[,] A1 = new double[,] { { 5, 6 }, { 7, 8 } };
double[,] B0 = new double[,] { { 9, 10 }, { 11, 12 } };
double[,] B1 = new double[,] { { 13, 14 }, { 15, 16 } };
// Group the matrices into batches: A[i] is multiplied with B[i].
double[][,] A = new double[][,] { A0, A1 };
double[][,] B = new double[][,] { B0, B1 };
// NOTE(review): assuming Dot yields the plain matrix product of each pair, the expected
// result is C[0] = { { 31, 34 }, { 71, 78 } } and C[1] = { { 155, 166 }, { 211, 226 } } - confirm.
double[][,] C = MKL.Dot(A, B);

运行这段代码时,它可以正常工作。我可以在调试器中看到,在调用 cblas_dgemm_batch 之前,MarshalManagedToNative 方法被调用了 3 次(符合预期)。