C# 调用英特尔 MKL cblas_dgemm_batch
C# calling Intel MKL cblas_dgemm_batch
我可以从 C# 调用英特尔 MKL 的 cblas_dgemm,请参见以下代码:
/// <summary>
/// P/Invoke declaration for the <c>cblas_dgemm</c> entry point of the "custom_mkl"
/// native library (cdecl, exact export name).
/// </summary>
/// <remarks>
/// The three matrices are passed as rectangular <c>double[,]</c> arrays.
/// <paramref name="A"/> and <paramref name="B"/> are input-only, while
/// <paramref name="C"/> is marshalled in both directions.
/// </remarks>
[DllImport("custom_mkl", ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void cblas_dgemm(
    int Order,
    int TransA,
    int TransB,
    MKL_INT M,
    MKL_INT N,
    MKL_INT K,
    double alpha,
    [In] double[,] A,
    MKL_INT lda,
    [In] double[,] B,
    MKL_INT ldb,
    double beta,
    [In, Out] double[,] C,
    MKL_INT ldc);
和
/* C prototype of cblas_dgemm: a and b are read-only inputs, c is the writable output. */
void cblas_dgemm(
    const CBLAS_LAYOUT Layout,
    const CBLAS_TRANSPOSE transa,
    const CBLAS_TRANSPOSE transb,
    const MKL_INT m,
    const MKL_INT n,
    const MKL_INT k,
    const double alpha,
    const double *a,
    const MKL_INT lda,
    const double *b,
    const MKL_INT ldb,
    const double beta,
    double *c,
    const MKL_INT ldc);
但我无法从 C# 调用 cblas_dgemm_batch,请参见以下代码:
// First attempt at a P/Invoke declaration for cblas_dgemm_batch.
// a_array, b_array and c_array are declared as double[][,] (an array of rectangular
// arrays) without any custom marshalling, which is what makes this declaration fail.
[DllImport("custom_mkl", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, SetLastError = false)] // not working: MarshalDirectiveException on parameter #8 (a_array)
internal static extern void cblas_dgemm_batch(
int Layout, [In] int[] transa_array, [In] int[] transb_array, [In] MKL_INT[] m_array, [In] MKL_INT[] n_array, [In] MKL_INT[] k_array,
[In] double[] alpha_array, [In] double[][,] a_array, [In] MKL_INT[] lda_array, [In] double[][,] b_array, [In] MKL_INT[] ldb_array,
[In] double[] beta_array, [In, Out] double[][,] c_array, [In] MKL_INT[] ldc_array, MKL_INT group_count, [In] MKL_INT[] group_size);
和
/*
 * C prototype of cblas_dgemm_batch. Every per-group argument is a pointer to an
 * array; a_array, b_array and c_array are pointers to arrays of matrix pointers.
 */
void cblas_dgemm_batch(
    const CBLAS_LAYOUT Layout,
    const CBLAS_TRANSPOSE* transa_array,
    const CBLAS_TRANSPOSE* transb_array,
    const MKL_INT* m_array,
    const MKL_INT* n_array,
    const MKL_INT* k_array,
    const double* alpha_array,
    const double **a_array,
    const MKL_INT* lda_array,
    const double **b_array,
    const MKL_INT* ldb_array,
    const double* beta_array,
    double **c_array,
    const MKL_INT* ldc_array,
    const MKL_INT group_count,
    const MKL_INT* group_size);
我收到以下错误消息:
- System.Runtime.InteropServices.MarshalDirectiveException
- 无法封送 'parameter #8': 没有对嵌套数组的封送支持。
我可以理解问题出在嵌套数组参数上。该参数应该是指向数组的指针数组。但是如何从 C# 中调用 cblas_dgemm_batch?
解决方案是为交错数组(jagged array)使用如下的自定义封送拆收器(custom marshaler):
/// <summary>
/// Custom marshaler that hands a jagged array (an array whose elements are themselves
/// arrays, e.g. <c>double[][,]</c>) to native code as a pointer to a table of pointers.
/// </summary>
/// <remarks>
/// Each inner array is pinned, and so is the table that holds their addresses, so the
/// native callee works directly on the managed memory and no data is copied.
/// Bookkeeping is keyed by the native pointer that was handed out instead of being kept
/// in instance fields: the runtime may reuse one marshaler object for several parameters
/// of the same call, and per-instance fields would then be overwritten by each later
/// parameter, leaking the earlier pins and freeing the last ones more than once.
/// </remarks>
class JaggedArrayMarshaler : ICustomMarshaler
{
    /// <summary>Everything that was pinned on behalf of one marshalled argument.</summary>
    private sealed class PinnedArgument
    {
        public Array[] Source;
        public GCHandle[] ElementPins;
        public GCHandle PointerTablePin;
    }

    // Arguments currently passed to native code, keyed by the pointer-table address
    // that MarshalManagedToNative returned for them.
    private static readonly System.Collections.Concurrent.ConcurrentDictionary<IntPtr, PinnedArgument> Live =
        new System.Collections.Concurrent.ConcurrentDictionary<IntPtr, PinnedArgument>();

    /// <summary>Factory method looked up by the runtime to create the marshaler.</summary>
    /// <param name="cookie">Marshaler cookie; not used by this implementation.</param>
    public static ICustomMarshaler GetInstance(string cookie)
    {
        return new JaggedArrayMarshaler();
    }

    /// <summary>Nothing to do: no managed object is created by this marshaler.</summary>
    public void CleanUpManagedData(object ManagedObj)
    {
    }

    /// <summary>
    /// Releases the pins that belong to <paramref name="pNativeData"/>. Pointers that
    /// were not produced by <see cref="MarshalManagedToNative"/> (including
    /// <see cref="IntPtr.Zero"/>) or that were already cleaned up are ignored.
    /// </summary>
    public void CleanUpNativeData(IntPtr pNativeData)
    {
        PinnedArgument state;
        if (Live.TryRemove(pNativeData, out state))
        {
            Release(state);
        }
    }

    /// <summary>Size of the native representation: a single pointer.</summary>
    public int GetNativeDataSize()
    {
        return IntPtr.Size;
    }

    /// <summary>
    /// Pins every inner array plus a table of their addresses and returns the address
    /// of that table.
    /// </summary>
    /// <param name="ManagedObj">The jagged array; must be convertible to <c>Array[]</c>.</param>
    /// <returns>
    /// Address of the pinned pointer table, or <see cref="IntPtr.Zero"/> when
    /// <paramref name="ManagedObj"/> is null.
    /// </returns>
    public IntPtr MarshalManagedToNative(object ManagedObj)
    {
        if (ManagedObj == null)
        {
            return IntPtr.Zero;
        }

        PinnedArgument state = new PinnedArgument();
        state.Source = (Array[])ManagedObj;
        state.ElementPins = new GCHandle[state.Source.Length];

        try
        {
            IntPtr[] table = new IntPtr[state.Source.Length];
            for (int i = 0; i < table.Length; i++)
            {
                state.ElementPins[i] = GCHandle.Alloc(state.Source[i], GCHandleType.Pinned);
                table[i] = state.ElementPins[i].AddrOfPinnedObject();
            }

            state.PointerTablePin = GCHandle.Alloc(table, GCHandleType.Pinned);
            IntPtr native = state.PointerTablePin.AddrOfPinnedObject();
            Live[native] = state;
            return native;
        }
        catch
        {
            // Pinning failed part-way (e.g. an element cannot be pinned): do not leave
            // the arrays that were already pinned stuck in place.
            Release(state);
            throw;
        }
    }

    /// <summary>
    /// Returns the managed jagged array that <paramref name="pNativeData"/> was created
    /// from, or null when the pointer is not known to this marshaler.
    /// </summary>
    public object MarshalNativeToManaged(IntPtr pNativeData)
    {
        PinnedArgument state;
        return Live.TryGetValue(pNativeData, out state) ? state.Source : null;
    }

    // Frees the handles in place (GCHandle is a struct, so freeing a copy would leave
    // a stale handle value behind in the array or field).
    private static void Release(PinnedArgument state)
    {
        if (state.PointerTablePin.IsAllocated)
        {
            state.PointerTablePin.Free();
        }

        for (int i = 0; i < state.ElementPins.Length; i++)
        {
            if (state.ElementPins[i].IsAllocated)
            {
                state.ElementPins[i].Free();
            }
        }
    }
}
并在 P/Invoke 声明中使用上面的自定义封送拆收器:
/// <summary>
/// P/Invoke declaration for <c>cblas_dgemm_batch</c> in the "custom_mkl" native library.
/// </summary>
/// <remarks>
/// The three jagged matrix arrays (<paramref name="a_array"/>, <paramref name="b_array"/>,
/// <paramref name="c_array"/>) are routed through <see cref="JaggedArrayMarshaler"/>;
/// every other array parameter uses default marshalling.
/// </remarks>
[DllImport("custom_mkl", ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void cblas_dgemm_batch(
    int Layout,
    [In] int[] transa_array,
    [In] int[] transb_array,
    [In] MKL_INT[] m_array,
    [In] MKL_INT[] n_array,
    [In] MKL_INT[] k_array,
    [In] double[] alpha_array,
    [In, MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(JaggedArrayMarshaler))] double[][,] a_array,
    [In] MKL_INT[] lda_array,
    [In, MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(JaggedArrayMarshaler))] double[][,] b_array,
    [In] MKL_INT[] ldb_array,
    [In] double[] beta_array,
    [In, Out, MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(JaggedArrayMarshaler))] double[][,] c_array,
    [In] MKL_INT[] ldc_array,
    MKL_INT group_count,
    [In] MKL_INT[] group_size);
我正在使用以下代码对其进行测试:
/// <summary>
/// Multiplies matching pairs of row-major matrices with a single
/// <c>cblas_dgemm_batch</c> call: result <c>i</c> is produced from <c>a[i]</c> and
/// <c>b[i]</c> with no transposition, alpha = 1 and beta = 0.
/// </summary>
/// <param name="a">Left operands; the column count of <c>a[i]</c> must equal the row count of <c>b[i]</c>.</param>
/// <param name="b">Right operands; must contain as many matrices as <paramref name="a"/>.</param>
/// <returns>
/// A new array in which element <c>i</c> has as many rows as <c>a[i]</c> and as many
/// columns as <c>b[i]</c>.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="a"/> or <paramref name="b"/> is null.</exception>
/// <exception cref="System.ArgumentException">
/// The two arrays differ in length, contain a null matrix, or a pair has incompatible
/// inner dimensions.
/// </exception>
public static double[][,] Dot(double[][,] a, double[][,] b)
{
    if (a == null) throw new System.ArgumentNullException("a");
    if (b == null) throw new System.ArgumentNullException("b");

    int n0 = a.Length;
    if (b.Length != n0) throw new System.ArgumentException("Group size must be the same");

    int Order = 101; // row-major arrays
    int[] TransA = new int[n0];
    int[] TransB = new int[n0];
    MKL_INT[] M = new MKL_INT[n0];
    MKL_INT[] N = new MKL_INT[n0];
    MKL_INT[] K = new MKL_INT[n0];
    MKL_INT[] lda = new MKL_INT[n0];
    MKL_INT[] ldb = new MKL_INT[n0];
    MKL_INT[] ldc = new MKL_INT[n0];
    double[] alpha = new double[n0];
    double[] beta = new double[n0];
    double[][,] c = new double[n0][,];
    MKL_INT GroupCount = n0; // every matrix pair forms its own group ...
    MKL_INT[] GroupSize = new MKL_INT[n0];

    for (int i0 = 0; i0 < n0; i0++)
    {
        if (a[i0] == null) throw new System.ArgumentException("Matrix array must not contain null entries", "a");
        if (b[i0] == null) throw new System.ArgumentException("Matrix array must not contain null entries", "b");

        int n1 = a[i0].GetLength(0);
        int n2 = a[i0].GetLength(1);
        int n3 = b[i0].GetLength(0);
        int n4 = b[i0].GetLength(1);
        if (n2 != n3) throw new System.ArgumentException("Inner matrix dimensions must agree");

        TransA[i0] = 111; // trans='N'
        TransB[i0] = 111; // trans='N'
        M[i0] = n1; N[i0] = n4; K[i0] = n2;

        // Row-major, non-transposed operands: the leading dimension is the column
        // count, but CBLAS requires it to be at least 1 even for empty matrices.
        lda[i0] = System.Math.Max(1, n2);
        ldb[i0] = System.Math.Max(1, n4);
        ldc[i0] = System.Math.Max(1, n4);

        alpha[i0] = 1; beta[i0] = 0;
        c[i0] = new double[n1, n4];
        GroupSize[i0] = 1; // ... of size one
    }

    _mkl.cblas_dgemm_batch(Order, TransA, TransB, M, N, K, alpha, a, lda, b, ldb, beta, c, ldc, GroupCount, GroupSize);
    return c;
}
和
// Build two batches of two 2x2 matrices each and pass them to MKL.Dot.
double[,] A0 = { { 1, 2 }, { 3, 4 } };
double[,] A1 = { { 5, 6 }, { 7, 8 } };
double[,] B0 = { { 9, 10 }, { 11, 12 } };
double[,] B1 = { { 13, 14 }, { 15, 16 } };

double[][,] A = { A0, A1 };
double[][,] B = { B0, B1 };

double[][,] C = MKL.Dot(A, B);
当我运行代码时,它可以正常工作。我可以在调试器中看到,在调用 cblas_dgemm_batch 之前,方法 MarshalManagedToNative 被调用了 3 次(如预期的那样)。
我可以从 C# 调用英特尔 MKL 的 cblas_dgemm,请参见以下代码:
/// <summary>
/// P/Invoke declaration for the <c>cblas_dgemm</c> entry point of the "custom_mkl"
/// native library (cdecl, exact export name).
/// </summary>
/// <remarks>
/// The three matrices are passed as rectangular <c>double[,]</c> arrays.
/// <paramref name="A"/> and <paramref name="B"/> are input-only, while
/// <paramref name="C"/> is marshalled in both directions.
/// </remarks>
[DllImport("custom_mkl", ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void cblas_dgemm(
    int Order,
    int TransA,
    int TransB,
    MKL_INT M,
    MKL_INT N,
    MKL_INT K,
    double alpha,
    [In] double[,] A,
    MKL_INT lda,
    [In] double[,] B,
    MKL_INT ldb,
    double beta,
    [In, Out] double[,] C,
    MKL_INT ldc);
和
/* C prototype of cblas_dgemm: a and b are read-only inputs, c is the writable output. */
void cblas_dgemm(
    const CBLAS_LAYOUT Layout,
    const CBLAS_TRANSPOSE transa,
    const CBLAS_TRANSPOSE transb,
    const MKL_INT m,
    const MKL_INT n,
    const MKL_INT k,
    const double alpha,
    const double *a,
    const MKL_INT lda,
    const double *b,
    const MKL_INT ldb,
    const double beta,
    double *c,
    const MKL_INT ldc);
但我无法从 C# 调用 cblas_dgemm_batch,请参见以下代码:
// First attempt at a P/Invoke declaration for cblas_dgemm_batch.
// a_array, b_array and c_array are declared as double[][,] (an array of rectangular
// arrays) without any custom marshalling, which is what makes this declaration fail.
[DllImport("custom_mkl", CallingConvention = CallingConvention.Cdecl, ExactSpelling = true, SetLastError = false)] // not working: MarshalDirectiveException on parameter #8 (a_array)
internal static extern void cblas_dgemm_batch(
int Layout, [In] int[] transa_array, [In] int[] transb_array, [In] MKL_INT[] m_array, [In] MKL_INT[] n_array, [In] MKL_INT[] k_array,
[In] double[] alpha_array, [In] double[][,] a_array, [In] MKL_INT[] lda_array, [In] double[][,] b_array, [In] MKL_INT[] ldb_array,
[In] double[] beta_array, [In, Out] double[][,] c_array, [In] MKL_INT[] ldc_array, MKL_INT group_count, [In] MKL_INT[] group_size);
和
/*
 * C prototype of cblas_dgemm_batch. Every per-group argument is a pointer to an
 * array; a_array, b_array and c_array are pointers to arrays of matrix pointers.
 */
void cblas_dgemm_batch(
    const CBLAS_LAYOUT Layout,
    const CBLAS_TRANSPOSE* transa_array,
    const CBLAS_TRANSPOSE* transb_array,
    const MKL_INT* m_array,
    const MKL_INT* n_array,
    const MKL_INT* k_array,
    const double* alpha_array,
    const double **a_array,
    const MKL_INT* lda_array,
    const double **b_array,
    const MKL_INT* ldb_array,
    const double* beta_array,
    double **c_array,
    const MKL_INT* ldc_array,
    const MKL_INT group_count,
    const MKL_INT* group_size);
我收到以下错误消息:
- System.Runtime.InteropServices.MarshalDirectiveException
- 无法封送 'parameter #8': 没有对嵌套数组的封送支持。
我可以理解问题出在嵌套数组参数上。该参数应该是指向数组的指针数组。但是如何从 C# 中调用 cblas_dgemm_batch?
解决方案是为交错数组(jagged array)使用如下的自定义封送拆收器(custom marshaler):
/// <summary>
/// Custom marshaler that hands a jagged array (an array whose elements are themselves
/// arrays, e.g. <c>double[][,]</c>) to native code as a pointer to a table of pointers.
/// </summary>
/// <remarks>
/// Each inner array is pinned, and so is the table that holds their addresses, so the
/// native callee works directly on the managed memory and no data is copied.
/// Bookkeeping is keyed by the native pointer that was handed out instead of being kept
/// in instance fields: the runtime may reuse one marshaler object for several parameters
/// of the same call, and per-instance fields would then be overwritten by each later
/// parameter, leaking the earlier pins and freeing the last ones more than once.
/// </remarks>
class JaggedArrayMarshaler : ICustomMarshaler
{
    /// <summary>Everything that was pinned on behalf of one marshalled argument.</summary>
    private sealed class PinnedArgument
    {
        public Array[] Source;
        public GCHandle[] ElementPins;
        public GCHandle PointerTablePin;
    }

    // Arguments currently passed to native code, keyed by the pointer-table address
    // that MarshalManagedToNative returned for them.
    private static readonly System.Collections.Concurrent.ConcurrentDictionary<IntPtr, PinnedArgument> Live =
        new System.Collections.Concurrent.ConcurrentDictionary<IntPtr, PinnedArgument>();

    /// <summary>Factory method looked up by the runtime to create the marshaler.</summary>
    /// <param name="cookie">Marshaler cookie; not used by this implementation.</param>
    public static ICustomMarshaler GetInstance(string cookie)
    {
        return new JaggedArrayMarshaler();
    }

    /// <summary>Nothing to do: no managed object is created by this marshaler.</summary>
    public void CleanUpManagedData(object ManagedObj)
    {
    }

    /// <summary>
    /// Releases the pins that belong to <paramref name="pNativeData"/>. Pointers that
    /// were not produced by <see cref="MarshalManagedToNative"/> (including
    /// <see cref="IntPtr.Zero"/>) or that were already cleaned up are ignored.
    /// </summary>
    public void CleanUpNativeData(IntPtr pNativeData)
    {
        PinnedArgument state;
        if (Live.TryRemove(pNativeData, out state))
        {
            Release(state);
        }
    }

    /// <summary>Size of the native representation: a single pointer.</summary>
    public int GetNativeDataSize()
    {
        return IntPtr.Size;
    }

    /// <summary>
    /// Pins every inner array plus a table of their addresses and returns the address
    /// of that table.
    /// </summary>
    /// <param name="ManagedObj">The jagged array; must be convertible to <c>Array[]</c>.</param>
    /// <returns>
    /// Address of the pinned pointer table, or <see cref="IntPtr.Zero"/> when
    /// <paramref name="ManagedObj"/> is null.
    /// </returns>
    public IntPtr MarshalManagedToNative(object ManagedObj)
    {
        if (ManagedObj == null)
        {
            return IntPtr.Zero;
        }

        PinnedArgument state = new PinnedArgument();
        state.Source = (Array[])ManagedObj;
        state.ElementPins = new GCHandle[state.Source.Length];

        try
        {
            IntPtr[] table = new IntPtr[state.Source.Length];
            for (int i = 0; i < table.Length; i++)
            {
                state.ElementPins[i] = GCHandle.Alloc(state.Source[i], GCHandleType.Pinned);
                table[i] = state.ElementPins[i].AddrOfPinnedObject();
            }

            state.PointerTablePin = GCHandle.Alloc(table, GCHandleType.Pinned);
            IntPtr native = state.PointerTablePin.AddrOfPinnedObject();
            Live[native] = state;
            return native;
        }
        catch
        {
            // Pinning failed part-way (e.g. an element cannot be pinned): do not leave
            // the arrays that were already pinned stuck in place.
            Release(state);
            throw;
        }
    }

    /// <summary>
    /// Returns the managed jagged array that <paramref name="pNativeData"/> was created
    /// from, or null when the pointer is not known to this marshaler.
    /// </summary>
    public object MarshalNativeToManaged(IntPtr pNativeData)
    {
        PinnedArgument state;
        return Live.TryGetValue(pNativeData, out state) ? state.Source : null;
    }

    // Frees the handles in place (GCHandle is a struct, so freeing a copy would leave
    // a stale handle value behind in the array or field).
    private static void Release(PinnedArgument state)
    {
        if (state.PointerTablePin.IsAllocated)
        {
            state.PointerTablePin.Free();
        }

        for (int i = 0; i < state.ElementPins.Length; i++)
        {
            if (state.ElementPins[i].IsAllocated)
            {
                state.ElementPins[i].Free();
            }
        }
    }
}
并在 P/Invoke 声明中使用上面的自定义封送拆收器:
/// <summary>
/// P/Invoke declaration for <c>cblas_dgemm_batch</c> in the "custom_mkl" native library.
/// </summary>
/// <remarks>
/// The three jagged matrix arrays (<paramref name="a_array"/>, <paramref name="b_array"/>,
/// <paramref name="c_array"/>) are routed through <see cref="JaggedArrayMarshaler"/>;
/// every other array parameter uses default marshalling.
/// </remarks>
[DllImport("custom_mkl", ExactSpelling = true, SetLastError = false, CallingConvention = CallingConvention.Cdecl)]
internal static extern void cblas_dgemm_batch(
    int Layout,
    [In] int[] transa_array,
    [In] int[] transb_array,
    [In] MKL_INT[] m_array,
    [In] MKL_INT[] n_array,
    [In] MKL_INT[] k_array,
    [In] double[] alpha_array,
    [In, MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(JaggedArrayMarshaler))] double[][,] a_array,
    [In] MKL_INT[] lda_array,
    [In, MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(JaggedArrayMarshaler))] double[][,] b_array,
    [In] MKL_INT[] ldb_array,
    [In] double[] beta_array,
    [In, Out, MarshalAs(UnmanagedType.CustomMarshaler, MarshalTypeRef = typeof(JaggedArrayMarshaler))] double[][,] c_array,
    [In] MKL_INT[] ldc_array,
    MKL_INT group_count,
    [In] MKL_INT[] group_size);
我正在使用以下代码对其进行测试:
/// <summary>
/// Multiplies matching pairs of row-major matrices with a single
/// <c>cblas_dgemm_batch</c> call: result <c>i</c> is produced from <c>a[i]</c> and
/// <c>b[i]</c> with no transposition, alpha = 1 and beta = 0.
/// </summary>
/// <param name="a">Left operands; the column count of <c>a[i]</c> must equal the row count of <c>b[i]</c>.</param>
/// <param name="b">Right operands; must contain as many matrices as <paramref name="a"/>.</param>
/// <returns>
/// A new array in which element <c>i</c> has as many rows as <c>a[i]</c> and as many
/// columns as <c>b[i]</c>.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="a"/> or <paramref name="b"/> is null.</exception>
/// <exception cref="System.ArgumentException">
/// The two arrays differ in length, contain a null matrix, or a pair has incompatible
/// inner dimensions.
/// </exception>
public static double[][,] Dot(double[][,] a, double[][,] b)
{
    if (a == null) throw new System.ArgumentNullException("a");
    if (b == null) throw new System.ArgumentNullException("b");

    int n0 = a.Length;
    if (b.Length != n0) throw new System.ArgumentException("Group size must be the same");

    int Order = 101; // row-major arrays
    int[] TransA = new int[n0];
    int[] TransB = new int[n0];
    MKL_INT[] M = new MKL_INT[n0];
    MKL_INT[] N = new MKL_INT[n0];
    MKL_INT[] K = new MKL_INT[n0];
    MKL_INT[] lda = new MKL_INT[n0];
    MKL_INT[] ldb = new MKL_INT[n0];
    MKL_INT[] ldc = new MKL_INT[n0];
    double[] alpha = new double[n0];
    double[] beta = new double[n0];
    double[][,] c = new double[n0][,];
    MKL_INT GroupCount = n0; // every matrix pair forms its own group ...
    MKL_INT[] GroupSize = new MKL_INT[n0];

    for (int i0 = 0; i0 < n0; i0++)
    {
        if (a[i0] == null) throw new System.ArgumentException("Matrix array must not contain null entries", "a");
        if (b[i0] == null) throw new System.ArgumentException("Matrix array must not contain null entries", "b");

        int n1 = a[i0].GetLength(0);
        int n2 = a[i0].GetLength(1);
        int n3 = b[i0].GetLength(0);
        int n4 = b[i0].GetLength(1);
        if (n2 != n3) throw new System.ArgumentException("Inner matrix dimensions must agree");

        TransA[i0] = 111; // trans='N'
        TransB[i0] = 111; // trans='N'
        M[i0] = n1; N[i0] = n4; K[i0] = n2;

        // Row-major, non-transposed operands: the leading dimension is the column
        // count, but CBLAS requires it to be at least 1 even for empty matrices.
        lda[i0] = System.Math.Max(1, n2);
        ldb[i0] = System.Math.Max(1, n4);
        ldc[i0] = System.Math.Max(1, n4);

        alpha[i0] = 1; beta[i0] = 0;
        c[i0] = new double[n1, n4];
        GroupSize[i0] = 1; // ... of size one
    }

    _mkl.cblas_dgemm_batch(Order, TransA, TransB, M, N, K, alpha, a, lda, b, ldb, beta, c, ldc, GroupCount, GroupSize);
    return c;
}
和
// Build two batches of two 2x2 matrices each and pass them to MKL.Dot.
double[,] A0 = { { 1, 2 }, { 3, 4 } };
double[,] A1 = { { 5, 6 }, { 7, 8 } };
double[,] B0 = { { 9, 10 }, { 11, 12 } };
double[,] B1 = { { 13, 14 }, { 15, 16 } };

double[][,] A = { A0, A1 };
double[][,] B = { B0, B1 };

double[][,] C = MKL.Dot(A, B);
当我运行代码时,它可以正常工作。我可以在调试器中看到,在调用 cblas_dgemm_batch 之前,方法 MarshalManagedToNative 被调用了 3 次(如预期的那样)。