如何将 C 共享库中的多维数组返回到 Python,并作为 np.array 使用?

How can I return a multidimensional array from a shared library in C to python as an np.array?

我目前正在想办法把 C 语言共享库中的多维数组(double 类型)返回到 Python,并使其成为 np.array。我目前的做法是这样的:

共享库 ("utils.c")

#include <stdio.h>

/*
 * Question's original attempt: build a 5-D array of doubles from a 4-D input
 * matrix and a 4-D kernel, then hand it back to the caller.
 *
 * The dimensions of both 4-D arrays are passed explicitly because a raw
 * `double *` carries no shape information.
 *
 * NOTE(review): the function is declared `void` yet contains `return result;`,
 * so no value can actually reach the caller as written.
 * NOTE(review): `stride` is not used by any line shown here; it may be used by
 * the elided operation -- confirm.
 */
void somefunction(double *inputMatrix, int d1_inputMatrix, int d2_inputMatrix, int h_inputMatrix, int w_inputMatrix, double *kernel, int d1_kernel, int d2_kernel, int h_kernel, int w_kernel, int stride) {
    
    /* Local variable-length array: its extents are only known at run time. */
    double result[d1_kernel][d2_kernel][d2_inputMatrix][h_inputMatrix-h_kernel+1][w_inputMatrix-w_kernel+1];
    // ---some operation--

    /* NOTE(review): `result` is a local array, so it stops existing when the
     * function returns; a pointer to it must not be used by the caller. */
    return result;

}

现在,我用 `cc -fPIC -shared -o utils.so utils.c` 编译 utils.c。

python ("somefile.py")

# Question's original attempt at calling the shared library through ctypes.
from ctypes import *
import numpy as np

# Load the compiled shared library from an absolute path.
so_file = "/home/benni/Coding/5.PK/Code/utils.so"
utils = CDLL(so_file)

# ctypes type used for every integer argument of the C function.
# NOTE(review): this is a 64-bit integer; confirm it matches the integer type
# declared on the C side.
INT = c_int64
# Argument type accepting only 4-dimensional, C-contiguous float64 arrays.
ND_POINTER_4 = np.ctypeslib.ndpointer(dtype=np.float64, ndim=4, flags="C")

# NOTE(review): the argument types are registered on `utils.convolve`, but the
# call below goes through `utils.somefunction` -- the two names do not match.
utils.convolve.argtypes = [ND_POINTER_4, INT, INT, INT, INT, ND_POINTER_4, INT, INT, INT, INT, INT]
a = ... # some np.array with 4 dimensions
b = ... # some np.array with 4 dimensions

# Pass each array together with its four dimensions, then a stride of 1.
# No `restype` is set anywhere above, so ctypes treats the return value as a
# C int (its default) rather than as an array.
result = utils.somefunction(a, a.shape[0], a.shape[1], a.shape[2], a.shape[3], b, b.shape[0], b.shape[1], b.shape[2], b.shape[3], 1)

现在,如何将 utils.somefunction() 的结果转换为 np.array?我知道,为了解决这个问题,我必须指定 utils.convolve.restype。但是,如果我希望返回类型是 np.array,应该把 restype 设置成什么?

首先,在栈上分配的局部 C 数组(如 somefunction 中的数组)绝不能由函数返回。函数返回后,这段栈空间会被其他函数(例如 CPython 自身的函数)重用。返回的数组必须在堆上分配。

此外,使用 ctypes 编写处理 Numpy 数组的函数相当麻烦。正如您所发现的,您需要通过参数传递完整的形状。但问题在于,您还需要为每个输入数组的每个维度传递步幅(strides),因为数组在内存中不一定连续(例如 np.transpose 会改变步幅)。话虽如此,为了性能和代码的简洁,我们可以假设输入数组是连续的,这可以通过 np.ascontiguousarray 强制保证。数组 a 和 b 的指针可以用 numpy.ctypeslib.as_ctypes 提取,不过好在 ctypes 可以自动完成这一步。此外,返回的数组目前只是一个 C 指针,而不是 Numpy 数组。因此,您需要用 numpy.ctypeslib.as_array 基于该指针创建一个具有正确形状和步幅的 Numpy 数组。由于调用者事先不知道结果的形状,您需要通过多个整数指针(每个维度一个)从被调用函数中取回它。最终,这会产生一段相当庞大、丑陋且极易出错的代码(一旦出现任何问题,它往往会悄无声息地崩溃,更不用说稍不注意就可能发生内存泄漏)。您可以使用 Cython 来替您完成其中的大部分工作。

假设您不想或不能使用 Cython,这里是一个带有 ctypes 的示例代码:

import ctypes
import sys

import numpy as np

# Example of input
a = np.empty((16, 16, 12, 12), dtype=np.float64)
b = np.empty((8, 8, 4, 4), dtype=np.float64)

# `np.ctypeslib.load_library` is recommended over `ctypes.CDLL` by the Numpy
# documentation. It only appends the platform extension to the given name and
# does NOT add a "lib" prefix, so the name has to match the file that was
# actually built:
# Windows:  ".\utils.dll"
# Linux:    "./libutils.so"
LIB_NAME = 'utils' if sys.platform.startswith('win') else 'libutils'
utils = np.ctypeslib.load_library(LIB_NAME, '.')

INT = ctypes.c_int64
PINT = ctypes.POINTER(INT)
PDOUBLE = ctypes.POINTER(ctypes.c_double)
# Rejects anything that is not a 4-D, C-contiguous float64 array, so the C
# side can index the buffers without receiving strides.
ND_POINTER_4 = np.ctypeslib.ndpointer(dtype=np.float64, ndim=4, flags="C_CONTIGUOUS")

utils.somefunction.argtypes = [
    ND_POINTER_4, INT, INT, INT, INT,
    ND_POINTER_4, INT, INT, INT, INT,
    PINT, PINT, PINT, PINT, PINT,
]
utils.somefunction.restype = PDOUBLE

# Output parameters: the callee writes the shape of the returned buffer here.
d1_out, d2_out, d3_out, d4_out, d5_out = INT(), INT(), INT(), INT(), INT()
out = utils.somefunction(
    a, a.shape[0], a.shape[1], a.shape[2], a.shape[3],
    b, b.shape[0], b.shape[1], b.shape[2], b.shape[3],
    ctypes.byref(d1_out), ctypes.byref(d2_out), ctypes.byref(d3_out),
    ctypes.byref(d4_out), ctypes.byref(d5_out),
)

# A NULL pointer is falsy in ctypes. Wrapping it with `as_array` would crash
# the interpreter on first access, so fail loudly here instead.
if not out:
    raise MemoryError("somefunction() returned NULL: no output buffer was allocated")

d1_out = d1_out.value
d2_out = d2_out.value
d3_out = d3_out.value
d4_out = d4_out.value
d5_out = d5_out.value
result = np.ctypeslib.as_array(out, shape=(d1_out, d2_out, d3_out, d4_out, d5_out))

# Some operations

# WARNING:
# You should free the memory of the allocated buffer
# with `free(out)` when you are done with `result`
# since Numpy does not free it for you: it just creates
# a view and does not take the ownership.
# Note that the right libc must be used, otherwise the
# call to free will cause an undefined behaviour
# (eg. crash, error message, nothing)

下面是 C 代码(注意这里使用了固定宽度的整数类型):

/* utils.c */

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Multiply `ndims` dimensions into a number of elements.
 *
 * Returns 1 and stores the product in `*count_out` on success. Returns 0 when
 * a dimension is negative or when a buffer of that many doubles would not fit
 * in a size_t.
 */
static int element_count(const int64_t* dims, size_t ndims, size_t* count_out)
{
    const uint64_t max_count = SIZE_MAX / sizeof(double);
    uint64_t count = 1;

    for (size_t i = 0; i < ndims; ++i)
    {
        if (dims[i] < 0)
            return 0;

        const uint64_t dim = (uint64_t)dims[i];

        /* Check before multiplying so the product can never wrap around. */
        if (dim != 0 && count > max_count / dim)
            return 0;

        count *= dim;
    }

    *count_out = (size_t)count;
    return 1;
}

/*
 * Allocate the 5-D output array for a 4-D input matrix and a 4-D kernel.
 *
 * The output shape is written to `d1_out` .. `d5_out`:
 *   (d1_kernel, d2_kernel, d2_inputMatrix,
 *    h_inputMatrix - h_kernel + 1, w_inputMatrix - w_kernel + 1)
 *
 * Returns a heap buffer holding d1*d2*d3*d4*d5 doubles, which the caller must
 * release with free(). Returns NULL, after printing a message on stderr, when
 * the shape is invalid (for example a kernel larger than the input) or when
 * the allocation fails.
 */
double* somefunction(
        double* inputMatrix, int64_t d1_inputMatrix, int64_t d2_inputMatrix, int64_t h_inputMatrix, int64_t w_inputMatrix, 
        double* kernel, int64_t d1_kernel, int64_t d2_kernel, int64_t h_kernel, int64_t w_kernel,
        int64_t* d1_out, int64_t* d2_out, int64_t* d3_out, int64_t* d4_out, int64_t* d5_out
    )
{
    *d1_out = d1_kernel;
    *d2_out = d2_kernel;
    *d3_out = d2_inputMatrix;
    *d4_out = h_inputMatrix - h_kernel + 1;
    *d5_out = w_inputMatrix - w_kernel + 1;

    const int64_t dims[5] = { *d1_out, *d2_out, *d3_out, *d4_out, *d5_out };
    size_t size = 0;

    /* Two negative dimensions multiply to a positive size, so every dimension
     * has to be validated on its own rather than through the product. */
    if(!element_count(dims, 5, &size))
    {
        fprintf(stderr, "Invalid output shape (%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 ")\n",
                dims[0], dims[1], dims[2], dims[3], dims[4]);
        return NULL;
    }

    const size_t bytes = size * sizeof(double);

    /* malloc(0) may legitimately return NULL; always request at least one
     * byte so that NULL unambiguously means failure. */
    double* result = malloc(bytes != 0 ? bytes : 1);

    if(result == NULL)
    {
        /* size_t must not be printed with %d; PRIu64 is portable across
         * the C runtimes used by GCC on Linux and Windows. */
        fprintf(stderr, "Unable to allocate an array of %" PRIu64 " bytes\n", (uint64_t)bytes);
        return NULL;
    }

    /* Some operation: fill `result` */

    return result;
}

这里是用 GCC 构建库的命令:

# On Windows: build the shared library as utils.dll
gcc utils.c -shared -o utils.dll

# On Linux: -fPIC emits position-independent code, which shared objects
# need; the output file is named libutils.so
gcc utils.c -fPIC -shared -o libutils.so

更多信息,请阅读: