用 C 扩展 Python，返回的 numpy 数组内容是垃圾数据

Question

我正在包装一个 C 文件，以便可以在 python 中使用它。 C 函数的输出是双精度数组。我希望这是 python 中的一个 numpy 数组。我得到垃圾。下面是生成错误的示例。

首先是C文件（重点关注最后一个函数定义，其他都OK）：

#include <Python.h>
#include <numpy/arrayobject.h>
#include <stdio.h>

static char module_docstring[] =
    "docstring";

static char error_docstring[] =
        "generate the error";

static PyObject *_aux_error(PyObject *self, PyObject *args);

static PyMethodDef module_methods[] = {
        {"error", _aux_error, METH_VARARGS, error_docstring},
        {NULL, NULL, 0, NULL}
};

/*
 * Python 2 module initialiser for `_tmp`: registers the method table and
 * docstring, then loads the NumPy C-API if the module was created.
 */
PyMODINIT_FUNC init_tmp(void)
{
    PyObject *module = Py_InitModule3("_tmp", module_methods, module_docstring);

    if (module != NULL) {
        /* Load `numpy` functionality. */
        import_array();
    }
}

/*
 * Wrapper exposed to Python as `_tmp.error()`: builds a 1-D, 2-element array
 * object around the local buffer `vector` and returns it. Both `self` and
 * `args` are unused.
 *
 * This is the version that prints garbage from Python.
 *
 * NOTE(review): `vector` is an automatic (stack) array, and
 * PyArray_SimpleNewFromData wraps the pointer it is given instead of copying
 * the data, so the returned array refers to storage that is reclaimed as soon
 * as this function returns.
 * NOTE(review): the buffer holds `double`, but the type number passed is
 * NPY_FLOAT (32-bit float); NPY_DOUBLE is the type that matches the buffer.
 */
static PyObject *_aux_error(PyObject *self ,PyObject *args) {

    double vector[2] = {1.0 , 2.0};

    npy_intp dims[1] = { 2 };

    PyObject *ret  = PyArray_SimpleNewFromData(1, dims, (int)NPY_FLOAT , vector );
    return ret;
}

编译正常（据我了解 - 我使用了一个 python 脚本来编译所有内容）。

在 python 中，我运行以下脚本来测试我的新模块：

# Smoke test for the `_tmp` extension: import it, call error(), print the result.
try:
    import _tmp
    res = _tmp.error()
    print(res)
except Exception as exc:
    # Report what actually failed (e.g. an ImportError from a bad build). A bare
    # `except:` would hide the cause and also swallow KeyboardInterrupt/SystemExit.
    print("fail: %r" % (exc,))

我在屏幕上看到的结果是垃圾。我尝试用 (int)NPY_FLOAT32, (int)NPY_FLOAT64, (int)NPY_DOUBLE 替换 (int)NPY_FLOAT 但我仍然得到垃圾。我正在使用 python2.7.

谢谢！！！

编辑：按照下面的回答，我将最后一个函数更改为：

/*
 * Second attempt at the wrapper: the data buffer and the shape array are now
 * heap-allocated with calloc before being handed to PyArray_SimpleNewFromData.
 *
 * This is the version that shows up as an empty array in Python.
 *
 * NOTE(review): neither calloc result is checked for NULL, and `dims` is never
 * freed in this function.
 * NOTE(review): the type number is still NPY_FLOAT although the buffer holds
 * `double`.
 */
static PyObject *_aux_error(PyObject *self, PyObject *args) {


    double *vector = calloc(2, sizeof(double));
    vector[0] = 1.0;
    vector[1] = 2.0;


    /* NOTE(review): `dims` has exactly one element, so the only valid index is
     * dims[0]. The store to dims[1] is out of bounds, and dims[0] keeps the
     * zero written by calloc, i.e. a length-0 array is requested. */
    npy_intp *dims = calloc(1 , sizeof(npy_intp));
    dims[1] = 2;


    /* NOTE(review): `&vector` is the address of the pointer variable itself
     * (a double **), not of the buffer; the buffer is `vector`. */
    PyObject *ret  = PyArray_SimpleNewFromData(1, dims, (int)NPY_FLOAT , &vector );
    return ret;
}

现在 python 显示一个空数组。

Answer 1

尝试改变这个：

static PyObject *_aux_error(PyObject *self) {

对此：

static PyObject *_aux_error(PyObject *self, PyObject *args) {

Python 将传递 args 参数，即使您没有用它定义函数。

您的代码仍然存在根本问题。您使用位于栈上的数组 vector 创建了一个 numpy 数组。当 _aux_error 返回时，该内存会被回收，并可能被重新使用。

您可以使用 PyArray_SimpleNew() 创建数组来分配 numpy 数组，然后将 vector 复制到数组的数据中：

/*
 * Return a new 1-D NumPy array of two doubles, [1.0, 2.0].
 *
 * The array is allocated by NumPy (PyArray_SimpleNew) and the contents of the
 * local `vector` are copied into it, so nothing in the returned object refers
 * to this function's stack frame. `self` and `args` are unused.
 *
 * Returns a new reference, or NULL (with the Python exception left by
 * PyArray_SimpleNew) if the array could not be created.
 */
static PyObject *_aux_error(PyObject *self, PyObject *args)
{
    double vector[2] = {1.0 , 2.0};
    npy_intp dims[1] = {2};

    PyObject *ret = PyArray_SimpleNew(1, dims, NPY_DOUBLE);
    if (ret == NULL) {
        /* Creation failed; do not touch the data pointer of a NULL object. */
        return NULL;
    }

    /* The explicit cast keeps this valid when deprecated NumPy macros are disabled. */
    memcpy(PyArray_DATA((PyArrayObject *)ret), vector, sizeof(vector));
    return ret;
}

注意我把类型改成了NPY_DOUBLE； NPY_FLOAT是32位浮点型。

在评论中，您询问了在 _aux_error 中动态分配内存的问题。下面是可能有用的示例变体。数组的长度仍然硬编码在 dims 中，因此它不是完全通用的，但它可能足以解决评论中的问题。

/*
 * Return a new 1-D NumPy array of five doubles, filled in place with
 * 1.0, 2.0, ..., 5.0.
 *
 * No intermediate C buffer is used: the values are written directly into the
 * storage NumPy allocated for the array. `self` and `args` are unused.
 *
 * Returns a new reference, or NULL (with the Python exception left by
 * PyArray_SimpleNew) if the array could not be created.
 */
static PyObject *_aux_error(PyObject *self, PyObject *args)
{
    double *vector;
    npy_intp dims[1] = {5};
    npy_intp k;

    PyObject *ret = PyArray_SimpleNew(1, dims, NPY_DOUBLE);
    if (ret == NULL) {
        /* Creation failed; there is no data buffer to fill. */
        return NULL;
    }

    /*
     *  NOTE: Treating PyArray_DATA(ret) as if it were a contiguous one-dimensional C
     *  array is safe, because we just created it with PyArray_SimpleNew, so we know
     *  that it is, in fact, a one-dimensional contiguous array.
     */
    vector = (double *) PyArray_DATA((PyArrayObject *)ret);
    for (k = 0; k < dims[0]; ++k) {
        vector[k] = 1.0 + k;
    }
    return ret;
}

Answer 2

这是我的完整解决方案，供您娱乐。可以复制、粘贴并修改。显然，我遇到的问题比上面的问题要复杂一些。我借用了 Dan Foreman-Mackey 发布在网上的一些代码。

我的代码的目标是返回一个协方差向量（不管它是什么）。我有一个名为 aux.c 的 C 文件，其中的函数返回一个新分配的数组：

#include "aux.h"
#include <math.h>
#include <stdlib.h>
double *covVec(double *X, double *x, int nvecs, int veclen) {


    double r = 1.3;
    double d = 1.0;

    double result;
    double dist;
    int n;

    double *k;
    k = malloc(nvecs * sizeof(double));

    int row;
    for( row = 0 ; row < nvecs ; row++) {

        result = 0.0;
        for (n = 0; n < veclen; n++) {
                dist = x[n] - X[row*veclen + n];
                result += dist * dist;
        }

        result = d*exp(  -result/(2.0*r*r)  );
        k[row] = result;
    }
    return k;
}

然后，我需要一个很短的头文件，名字是aux.h:

#ifndef AUX_H
#define AUX_H

/*
 * Covariance between the vector `x` (veclen doubles) and each of the `nvecs`
 * rows of the row-major matrix `X` (nvecs * veclen doubles).
 *
 * Returns a newly malloc'ed array of nvecs doubles; the caller owns it and
 * must release it with free().
 */
double *covVec(double *X, double *x, int nvecs, int veclen);

#endif /* AUX_H */

要将其包装到 python 我有 _aux.c:

#include <Python.h>
#include <numpy/arrayobject.h>
#include "aux.h"
#include <stdio.h>

/* Docstring attached to the `_aux` extension module. */
static char module_docstring[] =
    "This module provides an interface for calculating covariance using C.";

/* Docstring attached to the exported `cov_vec` method. */
static char cov_vec_docstring[] =
    "Calculate the covariances between a vector and a list of vectors.";

/* Forward declaration so the method table below can refer to the wrapper. */
static PyObject *_aux_covVec(PyObject *self, PyObject *args);

/*
 * Method table handed to Py_InitModule3: one exported method, followed by the
 * all-NULL sentinel entry that terminates the table.
 */
static PyMethodDef module_methods[] = {
    { "cov_vec", _aux_covVec, METH_VARARGS, cov_vec_docstring },
    { NULL,      NULL,        0,            NULL              }
};

/*
 * Python 2 module initialiser for `_aux`: registers the method table and
 * docstring, then loads the NumPy C-API if the module was created.
 */
PyMODINIT_FUNC init_aux(void)
{
    PyObject *module = Py_InitModule3("_aux", module_methods, module_docstring);

    if (module != NULL) {
        /* Load `numpy` functionality. */
        import_array();
    }
}


/*
 * Python entry point `_aux.cov_vec(X, x)`.
 *
 *   X - object convertible to a 2-D array of doubles, shape (nvecs, veclen).
 *   x - object convertible to a 1-D array of doubles, length veclen.
 *
 * Returns a new 1-D double array of length nvecs holding the values computed
 * by covVec(). On failure returns NULL with a Python exception set:
 *   - whatever PyArg_ParseTuple / PyArray_FROM_OTF raised for bad arguments,
 *   - RuntimeError when the array ranks or lengths are inconsistent,
 *   - MemoryError when covVec() cannot allocate its result,
 *   - RuntimeError when a computed covariance is negative.
 *
 * Every exit after the conversions goes through `done`, so the temporary
 * array references and the covVec() buffer are released on all paths.
 */
static PyObject *_aux_covVec(PyObject *self, PyObject *args)
{
    PyObject *X_obj, *x_obj;
    PyArrayObject *X_array = NULL;
    PyArrayObject *x_array = NULL;
    PyObject *ret = NULL;
    double *k = NULL;
    npy_intp dims[1];
    int nvecs, veclen, xlen;
    int i;

    /* Parse the input tuple */
    if (!PyArg_ParseTuple(args, "OO", &X_obj, &x_obj ))
        return NULL;

    /* Interpret the input objects as numpy arrays; a failed conversion has
     * already set the exception. */
    X_array = (PyArrayObject *)PyArray_FROM_OTF(X_obj, NPY_DOUBLE, NPY_IN_ARRAY);
    if (X_array == NULL)
        goto done;
    x_array = (PyArrayObject *)PyArray_FROM_OTF(x_obj, NPY_DOUBLE, NPY_IN_ARRAY);
    if (x_array == NULL)
        goto done;

    /* Check the ranks first: reading dimension 1 of a 1-D array would be an
     * out-of-bounds access. */
    if (PyArray_NDIM(X_array) != 2 || PyArray_NDIM(x_array) != 1) {
        PyErr_SetString(PyExc_RuntimeError,
                        "Expected a 2-D array X and a 1-D array x.");
        goto done;
    }

    /* What are the dimensions? */
    nvecs  = (int)PyArray_DIM(X_array, 0);
    veclen = (int)PyArray_DIM(X_array, 1);
    xlen   = (int)PyArray_DIM(x_array, 0);

    /* Validate BEFORE calling covVec: it reads veclen entries from x, so a
     * shorter x would be read past its end. */
    if ( veclen !=  xlen ) {
        PyErr_SetString(PyExc_RuntimeError,
                                "Dimensions don't match!!");
        goto done;
    }

    /* Call the external C function to compute the covariance. */
    k = covVec((double*)PyArray_DATA(X_array),
               (double*)PyArray_DATA(x_array),
               nvecs, veclen);
    if (k == NULL && nvecs > 0) {
        PyErr_NoMemory();
        goto done;
    }

    for(i = 0 ; i < nvecs ; i++) {
        /* Trace output kept from the original implementation. */
        printf("k[%d]   = %f\n",i,k[i]);
        if (k[i] < 0.0) {
            PyErr_SetString(PyExc_RuntimeError,
                        "Covariance should be positive but it isn't.");
            goto done;
        }
    }

    dims[0] = nvecs;
    ret = PyArray_SimpleNew(1, dims, NPY_DOUBLE);
    if (ret != NULL && nvecs > 0) {
        memcpy(PyArray_DATA((PyArrayObject *)ret), k, nvecs*sizeof(double));
    }

done:
    /* Clean up. free(NULL) and Py_XDECREF(NULL) are both no-ops. */
    free(k);
    Py_XDECREF(X_array);
    Py_XDECREF(x_array);
    return ret;
}

我有一个名为 setup_cov.py 的 python 文件：

from distutils.core import setup, Extension
import numpy.distutils.misc_util

# Build description for the `_aux` extension: the Python wrapper (_aux.c) is
# compiled together with the plain C implementation (aux.c), and the NumPy
# header directories are added to the include path.
aux_extension = Extension("_aux", ["_aux.c", "aux.c"])
numpy_include_dirs = numpy.distutils.misc_util.get_numpy_include_dirs()

setup(ext_modules=[aux_extension], include_dirs=numpy_include_dirs)

我使用 python2.7 setup_cov.py build_ext --inplace 从命令行编译。然后我运行以下python测试文件：

import numpy as np
import _aux as a

# Exercise _aux.cov_vec on random data: X is an (nvecs, veclen) matrix of
# normal draws and x is a single vector of length veclen.
nvecs, veclen = 6, 9

# Draw the rows one at a time and stack them into a single 2-D array.
X = np.asarray([np.random.normal(size=veclen) for _ in range(nvecs)])

x = np.random.normal(size=veclen)
k = a.cov_vec(X, x)
print(k)

Answer 3

Warren 的解决方案似乎有效，尽管释放 C 数组内存块会导致我在编译时出错。我在下面的极简函数中使用了 memcpy 技巧（通过指针将一维 C 数组复制到 numpy 数组）。为了简单起见，它不接受任何参数，应该足以让读者了解如何将其应用于 C 数组而不是向量：

/*
 * Return a new 1-D NumPy array holding the doubles 0, 1, 2, 3, 4.
 *
 * The array is allocated by NumPy and the contents of the local C array `f`
 * are copied into it, so the result does not refer to this function's stack.
 * PyArray_SimpleNew (with npy_intp dimensions) is used in place of the
 * deprecated PyArray_FromDims. `self` and `args` are unused.
 *
 * Returns a new reference, or NULL (with the Python exception left by
 * PyArray_SimpleNew) if the array could not be created.
 */
static PyObject *_cmod_test(PyObject *self, PyObject *args)
{
    double f[5] = {0,1,2,3,4};
    npy_intp dims[1] = {5};

    PyObject *c = PyArray_SimpleNew(1, dims, NPY_DOUBLE);
    if (c == NULL) {
        /* Creation failed; there is no buffer to copy into. */
        return NULL;
    }

    memcpy(PyArray_DATA((PyArrayObject *)c), f, sizeof f);
    return c;
}

launch.py脚本很简单

# launch.py: import the compiled `_cmod` extension and call its test() function.
import _cmod
# NOTE(review): the returned array is discarded here; wrap the call in
# print(...) to see it when this file is run as a script.
_cmod.test()

不要忘记声明函数

#include <Python.h>
#include <numpy/arrayobject.h>
/*
 * NOTE(review): NumPy's documentation shows NPY_NO_DEPRECATED_API being
 * defined *before* the NumPy headers are included. Defined after the #include,
 * as here, it presumably has no effect on what arrayobject.h declares.
 * Confirm, and move it above the include if the deprecation checks are wanted.
 */
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
/* Forward declaration of the wrapper so a method table can reference it. */
static PyObject *_cmod_test(PyObject *self, PyObject *args);

关于与 PyArray_SimpleNewFromData 一起使用的任何建议（同时避免内存泄漏陷阱）？也许类似于下面的损坏代码。

/*
 * Attempt to return the local array `f` to Python without copying, by wrapping
 * it with PyArray_SimpleNewFromData and then marking the array as owning its
 * data. Presented as broken code; the notes below say why.
 *
 * NOTE(review): `f` is an automatic (stack) array; the wrapped pointer dangles
 * as soon as this function returns.
 * NOTE(review): with NPY_ARRAY_OWNDATA set, NumPy releases the data buffer
 * when the array is destroyed, and `f` was never heap-allocated. An owned
 * buffer would have to come from a heap allocator. Confirm which allocator
 * NumPy expects for the version in use.
 * NOTE(review): PyArray_ENABLEFLAGS is documented as taking a PyArrayObject *,
 * while `c` is a PyObject *; a cast is presumably needed. Confirm.
 */
static PyObject *_cmod_test(PyObject *self, PyObject *args)
    {
    double f[5] = {0,1,2,3,4};
    npy_intp dims[1] = {5};
    PyObject *c = PyArray_SimpleNewFromData(1, dims, NPY_DOUBLE ,f);
    PyArray_ENABLEFLAGS(c, NPY_ARRAY_OWNDATA);
    return c;
    };

我还推荐 Dan Foreman-Mackey 关于 Python C API 的博客。

用 C 扩展 Python，返回的 numpy 数组内容是垃圾数据

Extend python with C, return numpy array gives garbage

c

python

numpy

wrapper

python-2.7