使用 C API 访问 NumPy 数组的视图

Question

在我用 C++ 编写的 Python 扩展模块中，我使用以下代码片段将 NumPy 数组转换为 Armadillo 数组以用于代码的 C++ 部分:

static arma::mat convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
    // Check if the dimensions are what I expect.
    if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
    
    const std::vector<int> dims = getPyArrayDimensions(pyarr);  // Gets the dimensions using the API
    
    PyArray_Descr* reqDescr = PyArray_DescrFromType(NPY_DOUBLE);
    if (reqDescr == NULL) throw std::bad_alloc();
    
    // Convert the array to Fortran-ordering as required by Armadillo
    PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, 
                                                                NPY_ARRAY_FARRAY);
    if (cleanArr == NULL) throw std::bad_alloc();
    reqDescr = NULL;  // The new reference from DescrFromType was stolen by FromArray

    double* dataPtr = static_cast<double*>(PyArray_DATA(cleanArr));
    arma::mat result (dataPtr, dims[0], dims[1], true);  // this copies the data from cleanArr
    Py_DECREF(cleanArr);
    return result;
}

问题是，当我将 NumPy 数组（即 my_array[:, 3]）的 view 传递给它时，它似乎无法处理底层的步幅C数组正确。根据输出，似乎函数接收到的数组 pyarr 实际上是完整的基本数组，而不是视图（或者至少当我使用 PyArray_DATA 访问数据时，我似乎是获取指向完整基数组的指针）。如果我改为向此函数传递视图的副本（即 my_array[:, 3].copy()），它会按预期工作，但我不想每次都记住这样做。

那么，有没有办法让 PyArray_FromArray 只复制我想要的矩阵切片？我尝试使用标志 NPY_ARRAY_ENSURECOPY，但没有帮助。

编辑 1

正如评论中所建议的，这是一个完整的工作示例：

在文件中 example.cpp:

#define NPY_NO_DEPRECATED_API NPY_1_9_API_VERSION

extern "C" {
    #include <Python.h>
    #include <numpy/arrayobject.h>
}
#include <exception>
#include <cassert>
#include <string>
#include <type_traits>
#include <map>
#include <vector>
#include <armadillo>

class WrongDimensions : public std::exception
{
public:
    WrongDimensions() {}
    const char* what() const noexcept { return msg.c_str(); }

private:
    std::string msg = "The dimensions were incorrect";
};

class NotImplemented : public std::exception
{
public:
    NotImplemented() {}
    const char* what() const noexcept { return msg.c_str(); }

private:
    std::string msg = "Not implemented";
};

class BadArrayLayout : public std::exception
{
public:
    BadArrayLayout() {}
    const char* what() const noexcept { return msg.c_str(); }

private:
    std::string msg = "The matrix was not contiguous";
};

static const std::vector<npy_intp> getPyArrayDimensions(PyArrayObject* pyarr)
{
    npy_intp ndims = PyArray_NDIM(pyarr);
    npy_intp* dims = PyArray_SHAPE(pyarr);
    std::vector<npy_intp> result;
    for (int i = 0; i < ndims; i++) {
        result.push_back(dims[i]);
    }
    return result;
}

/* Checks the dimensions of the given array. Pass -1 for either dimension to say you don't
 * care what the size is in that dimension. Pass dimensions (X, 1) for a vector.
 */
static bool checkPyArrayDimensions(PyArrayObject* pyarr, const npy_intp dim0, const npy_intp dim1)
{
    const auto dims = getPyArrayDimensions(pyarr);
    assert(dims.size() <= 2 && dims.size() > 0);
    if (dims.size() == 1) {
        return (dims[0] == dim0 || dim0 == -1) && (dim1 == 1 || dim1 == -1);
    }
    else {
        return (dims[0] == dim0 || dim0 == -1) && (dims[1] == dim1 || dim1 == -1);
    }
}

template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
    if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();

    int arrTypeCode;
    if (std::is_same<outT, uint16_t>::value) {
        arrTypeCode = NPY_UINT16;
    }
    else if (std::is_same<outT, double>::value) {
        arrTypeCode = NPY_DOUBLE;
    }
    else {
        throw NotImplemented();
    }

    const auto dims = getPyArrayDimensions(pyarr);
    if (dims.size() == 1) {
        outT* dataPtr = static_cast<outT*>(PyArray_DATA(pyarr));
        return arma::Col<outT>(dataPtr, dims[0], true);
    }
    else {
        PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
        if (reqDescr == NULL) throw std::bad_alloc();
        PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
        if (cleanArr == NULL) throw std::bad_alloc();
        reqDescr = NULL;  // The new reference from DescrFromType was stolen by FromArray

        outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));
        arma::Mat<outT> result (dataPtr, dims[0], dims[1], true);  // this copies the data from cleanArr
        Py_DECREF(cleanArr);
        return result;
    }
}

static PyObject* convertArmaToPyArray(const arma::mat& matrix)
{
    npy_intp ndim = matrix.is_colvec() ? 1 : 2;
    npy_intp nRows = static_cast<npy_intp>(matrix.n_rows);  // NOTE: This narrows the integer
    npy_intp nCols = static_cast<npy_intp>(matrix.n_cols);
    npy_intp dims[2] = {nRows, nCols};

    PyObject* result = PyArray_SimpleNew(ndim, dims, NPY_DOUBLE);
    if (result == NULL) throw std::bad_alloc();

    double* resultDataPtr = static_cast<double*>(PyArray_DATA((PyArrayObject*)result));
    for (int i = 0; i < nRows; i++) {
        for (int j = 0; j < nCols; j++) {
            resultDataPtr[i * nCols + j] = matrix(i, j);
        }
    }

    return result;
}

extern "C" {
    // An example function that takes a NumPy array and converts it to
    // an arma::mat and back. This should return the array unchanged.
    static PyObject* example_testFunction(PyObject* self, PyObject* args)
    {
        PyArrayObject* myArray = NULL;

        if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &myArray)) {
            return NULL;
        }

        PyObject* output = NULL;
        try {
            arma::mat myMat = convertPyArrayToArma<double>(myArray, -1, -1);
            output = convertArmaToPyArray(myMat);
        }
        catch (const std::bad_alloc&) {
            PyErr_NoMemory();
            Py_XDECREF(output);
            return NULL;
        }
        catch (const std::exception& err) {
            PyErr_SetString(PyExc_RuntimeError, err.what());
            Py_XDECREF(output);
            return NULL;
        }

        return output;
    }

    static PyMethodDef example_methods[] =
    {
        {"test_function", example_testFunction, METH_VARARGS, "A test function"},
        {NULL, NULL, 0, NULL}
    };

    static struct PyModuleDef example_module = {
       PyModuleDef_HEAD_INIT,
       "example",   /* name of module */
       NULL, /* module documentation, may be NULL */
       -1,       /* size of per-interpreter state of the module,
                    or -1 if the module keeps state in global variables. */
       example_methods
    };

    PyMODINIT_FUNC
    PyInit_example(void)
    {
        import_array();

        PyObject* m = PyModule_Create(&example_module);
        if (m == NULL) return NULL;
        return m;
    }
}

和setup.py编译：

from setuptools import setup, Extension
import numpy as np

example_module = Extension(
    'example',
    include_dirs=[np.get_include(), '/usr/local/include'],
    libraries=['armadillo'],
    library_dirs=['/usr/local/lib'],
    sources=['example.cpp'],
    language='c++',
    extra_compile_args=['-std=c++11', '-mmacosx-version-min=10.10'],
    )

setup(name='example',
      ext_modules=[example_module],
      )

现在假设我们有示例数组

a = np.array([[ 1, 2, 3, 4, 5, 6], 
              [ 7, 8, 9,10,11,12], 
              [13,14,15,16,17,18]], dtype='float64')

该函数似乎适用于像 a[:, :3] 这样的多维切片，并且它 returns 矩阵没有改变，正如我所期望的那样。但是如果我给它一个一维切片，我得到错误的组件，除非我复制：

>>> example.test_function(a[:, 3])
array([ 4.,  5.,  6.])

>>> example.test_function(a[:, 3].copy())
array([  4.,  10.,  16.])

Answer 1

数组的视图只是同一数据数组的另一个 information-wrapper。 Numpy 不在此处复制任何数据。仅调整用于解释数据的信息，并在有用时移动指向数据的指针。

在您的代码中，您假设向量 a[:, 3] 的数据表示为内存中的向量，对于 NPY_ARRAY_CARRAY 和 NPY_ARRAY_FARRAY 没有区别。但是这种表示只有在创建数组本身的（fortran 有序的）副本后才能获得。

为了让它工作，我稍微修改了你的 convertPyArrayToArma() 函数来创建一个副本，即使它是一个矢量：

template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
    if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();

    int arrTypeCode;
    if (std::is_same<outT, uint16_t>::value) {
        arrTypeCode = NPY_UINT16;
    }
    else if (std::is_same<outT, double>::value) {
        arrTypeCode = NPY_DOUBLE;
    }
    else {
        throw NotImplemented();
    }

    PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
    if (reqDescr == NULL) throw std::bad_alloc();
    PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
    if (cleanArr == NULL) throw std::bad_alloc();
    reqDescr = NULL;  // The new reference from DescrFromType was stolen by FromArray

    const auto dims = getPyArrayDimensions(pyarr);
    outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));

    // this copies the data from cleanArr
    arma::Mat<outT> result;
    if (dims.size() == 1) {
        result = arma::Col<outT>(dataPtr, dims[0], true);
    }
    else {
        result = arma::Mat<outT>(dataPtr, dims[0], dims[1], true);
    }

    Py_DECREF(cleanArr);
    return result;
}

使用 C API 访问 NumPy 数组的视图

Accessing view of a NumPy array using the C API

python

numpy

python-c-api

armadillo

python-3.x

编辑 1