使用 C API 访问 NumPy 数组的视图
Accessing view of a NumPy array using the C API
在我用 C++ 编写的 Python 扩展模块中,我使用以下代码片段将 NumPy 数组转换为 Armadillo 数组以用于代码的 C++ 部分:
static arma::mat convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
// Check if the dimensions are what I expect.
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
const std::vector<int> dims = getPyArrayDimensions(pyarr); // Gets the dimensions using the API
PyArray_Descr* reqDescr = PyArray_DescrFromType(NPY_DOUBLE);
if (reqDescr == NULL) throw std::bad_alloc();
// Convert the array to Fortran-ordering as required by Armadillo
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr,
NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
double* dataPtr = static_cast<double*>(PyArray_DATA(cleanArr));
arma::mat result (dataPtr, dims[0], dims[1], true); // this copies the data from cleanArr
Py_DECREF(cleanArr);
return result;
}
问题是,当我将 NumPy 数组(即 my_array[:, 3]
)的 view 传递给它时,它似乎无法处理底层的步幅C数组正确。根据输出,似乎函数接收到的数组 pyarr
实际上是完整的基本数组,而不是视图(或者至少当我使用 PyArray_DATA
访问数据时,我似乎是获取指向完整基数组的指针)。如果我改为向此函数传递视图的副本(即 my_array[:, 3].copy()
),它会按预期工作,但我不想每次都记住这样做。
那么,有没有办法让 PyArray_FromArray
只复制我想要的矩阵切片?我尝试使用标志 NPY_ARRAY_ENSURECOPY
,但没有帮助。
编辑 1
正如评论中所建议的,这是一个完整的工作示例:
在文件中 example.cpp
:
#define NPY_NO_DEPRECATED_API NPY_1_9_API_VERSION
extern "C" {
#include <Python.h>
#include <numpy/arrayobject.h>
}
#include <exception>
#include <cassert>
#include <string>
#include <type_traits>
#include <map>
#include <vector>
#include <armadillo>
class WrongDimensions : public std::exception
{
public:
WrongDimensions() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "The dimensions were incorrect";
};
class NotImplemented : public std::exception
{
public:
NotImplemented() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "Not implemented";
};
class BadArrayLayout : public std::exception
{
public:
BadArrayLayout() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "The matrix was not contiguous";
};
static const std::vector<npy_intp> getPyArrayDimensions(PyArrayObject* pyarr)
{
npy_intp ndims = PyArray_NDIM(pyarr);
npy_intp* dims = PyArray_SHAPE(pyarr);
std::vector<npy_intp> result;
for (int i = 0; i < ndims; i++) {
result.push_back(dims[i]);
}
return result;
}
/* Checks the dimensions of the given array. Pass -1 for either dimension to say you don't
* care what the size is in that dimension. Pass dimensions (X, 1) for a vector.
*/
static bool checkPyArrayDimensions(PyArrayObject* pyarr, const npy_intp dim0, const npy_intp dim1)
{
const auto dims = getPyArrayDimensions(pyarr);
assert(dims.size() <= 2 && dims.size() > 0);
if (dims.size() == 1) {
return (dims[0] == dim0 || dim0 == -1) && (dim1 == 1 || dim1 == -1);
}
else {
return (dims[0] == dim0 || dim0 == -1) && (dims[1] == dim1 || dim1 == -1);
}
}
template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
int arrTypeCode;
if (std::is_same<outT, uint16_t>::value) {
arrTypeCode = NPY_UINT16;
}
else if (std::is_same<outT, double>::value) {
arrTypeCode = NPY_DOUBLE;
}
else {
throw NotImplemented();
}
const auto dims = getPyArrayDimensions(pyarr);
if (dims.size() == 1) {
outT* dataPtr = static_cast<outT*>(PyArray_DATA(pyarr));
return arma::Col<outT>(dataPtr, dims[0], true);
}
else {
PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
if (reqDescr == NULL) throw std::bad_alloc();
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));
arma::Mat<outT> result (dataPtr, dims[0], dims[1], true); // this copies the data from cleanArr
Py_DECREF(cleanArr);
return result;
}
}
static PyObject* convertArmaToPyArray(const arma::mat& matrix)
{
npy_intp ndim = matrix.is_colvec() ? 1 : 2;
npy_intp nRows = static_cast<npy_intp>(matrix.n_rows); // NOTE: This narrows the integer
npy_intp nCols = static_cast<npy_intp>(matrix.n_cols);
npy_intp dims[2] = {nRows, nCols};
PyObject* result = PyArray_SimpleNew(ndim, dims, NPY_DOUBLE);
if (result == NULL) throw std::bad_alloc();
double* resultDataPtr = static_cast<double*>(PyArray_DATA((PyArrayObject*)result));
for (int i = 0; i < nRows; i++) {
for (int j = 0; j < nCols; j++) {
resultDataPtr[i * nCols + j] = matrix(i, j);
}
}
return result;
}
extern "C" {
// An example function that takes a NumPy array and converts it to
// an arma::mat and back. This should return the array unchanged.
static PyObject* example_testFunction(PyObject* self, PyObject* args)
{
PyArrayObject* myArray = NULL;
if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &myArray)) {
return NULL;
}
PyObject* output = NULL;
try {
arma::mat myMat = convertPyArrayToArma<double>(myArray, -1, -1);
output = convertArmaToPyArray(myMat);
}
catch (const std::bad_alloc&) {
PyErr_NoMemory();
Py_XDECREF(output);
return NULL;
}
catch (const std::exception& err) {
PyErr_SetString(PyExc_RuntimeError, err.what());
Py_XDECREF(output);
return NULL;
}
return output;
}
static PyMethodDef example_methods[] =
{
{"test_function", example_testFunction, METH_VARARGS, "A test function"},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef example_module = {
PyModuleDef_HEAD_INIT,
"example", /* name of module */
NULL, /* module documentation, may be NULL */
-1, /* size of per-interpreter state of the module,
or -1 if the module keeps state in global variables. */
example_methods
};
PyMODINIT_FUNC
PyInit_example(void)
{
import_array();
PyObject* m = PyModule_Create(&example_module);
if (m == NULL) return NULL;
return m;
}
}
和setup.py
编译:
from setuptools import setup, Extension
import numpy as np
example_module = Extension(
'example',
include_dirs=[np.get_include(), '/usr/local/include'],
libraries=['armadillo'],
library_dirs=['/usr/local/lib'],
sources=['example.cpp'],
language='c++',
extra_compile_args=['-std=c++11', '-mmacosx-version-min=10.10'],
)
setup(name='example',
ext_modules=[example_module],
)
现在假设我们有示例数组
a = np.array([[ 1, 2, 3, 4, 5, 6],
[ 7, 8, 9,10,11,12],
[13,14,15,16,17,18]], dtype='float64')
该函数似乎适用于像 a[:, :3]
这样的多维切片,并且它 returns 矩阵没有改变,正如我所期望的那样。但是如果我给它一个一维切片,我得到错误的组件,除非我复制:
>>> example.test_function(a[:, 3])
array([ 4., 5., 6.])
>>> example.test_function(a[:, 3].copy())
array([ 4., 10., 16.])
数组的视图只是同一数据数组的另一个 information-wrapper。 Numpy
不在此处复制任何数据。仅调整用于解释数据的信息,并在有用时移动指向数据的指针。
在您的代码中,您假设向量 a[:, 3]
的数据表示为内存中的向量,对于 NPY_ARRAY_CARRAY
和 NPY_ARRAY_FARRAY
没有区别。但是这种表示只有在创建数组本身的(fortran 有序的)副本后才能获得。
为了让它工作,我稍微修改了你的 convertPyArrayToArma()
函数来创建一个副本,即使它是一个矢量:
template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
int arrTypeCode;
if (std::is_same<outT, uint16_t>::value) {
arrTypeCode = NPY_UINT16;
}
else if (std::is_same<outT, double>::value) {
arrTypeCode = NPY_DOUBLE;
}
else {
throw NotImplemented();
}
PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
if (reqDescr == NULL) throw std::bad_alloc();
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
const auto dims = getPyArrayDimensions(pyarr);
outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));
// this copies the data from cleanArr
arma::Mat<outT> result;
if (dims.size() == 1) {
result = arma::Col<outT>(dataPtr, dims[0], true);
}
else {
result = arma::Mat<outT>(dataPtr, dims[0], dims[1], true);
}
Py_DECREF(cleanArr);
return result;
}
在我用 C++ 编写的 Python 扩展模块中,我使用以下代码片段将 NumPy 数组转换为 Armadillo 数组以用于代码的 C++ 部分:
static arma::mat convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
// Check if the dimensions are what I expect.
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
const std::vector<int> dims = getPyArrayDimensions(pyarr); // Gets the dimensions using the API
PyArray_Descr* reqDescr = PyArray_DescrFromType(NPY_DOUBLE);
if (reqDescr == NULL) throw std::bad_alloc();
// Convert the array to Fortran-ordering as required by Armadillo
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr,
NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
double* dataPtr = static_cast<double*>(PyArray_DATA(cleanArr));
arma::mat result (dataPtr, dims[0], dims[1], true); // this copies the data from cleanArr
Py_DECREF(cleanArr);
return result;
}
问题是,当我将 NumPy 数组(即 my_array[:, 3]
)的 view 传递给它时,它似乎无法处理底层的步幅C数组正确。根据输出,似乎函数接收到的数组 pyarr
实际上是完整的基本数组,而不是视图(或者至少当我使用 PyArray_DATA
访问数据时,我似乎是获取指向完整基数组的指针)。如果我改为向此函数传递视图的副本(即 my_array[:, 3].copy()
),它会按预期工作,但我不想每次都记住这样做。
那么,有没有办法让 PyArray_FromArray
只复制我想要的矩阵切片?我尝试使用标志 NPY_ARRAY_ENSURECOPY
,但没有帮助。
编辑 1
正如评论中所建议的,这是一个完整的工作示例:
在文件中 example.cpp
:
#define NPY_NO_DEPRECATED_API NPY_1_9_API_VERSION
extern "C" {
#include <Python.h>
#include <numpy/arrayobject.h>
}
#include <exception>
#include <cassert>
#include <string>
#include <type_traits>
#include <map>
#include <vector>
#include <armadillo>
class WrongDimensions : public std::exception
{
public:
WrongDimensions() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "The dimensions were incorrect";
};
class NotImplemented : public std::exception
{
public:
NotImplemented() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "Not implemented";
};
class BadArrayLayout : public std::exception
{
public:
BadArrayLayout() {}
const char* what() const noexcept { return msg.c_str(); }
private:
std::string msg = "The matrix was not contiguous";
};
static const std::vector<npy_intp> getPyArrayDimensions(PyArrayObject* pyarr)
{
npy_intp ndims = PyArray_NDIM(pyarr);
npy_intp* dims = PyArray_SHAPE(pyarr);
std::vector<npy_intp> result;
for (int i = 0; i < ndims; i++) {
result.push_back(dims[i]);
}
return result;
}
/* Checks the dimensions of the given array. Pass -1 for either dimension to say you don't
* care what the size is in that dimension. Pass dimensions (X, 1) for a vector.
*/
static bool checkPyArrayDimensions(PyArrayObject* pyarr, const npy_intp dim0, const npy_intp dim1)
{
const auto dims = getPyArrayDimensions(pyarr);
assert(dims.size() <= 2 && dims.size() > 0);
if (dims.size() == 1) {
return (dims[0] == dim0 || dim0 == -1) && (dim1 == 1 || dim1 == -1);
}
else {
return (dims[0] == dim0 || dim0 == -1) && (dims[1] == dim1 || dim1 == -1);
}
}
template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
int arrTypeCode;
if (std::is_same<outT, uint16_t>::value) {
arrTypeCode = NPY_UINT16;
}
else if (std::is_same<outT, double>::value) {
arrTypeCode = NPY_DOUBLE;
}
else {
throw NotImplemented();
}
const auto dims = getPyArrayDimensions(pyarr);
if (dims.size() == 1) {
outT* dataPtr = static_cast<outT*>(PyArray_DATA(pyarr));
return arma::Col<outT>(dataPtr, dims[0], true);
}
else {
PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
if (reqDescr == NULL) throw std::bad_alloc();
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));
arma::Mat<outT> result (dataPtr, dims[0], dims[1], true); // this copies the data from cleanArr
Py_DECREF(cleanArr);
return result;
}
}
static PyObject* convertArmaToPyArray(const arma::mat& matrix)
{
npy_intp ndim = matrix.is_colvec() ? 1 : 2;
npy_intp nRows = static_cast<npy_intp>(matrix.n_rows); // NOTE: This narrows the integer
npy_intp nCols = static_cast<npy_intp>(matrix.n_cols);
npy_intp dims[2] = {nRows, nCols};
PyObject* result = PyArray_SimpleNew(ndim, dims, NPY_DOUBLE);
if (result == NULL) throw std::bad_alloc();
double* resultDataPtr = static_cast<double*>(PyArray_DATA((PyArrayObject*)result));
for (int i = 0; i < nRows; i++) {
for (int j = 0; j < nCols; j++) {
resultDataPtr[i * nCols + j] = matrix(i, j);
}
}
return result;
}
extern "C" {
// An example function that takes a NumPy array and converts it to
// an arma::mat and back. This should return the array unchanged.
static PyObject* example_testFunction(PyObject* self, PyObject* args)
{
PyArrayObject* myArray = NULL;
if (!PyArg_ParseTuple(args, "O!", &PyArray_Type, &myArray)) {
return NULL;
}
PyObject* output = NULL;
try {
arma::mat myMat = convertPyArrayToArma<double>(myArray, -1, -1);
output = convertArmaToPyArray(myMat);
}
catch (const std::bad_alloc&) {
PyErr_NoMemory();
Py_XDECREF(output);
return NULL;
}
catch (const std::exception& err) {
PyErr_SetString(PyExc_RuntimeError, err.what());
Py_XDECREF(output);
return NULL;
}
return output;
}
static PyMethodDef example_methods[] =
{
{"test_function", example_testFunction, METH_VARARGS, "A test function"},
{NULL, NULL, 0, NULL}
};
static struct PyModuleDef example_module = {
PyModuleDef_HEAD_INIT,
"example", /* name of module */
NULL, /* module documentation, may be NULL */
-1, /* size of per-interpreter state of the module,
or -1 if the module keeps state in global variables. */
example_methods
};
PyMODINIT_FUNC
PyInit_example(void)
{
import_array();
PyObject* m = PyModule_Create(&example_module);
if (m == NULL) return NULL;
return m;
}
}
和setup.py
编译:
from setuptools import setup, Extension
import numpy as np
example_module = Extension(
'example',
include_dirs=[np.get_include(), '/usr/local/include'],
libraries=['armadillo'],
library_dirs=['/usr/local/lib'],
sources=['example.cpp'],
language='c++',
extra_compile_args=['-std=c++11', '-mmacosx-version-min=10.10'],
)
setup(name='example',
ext_modules=[example_module],
)
现在假设我们有示例数组
a = np.array([[ 1, 2, 3, 4, 5, 6],
[ 7, 8, 9,10,11,12],
[13,14,15,16,17,18]], dtype='float64')
该函数似乎适用于像 a[:, :3]
这样的多维切片,并且它 returns 矩阵没有改变,正如我所期望的那样。但是如果我给它一个一维切片,我得到错误的组件,除非我复制:
>>> example.test_function(a[:, 3])
array([ 4., 5., 6.])
>>> example.test_function(a[:, 3].copy())
array([ 4., 10., 16.])
数组的视图只是同一数据数组的另一个 information-wrapper。 Numpy
不在此处复制任何数据。仅调整用于解释数据的信息,并在有用时移动指向数据的指针。
在您的代码中,您假设向量 a[:, 3]
的数据表示为内存中的向量,对于 NPY_ARRAY_CARRAY
和 NPY_ARRAY_FARRAY
没有区别。但是这种表示只有在创建数组本身的(fortran 有序的)副本后才能获得。
为了让它工作,我稍微修改了你的 convertPyArrayToArma()
函数来创建一个副本,即使它是一个矢量:
template<typename outT>
static arma::Mat<outT> convertPyArrayToArma(PyArrayObject* pyarr, int nrows, int ncols)
{
if (!checkPyArrayDimensions(pyarr, nrows, ncols)) throw WrongDimensions();
int arrTypeCode;
if (std::is_same<outT, uint16_t>::value) {
arrTypeCode = NPY_UINT16;
}
else if (std::is_same<outT, double>::value) {
arrTypeCode = NPY_DOUBLE;
}
else {
throw NotImplemented();
}
PyArray_Descr* reqDescr = PyArray_DescrFromType(arrTypeCode);
if (reqDescr == NULL) throw std::bad_alloc();
PyArrayObject* cleanArr = (PyArrayObject*)PyArray_FromArray(pyarr, reqDescr, NPY_ARRAY_FARRAY);
if (cleanArr == NULL) throw std::bad_alloc();
reqDescr = NULL; // The new reference from DescrFromType was stolen by FromArray
const auto dims = getPyArrayDimensions(pyarr);
outT* dataPtr = static_cast<outT*>(PyArray_DATA(cleanArr));
// this copies the data from cleanArr
arma::Mat<outT> result;
if (dims.size() == 1) {
result = arma::Col<outT>(dataPtr, dims[0], true);
}
else {
result = arma::Mat<outT>(dataPtr, dims[0], dims[1], true);
}
Py_DECREF(cleanArr);
return result;
}