将 NumPy 数组转换为 cufftComplex
Converting a NumPy Array to cufftComplex
我正在编写一个脚本来使用基于 GPU/CUDA 的 cuFFT 库执行 FFT。 CuFFT 要求输入数据的格式必须指定为 "cufftComplex"。但是我的输入数据是 numpy.complex64 格式。我正在使用 Python C-API 将数据从 python 发送到 C。如何在两种格式之间进行转换?目前我的代码如下所示:
#include<python2.7/Python.h>
#include<numpy/arrayobject.h>
#include<cufft.h>
// Snippet from the question, kept exactly as posted: it does not compile.
// The PyObject* arguments are handed straight to the cuFFT execute call, which is
// what the "incompatible with parameter of type" compiler error reported for this
// code complains about.
// NOTE(review): pyObject, cuffthandle, cuFFTPlan1d and cuFFTExecC2C do not match the
// spellings used by the working example in this document (PyObject, cufftHandle,
// cufftPlan1d, cufftExecC2C) -- presumably typos; confirm against the cuFFT headers.
void compute_BP(PyObject* inputData, pyObject* OutputData, int Nfft)
{
cuffthandle plan;
// NOTE(review): the working example passes 1 as the last cufftPlan1d argument;
// verify that CUFFT_INVERSE is really what is intended in that position.
cuFFTPlan1d(&plan, Nfft, CUFFT_C2C, CUFFT_INVERSE);
cuFFTExecC2C(plan, inputData, OutputData, CUFFT_INVERSE);
...
}
编译时出现以下错误:
Error: argument of type "PyObject *" is incompatible with parameter of type "cufftComplex".
借用我在另一个回答中的内容,下面是一个可运行的示例,说明如何在 Python 脚本中通过 ctypes 调用 cufft 库中的函数,并使用 numpy 数据:
$ cat mylib.cpp
#include <cufft.h>
#include <stdio.h>
#include <assert.h>
#include <cuda_runtime_api.h>
extern "C"
void fft(void *input, void *output, size_t N){
cufftHandle plan;
cufftComplex *d_in, *d_out;
size_t ds = N*sizeof(cufftComplex);
cudaMalloc((void **)&d_in, ds);
cudaMalloc((void **)&d_out, ds);
cufftResult res = cufftPlan1d(&plan, N, CUFFT_C2C, 1);
assert(res == CUFFT_SUCCESS);
cudaMemcpy(d_in, input, ds, cudaMemcpyHostToDevice);
res = cufftExecC2C(plan, d_in, d_out, CUFFT_FORWARD);
assert(res == CUFFT_SUCCESS);
cudaMemcpy(output, d_out, ds, cudaMemcpyDeviceToHost);
printf("%s\n", cudaGetErrorString(cudaGetLastError()));
printf("from shared object:\n");
for (int i = 0; i < N; i++)
printf("%.1f + j%.1f, ", ((cufftComplex *)output)[i].x, ((cufftComplex *)output)[i].y);
printf("\n");
}
$ cat t8.py
import ctypes
import os
import sys
import numpy as np
# Load the shared library built from mylib.cpp (it must be on the loader search path).
mylib = ctypes.cdll.LoadLibrary('libmylib.so')
# Declare the C prototype: void fft(void *input, void *output, size_t N).
# Without restype = None ctypes assumes an int return value even for a void function,
# and without argtypes the arguments are not type-checked or converted.
mylib.fft.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t]
mylib.fft.restype = None
N = 4
# complex64 arrays: input of ones, and a zero-filled buffer the library writes into.
mydata = np.ones((N), dtype = np.complex64)
myresult = np.zeros((N), dtype = np.complex64)
# .ctypes.data is the integer address of each array's buffer; argtypes converts it to void *.
mylib.fft(mydata.ctypes.data, myresult.ctypes.data, N)
print(myresult)
$ g++ -fPIC -I/usr/local/cuda/include --shared mylib.cpp -L/usr/local/cuda/lib64 -lcufft -lcudart -o libmylib.so
$ LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd` python t8.py
no error
from shared object:
4.0 + j0.0, 0.0 + j0.0, 0.0 + j0.0, 0.0 + j0.0,
[4.+0.j 0.+0.j 0.+0.j 0.+0.j]
$
我正在编写一个脚本来使用基于 GPU/CUDA 的 cuFFT 库执行 FFT。 CuFFT 要求输入数据的格式必须指定为 "cufftComplex"。但是我的输入数据是 numpy.complex64 格式。我正在使用 Python C-API 将数据从 python 发送到 C。如何在两种格式之间进行转换?目前我的代码如下所示:
#include<python2.7/Python.h>
#include<numpy/arrayobject.h>
#include<cufft.h>
// Snippet from the question, kept exactly as posted: it does not compile.
// The PyObject* arguments are handed straight to the cuFFT execute call, which is
// what the "incompatible with parameter of type" compiler error reported for this
// code complains about.
// NOTE(review): pyObject, cuffthandle, cuFFTPlan1d and cuFFTExecC2C do not match the
// spellings used by the working example in this document (PyObject, cufftHandle,
// cufftPlan1d, cufftExecC2C) -- presumably typos; confirm against the cuFFT headers.
void compute_BP(PyObject* inputData, pyObject* OutputData, int Nfft)
{
cuffthandle plan;
// NOTE(review): the working example passes 1 as the last cufftPlan1d argument;
// verify that CUFFT_INVERSE is really what is intended in that position.
cuFFTPlan1d(&plan, Nfft, CUFFT_C2C, CUFFT_INVERSE);
cuFFTExecC2C(plan, inputData, OutputData, CUFFT_INVERSE);
...
}
编译时出现以下错误:
Error: argument of type "PyObject *" is incompatible with parameter of type "cufftComplex".
借用我在另一个回答中的内容,下面是一个可运行的示例,说明如何在 Python 脚本中通过 ctypes 调用 cufft 库中的函数,并使用 numpy 数据:
$ cat mylib.cpp
#include <cufft.h>
#include <stdio.h>
#include <assert.h>
#include <cuda_runtime_api.h>
extern "C"
void fft(void *input, void *output, size_t N){
cufftHandle plan;
cufftComplex *d_in, *d_out;
size_t ds = N*sizeof(cufftComplex);
cudaMalloc((void **)&d_in, ds);
cudaMalloc((void **)&d_out, ds);
cufftResult res = cufftPlan1d(&plan, N, CUFFT_C2C, 1);
assert(res == CUFFT_SUCCESS);
cudaMemcpy(d_in, input, ds, cudaMemcpyHostToDevice);
res = cufftExecC2C(plan, d_in, d_out, CUFFT_FORWARD);
assert(res == CUFFT_SUCCESS);
cudaMemcpy(output, d_out, ds, cudaMemcpyDeviceToHost);
printf("%s\n", cudaGetErrorString(cudaGetLastError()));
printf("from shared object:\n");
for (int i = 0; i < N; i++)
printf("%.1f + j%.1f, ", ((cufftComplex *)output)[i].x, ((cufftComplex *)output)[i].y);
printf("\n");
}
$ cat t8.py
import ctypes
import os
import sys
import numpy as np
# Load the shared library built from mylib.cpp (it must be on the loader search path).
mylib = ctypes.cdll.LoadLibrary('libmylib.so')
# Declare the C prototype: void fft(void *input, void *output, size_t N).
# Without restype = None ctypes assumes an int return value even for a void function,
# and without argtypes the arguments are not type-checked or converted.
mylib.fft.argtypes = [ctypes.c_void_p, ctypes.c_void_p, ctypes.c_size_t]
mylib.fft.restype = None
N = 4
# complex64 arrays: input of ones, and a zero-filled buffer the library writes into.
mydata = np.ones((N), dtype = np.complex64)
myresult = np.zeros((N), dtype = np.complex64)
# .ctypes.data is the integer address of each array's buffer; argtypes converts it to void *.
mylib.fft(mydata.ctypes.data, myresult.ctypes.data, N)
print(myresult)
$ g++ -fPIC -I/usr/local/cuda/include --shared mylib.cpp -L/usr/local/cuda/lib64 -lcufft -lcudart -o libmylib.so
$ LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd` python t8.py
no error
from shared object:
4.0 + j0.0, 0.0 + j0.0, 0.0 + j0.0, 0.0 + j0.0,
[4.+0.j 0.+0.j 0.+0.j 0.+0.j]
$