使用 NVIDIA 的 cuSolver 库的 Pycuda 中的分段错误
Segmentation Fault in Pycuda using NVIDIA's cuSolver Library
我正在尝试制作受 scikits-cuda library, for some operations provided in the new cuSolver library of Nvidia, first I need to perfom an LU factorization through cusolverDnSgetrf() op 启发的 pycuda 包装器。但在此之前我需要 'Workspace' 参数,cuSolver 提供的工具名为 cusolverDnSgetrf_bufferSize();但是当我使用它时,只是崩溃并且 return 出现分段错误。我做错了什么?
注意:我已经使用 scikits-cuda 处理这个操作,但是 cuSolver 库使用了很多这种参数,我想比较 scikits-cuda 和我的新库实现之间的用法。
import numpy as np
import pycuda.gpuarray
import ctypes
import ctypes.util
libcusolver = ctypes.cdll.LoadLibrary('libcusolver.so')
class _types:
handle = ctypes.c_void_p
libcusolver.cusolverDnCreate.restype = int
libcusolver.cusolverDnCreate.argtypes = [_types.handle]
def cusolverCreate():
handle = _types.handle()
libcusolver.cusolverDnCreate(ctypes.byref(handle))
return handle.value
libcusolver.cusolverDnDestroy.restype = int
libcusolver.cusolverDnDestroy.argtypes = [_types.handle]
def cusolverDestroy(handle):
libcusolver.cusolverDnDestroy(handle)
libcusolver.cusolverDnSgetrf_bufferSize.restype = int
libcusolver.cusolverDnSgetrf_bufferSize.argtypes =[_types.handle,
ctypes.c_int,
ctypes.c_int,
ctypes.c_void_p,
ctypes.c_int,
ctypes.c_void_p]
def cusolverLUFactorization(handle, matrix):
m,n=matrix.shape
mtx_gpu = gpuarray.to_gpu(matrix.astype('float32'))
work=gpuarray.zeros(1, np.float32)
status=libcusolver.cusolverDnSgetrf_bufferSize(
handle, m, n,
int(mtx_gpu.gpudata),
n, int(work.gpudata))
print status
x = np.asarray(np.random.rand(3, 3), np.float32)
handle_solver=cusolverCreate()
cusolverLUFactorization(handle_solver,x)
cusolverDestroy(handle_solver)
cusolverDnSgetrf_bufferSize
的最后一个参数应该是一个普通的指针,而不是GPU内存指针。尝试修改 cusolverLUFactorization()
函数如下:
def cusolverLUFactorization(handle, matrix):
m,n=matrix.shape
mtx_gpu = gpuarray.to_gpu(matrix.astype('float32'))
work = ctypes.c_int()
status = libcusolver.cusolverDnSgetrf_bufferSize(
handle, m, n,
int(mtx_gpu.gpudata),
n, ctypes.pointer(work))
print status
print work.value
我正在尝试制作受 scikits-cuda library, for some operations provided in the new cuSolver library of Nvidia, first I need to perfom an LU factorization through cusolverDnSgetrf() op 启发的 pycuda 包装器。但在此之前我需要 'Workspace' 参数,cuSolver 提供的工具名为 cusolverDnSgetrf_bufferSize();但是当我使用它时,只是崩溃并且 return 出现分段错误。我做错了什么?
注意:我已经使用 scikits-cuda 处理这个操作,但是 cuSolver 库使用了很多这种参数,我想比较 scikits-cuda 和我的新库实现之间的用法。
import numpy as np
import pycuda.gpuarray
import ctypes
import ctypes.util
libcusolver = ctypes.cdll.LoadLibrary('libcusolver.so')
class _types:
handle = ctypes.c_void_p
libcusolver.cusolverDnCreate.restype = int
libcusolver.cusolverDnCreate.argtypes = [_types.handle]
def cusolverCreate():
handle = _types.handle()
libcusolver.cusolverDnCreate(ctypes.byref(handle))
return handle.value
libcusolver.cusolverDnDestroy.restype = int
libcusolver.cusolverDnDestroy.argtypes = [_types.handle]
def cusolverDestroy(handle):
libcusolver.cusolverDnDestroy(handle)
libcusolver.cusolverDnSgetrf_bufferSize.restype = int
libcusolver.cusolverDnSgetrf_bufferSize.argtypes =[_types.handle,
ctypes.c_int,
ctypes.c_int,
ctypes.c_void_p,
ctypes.c_int,
ctypes.c_void_p]
def cusolverLUFactorization(handle, matrix):
m,n=matrix.shape
mtx_gpu = gpuarray.to_gpu(matrix.astype('float32'))
work=gpuarray.zeros(1, np.float32)
status=libcusolver.cusolverDnSgetrf_bufferSize(
handle, m, n,
int(mtx_gpu.gpudata),
n, int(work.gpudata))
print status
x = np.asarray(np.random.rand(3, 3), np.float32)
handle_solver=cusolverCreate()
cusolverLUFactorization(handle_solver,x)
cusolverDestroy(handle_solver)
cusolverDnSgetrf_bufferSize
的最后一个参数应该是一个普通的指针,而不是GPU内存指针。尝试修改 cusolverLUFactorization()
函数如下:
def cusolverLUFactorization(handle, matrix):
m,n=matrix.shape
mtx_gpu = gpuarray.to_gpu(matrix.astype('float32'))
work = ctypes.c_int()
status = libcusolver.cusolverDnSgetrf_bufferSize(
handle, m, n,
int(mtx_gpu.gpudata),
n, ctypes.pointer(work))
print status
print work.value