cuBLAS cublasSgemv “分段错误”
cuBLAS cublasSgemv “Segmentation fault”
运行 cublasSgemv 时,我遇到了分段错误。我的 GPU 是 K20Xm。以下是我的代码。
// Code as posted in the question (reproduces the segmentation fault).
// `rows`, `cols` and `handle` are used below but not declared in this excerpt.
float *a, *x, *y;
int NUM_VEC = 8;
// Host buffers: y holds NUM_VEC result vectors, a the matrix, x NUM_VEC input vectors.
// Note: y is never initialized before it is uploaded below.
y = (float*)malloc(sizeof(float) * rows * NUM_VEC);
a = (float*)malloc(sizeof(float) * rows * cols);
x = (float*)malloc(sizeof(float) * cols * NUM_VEC);
get_mat_random(a, rows, cols);
get_vec_random(x, cols * NUM_VEC);
float *d_a = 0;
float *d_x = 0;
float *d_y = 0;
// Device buffers matching the host buffers above.
// NOTE(review): each cudaMalloc call below is missing its closing ')' as posted.
cudaMalloc((void **)&d_a, rows * cols * sizeof(float);
cudaMalloc((void **)&d_x, cols * NUM_VEC * sizeof(float);
cudaMalloc((void **)&d_y, rows * NUM_VEC * sizeof(float);
// Upload the host data to the device buffers.
cublasSetVector(rows * cols, sizeof(float), a, 1, d_a, 1);
cublasSetVector(NUM_VEC * cols, sizeof(float), x, 1, d_x, 1);
cublasSetVector(NUM_VEC * rows, sizeof(float), y, 1, d_y, 1);
float alpha = 1.0f;
// One matrix-vector product per input vector.
for (int i = 0; i < NUM_VEC; i++) {
// BUG: the beta argument is the literal 0, i.e. a null pointer. cublasSgemv
// expects a pointer to a float here; this is the cause of the reported crash.
cublasSgemv(handle, CUBLAS_OP_T, cols, rows, &alpha, d_a, rows, d_x + i * cols, 1,0, d_y + i * rows, 1);
}
在我有限的测试中,出错的原因是 cublasSgemv 的 beta 参数不能为 NULL。您应该在主机或设备上为 beta 变量分配内存。以下是我用来重现并修复该错误的代码。
#include <cstdio>
#include <iostream>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cstdlib>
using namespace std;
// Fills a[0 .. count-1] with pseudo-random values obtained by dividing
// rand() by RAND_MAX (converted to float). Does nothing when count <= 0.
void get_vec_random(float* a, int count)
{
    const float denom = static_cast<float>(RAND_MAX);
    int idx = 0;
    while (idx < count) {
        a[idx] = rand() / denom;
        ++idx;
    }
}
// Fills a matrix buffer of rows * cols floats with random values by
// treating it as one flat vector and delegating to get_vec_random.
void get_mat_random(float* a, int rows, int cols)
{
    const int element_count = rows * cols;
    get_vec_random(a, element_count);
}
int main(int argc, char** argv)
{
int rows = 10, cols = 10;
cublasHandle_t handle;
cublasCreate(&handle);
float *a, *x, *y;
int NUM_VEC = 8;
y = (float*)malloc(sizeof(float) * rows * NUM_VEC);
a = (float*)malloc(sizeof(float) * rows * cols);
x = (float*)malloc(sizeof(float) * cols * NUM_VEC);
get_mat_random(a, rows, cols);
get_vec_random(x, cols * NUM_VEC);
float *d_a = 0;
float *d_x = 0;
float *d_y = 0;
cudaMalloc((void **)&d_a, rows * cols * sizeof(float));
cudaMalloc((void **)&d_x, cols * NUM_VEC * sizeof(float));
cudaMalloc((void **)&d_y, rows * NUM_VEC * sizeof(float));
cublasSetVector(rows * cols, sizeof(float), a, 1, d_a, 1);
cublasSetVector(NUM_VEC * cols, sizeof(float), x, 1, d_x, 1);
cublasSetVector(NUM_VEC * rows, sizeof(float), y, 1, d_y, 1);
float alpha = 1.0f, beta = 1.0f;
cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_HOST);
for (int i = 0; i < NUM_VEC; i++)
{
cublasSgemv(handle,
CUBLAS_OP_T,
cols,
rows,
&alpha,
d_a,
rows,
d_x + i * cols,
1,
&beta,
d_y + i * rows,
1);
}
return 0;
}
希望这能解决问题。
运行 cublasSgemv 时,我遇到了分段错误。我的 GPU 是 K20Xm。以下是我的代码。
// Code as posted in the question (reproduces the segmentation fault).
// `rows`, `cols` and `handle` are used below but not declared in this excerpt.
float *a, *x, *y;
int NUM_VEC = 8;
// Host buffers: y holds NUM_VEC result vectors, a the matrix, x NUM_VEC input vectors.
// Note: y is never initialized before it is uploaded below.
y = (float*)malloc(sizeof(float) * rows * NUM_VEC);
a = (float*)malloc(sizeof(float) * rows * cols);
x = (float*)malloc(sizeof(float) * cols * NUM_VEC);
get_mat_random(a, rows, cols);
get_vec_random(x, cols * NUM_VEC);
float *d_a = 0;
float *d_x = 0;
float *d_y = 0;
// Device buffers matching the host buffers above.
// NOTE(review): each cudaMalloc call below is missing its closing ')' as posted.
cudaMalloc((void **)&d_a, rows * cols * sizeof(float);
cudaMalloc((void **)&d_x, cols * NUM_VEC * sizeof(float);
cudaMalloc((void **)&d_y, rows * NUM_VEC * sizeof(float);
// Upload the host data to the device buffers.
cublasSetVector(rows * cols, sizeof(float), a, 1, d_a, 1);
cublasSetVector(NUM_VEC * cols, sizeof(float), x, 1, d_x, 1);
cublasSetVector(NUM_VEC * rows, sizeof(float), y, 1, d_y, 1);
float alpha = 1.0f;
// One matrix-vector product per input vector.
for (int i = 0; i < NUM_VEC; i++) {
// BUG: the beta argument is the literal 0, i.e. a null pointer. cublasSgemv
// expects a pointer to a float here; this is the cause of the reported crash.
cublasSgemv(handle, CUBLAS_OP_T, cols, rows, &alpha, d_a, rows, d_x + i * cols, 1,0, d_y + i * rows, 1);
}
在我有限的测试中,出错的原因是 cublasSgemv 的 beta 参数不能为 NULL。您应该在主机或设备上为 beta 变量分配内存。以下是我用来重现并修复该错误的代码。
#include <cstdio>
#include <iostream>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cstdlib>
using namespace std;
// Fills a[0 .. count-1] with pseudo-random values obtained by dividing
// rand() by RAND_MAX (converted to float). Does nothing when count <= 0.
void get_vec_random(float* a, int count)
{
    const float denom = static_cast<float>(RAND_MAX);
    int idx = 0;
    while (idx < count) {
        a[idx] = rand() / denom;
        ++idx;
    }
}
// Fills a matrix buffer of rows * cols floats with random values by
// treating it as one flat vector and delegating to get_vec_random.
void get_mat_random(float* a, int rows, int cols)
{
    const int element_count = rows * cols;
    get_vec_random(a, element_count);
}
int main(int argc, char** argv)
{
int rows = 10, cols = 10;
cublasHandle_t handle;
cublasCreate(&handle);
float *a, *x, *y;
int NUM_VEC = 8;
y = (float*)malloc(sizeof(float) * rows * NUM_VEC);
a = (float*)malloc(sizeof(float) * rows * cols);
x = (float*)malloc(sizeof(float) * cols * NUM_VEC);
get_mat_random(a, rows, cols);
get_vec_random(x, cols * NUM_VEC);
float *d_a = 0;
float *d_x = 0;
float *d_y = 0;
cudaMalloc((void **)&d_a, rows * cols * sizeof(float));
cudaMalloc((void **)&d_x, cols * NUM_VEC * sizeof(float));
cudaMalloc((void **)&d_y, rows * NUM_VEC * sizeof(float));
cublasSetVector(rows * cols, sizeof(float), a, 1, d_a, 1);
cublasSetVector(NUM_VEC * cols, sizeof(float), x, 1, d_x, 1);
cublasSetVector(NUM_VEC * rows, sizeof(float), y, 1, d_y, 1);
float alpha = 1.0f, beta = 1.0f;
cublasSetPointerMode(handle, CUBLAS_POINTER_MODE_HOST);
for (int i = 0; i < NUM_VEC; i++)
{
cublasSgemv(handle,
CUBLAS_OP_T,
cols,
rows,
&alpha,
d_a,
rows,
d_x + i * cols,
1,
&beta,
d_y + i * rows,
1);
}
return 0;
}
希望这能解决问题。