Nvidia Cuda C 分段错误
Nvidia Cuda C segmentation fault
#include <stdio.h>
#include <sys/time.h>
#include <cuda_runtime.h>
float *h_A, *h_B, *h_C, *d_A, *d_B, *d_C;
float **d_Many, **h_Many;
cudaError_t err = cudaSuccess;
long numElements = 10000000;
double startHostAllocate, endHostAllocate, startDeviceAllocate,
endDeviceAllocate, startCopy, endCopy, startExecute, endExecute;
double cpuSecond() {
struct timeval tp;
gettimeofday(&tp, NULL);
return ((double) tp.tv_sec + (double) tp.tv_usec * 1.e-6);
}
void** allocateManyHostMemory(void **manyHostMemory, int length, size_t size,
int numElements) {
manyHostMemory = (void **) malloc(sizeof(void*) * length);
printf("Host array memory allocated");
for (int i = 0; i < length; i++) {
manyHostMemory[i] = malloc(size * numElements);
}
return manyHostMemory;
}
void allocateMemory(int numElements) {
bool memcpyThisArray[numElements];
startHostAllocate = cpuSecond();
{
allocateManyHostMemory((void **) h_Many, 3, sizeof(float), numElements);
}
endHostAllocate = cpuSecond();
printf("Host memory allocated");
}
int main(void) {
startDeviceAllocate = cpuSecond();
allocateMemory(numElements);
endDeviceAllocate = cpuSecond();
}
编辑 gdb 结果
Program received signal SIGSEGV, Segmentation fault.
allocateMemory (numElements=10000000) at addOperation.cu:46
46 startHostAllocate = cpuSecond();
(gdb) bt
#0 allocateMemory (numElements=10000000) at addOperation.cu:46
#1 0x00000000004027f9 in main () at addOperation.cu:59
(gdb)
我在这里错过了什么?
再次编辑MVCE
我已经添加了代码,以便可以复制和编译。
问题是the same as this(与CUDA无关):
bool memcpyThisArray[numElements];
当 numElements=10000000
时,您的程序用完堆 space 并产生堆栈 overflow/segmentation 错误。将代码更改为:
void allocateMemory(int numElements) {
/* bool memcpyThisArray[numElements]; */
startHostAllocate = cpuSecond();
{
allocateManyHostMemory((void **) h_Many, 3, sizeof(float), numElements);
}
endHostAllocate = cpuSecond();
printf("Host memory allocated");
}
问题就会消失。
如果您在实际应用程序中确实需要使用 memcpyThisArray
进行某些操作,请动态分配它。
#include <stdio.h>
#include <sys/time.h>
#include <cuda_runtime.h>
float *h_A, *h_B, *h_C, *d_A, *d_B, *d_C;
float **d_Many, **h_Many;
cudaError_t err = cudaSuccess;
long numElements = 10000000;
double startHostAllocate, endHostAllocate, startDeviceAllocate,
endDeviceAllocate, startCopy, endCopy, startExecute, endExecute;
double cpuSecond() {
struct timeval tp;
gettimeofday(&tp, NULL);
return ((double) tp.tv_sec + (double) tp.tv_usec * 1.e-6);
}
void** allocateManyHostMemory(void **manyHostMemory, int length, size_t size,
int numElements) {
manyHostMemory = (void **) malloc(sizeof(void*) * length);
printf("Host array memory allocated");
for (int i = 0; i < length; i++) {
manyHostMemory[i] = malloc(size * numElements);
}
return manyHostMemory;
}
void allocateMemory(int numElements) {
bool memcpyThisArray[numElements];
startHostAllocate = cpuSecond();
{
allocateManyHostMemory((void **) h_Many, 3, sizeof(float), numElements);
}
endHostAllocate = cpuSecond();
printf("Host memory allocated");
}
int main(void) {
startDeviceAllocate = cpuSecond();
allocateMemory(numElements);
endDeviceAllocate = cpuSecond();
}
编辑 gdb 结果
Program received signal SIGSEGV, Segmentation fault.
allocateMemory (numElements=10000000) at addOperation.cu:46
46 startHostAllocate = cpuSecond();
(gdb) bt
#0 allocateMemory (numElements=10000000) at addOperation.cu:46
#1 0x00000000004027f9 in main () at addOperation.cu:59
(gdb)
我在这里错过了什么?
再次编辑MVCE 我已经添加了代码,以便可以复制和编译。
问题是the same as this(与CUDA无关):
bool memcpyThisArray[numElements];
当 numElements=10000000
时,您的程序用完堆 space 并产生堆栈 overflow/segmentation 错误。将代码更改为:
void allocateMemory(int numElements) {
/* bool memcpyThisArray[numElements]; */
startHostAllocate = cpuSecond();
{
allocateManyHostMemory((void **) h_Many, 3, sizeof(float), numElements);
}
endHostAllocate = cpuSecond();
printf("Host memory allocated");
}
问题就会消失。
如果您在实际应用程序中确实需要使用 memcpyThisArray
进行某些操作,请动态分配它。