Nvidia CUDA C 段错误（segmentation fault）

Question

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#include <cuda_runtime.h>

// Element buffers. None of these six pointers is referenced by the code shown
// here. NOTE(review): the h_/d_ prefixes presumably mean host/device - confirm.
float *h_A;
float *h_B;
float *h_C;
float *d_A;
float *d_B;
float *d_C;

// Tables of buffers. h_Many is the value allocateMemory() passes to
// allocateManyHostMemory(); d_Many is not referenced by the code shown here.
float **d_Many;
float **h_Many;

// Most recent CUDA status; starts out as cudaSuccess.
cudaError_t err = cudaSuccess;

// Element count that main() hands to allocateMemory().
long numElements = 10000000;

// Timestamps in seconds taken with cpuSecond(). The host/device allocation
// pairs are written by allocateMemory() and main(); the copy/execute pairs are
// not written by the code shown here.
double startHostAllocate;
double endHostAllocate;
double startDeviceAllocate;
double endDeviceAllocate;
double startCopy;
double endCopy;
double startExecute;
double endExecute;

// Returns the time reported by gettimeofday() as a single double: the seconds
// field plus the microseconds field scaled to a fraction of a second.
double cpuSecond() {
    struct timeval now;
    gettimeofday(&now, NULL);

    const double wholeSeconds = (double) now.tv_sec;
    const double fractionalSeconds = (double) now.tv_usec * 1.e-6;
    return wholeSeconds + fractionalSeconds;
}

// Allocates a table of `length` host buffers, each large enough to hold
// `numElements` items of `size` bytes.
//
// manyHostMemory: ignored on entry. The parameter is passed by value, so the
//                 caller must use the return value to obtain the new table.
// length:         number of buffers to allocate.
// size:           size in bytes of one element.
// numElements:    number of elements per buffer.
//
// Returns the newly allocated table of pointers, or NULL when an argument is
// negative, a byte count would overflow size_t, or any allocation fails. On
// failure everything allocated so far is released. On success the caller owns
// the table and every buffer in it and releases each of them with free().
void** allocateManyHostMemory(void **manyHostMemory, int length, size_t size,
        int numElements) {
    const size_t maxBytes = (size_t) -1;

    if (length < 0 || numElements < 0) {
        return NULL;
    }
    // Reject requests whose byte counts would wrap around size_t.
    if (size != 0 && (size_t) numElements > maxBytes / size) {
        return NULL;
    }
    if ((size_t) length > maxBytes / sizeof(void*)) {
        return NULL;
    }
    const size_t bufferBytes = size * (size_t) numElements;
    const size_t tableBytes = sizeof(void*) * (size_t) length;

    manyHostMemory = (void **) malloc(tableBytes);
    // malloc(0) may legitimately return NULL, so only a non-empty request that
    // comes back NULL counts as a failure.
    if (manyHostMemory == NULL && tableBytes != 0) {
        return NULL;
    }
    printf("Host array memory allocated");

    for (int i = 0; i < length; i++) {
        manyHostMemory[i] = malloc(bufferBytes);
        if (manyHostMemory[i] == NULL && bufferBytes != 0) {
            // Roll back the buffers obtained so far so nothing leaks.
            for (int j = 0; j < i; j++) {
                free(manyHostMemory[j]);
            }
            free(manyHostMemory);
            return NULL;
        }
    }
    return manyHostMemory;
}

// Allocates the host-side buffers and records how long the allocation takes.
//
// numElements: number of float elements requested for each of the 3 host
//              buffers.
// Writes the global timestamps startHostAllocate and endHostAllocate.
void allocateMemory(int numElements) {
    // Variable-length array with automatic storage; it is never read or
    // written in this function. NOTE(review): with the 10000000 elements that
    // main() passes, an array this large can exceed the stack limit.
    bool memcpyThisArray[numElements];

    startHostAllocate = cpuSecond();
    {
        // The table returned by allocateManyHostMemory() is discarded here, and
        // the helper receives h_Many by value, so the global h_Many is not
        // updated by this call.
        allocateManyHostMemory((void **) h_Many, 3, sizeof(float), numElements);
    }
    endHostAllocate = cpuSecond();
    printf("Host memory allocated");
}

// Program entry point: runs allocateMemory() for numElements elements and
// stores the cpuSecond() readings taken before and after that call in
// startDeviceAllocate and endDeviceAllocate.
int main(void) {
    startDeviceAllocate = cpuSecond();
    // numElements is a long; allocateMemory() takes an int, so spell out the
    // conversion that otherwise happens implicitly.
    allocateMemory((int) numElements);
    endDeviceAllocate = cpuSecond();
    return 0;
}

编辑 gdb 结果

  Program received signal SIGSEGV, Segmentation fault.
allocateMemory (numElements=10000000) at addOperation.cu:46
46      startHostAllocate = cpuSecond();
(gdb) bt
#0  allocateMemory (numElements=10000000) at addOperation.cu:46
#1  0x00000000004027f9 in main () at addOperation.cu:59
(gdb)

我在这里错过了什么？

再次编辑（MCVE）：我已经补充了完整的代码，以便可以直接复制并编译。

Answer 1

这个问题与另一个已有的问题相同（与 CUDA 无关），原因出在下面这一行：

bool memcpyThisArray[numElements];

这是一个分配在栈上的变长数组。当 numElements=10000000 时，它大约需要 10 MB 的栈空间，通常超过默认的栈大小限制，因此您的程序会用完栈空间（而不是堆空间），并产生栈溢出/段错误。将代码更改为：

   // Allocates the three host buffers and records the allocation time in the
   // globals startHostAllocate and endHostAllocate.
   //
   // numElements: number of float elements requested for each host buffer.
   void allocateMemory(int numElements) {
       // Removed: a variable-length array of this size overflows the stack.
       // Allocate it dynamically if it is actually needed.
       /* bool memcpyThisArray[numElements]; */

       startHostAllocate = cpuSecond();
       // allocateManyHostMemory() receives h_Many by value, so the new table
       // is only reachable through its return value; store it back in h_Many
       // instead of discarding it.
       h_Many = (float **) allocateManyHostMemory((void **) h_Many, 3,
               sizeof(float), numElements);
       endHostAllocate = cpuSecond();
       printf("Host memory allocated");
   }

问题就会消失。

如果您在实际应用程序中确实需要使用 memcpyThisArray 进行某些操作，请动态分配它。

Nvidia Cuda C 分段错误

Nvidia Cuda C segmentation fault

cuda

nvidia