将 RGB 图像转换为灰度时,我的输出是黑色图像

My output is a black image when converting RGB image to greyscale

我正在尝试将 RGB 图像转换为灰度图像,我正在使用 Lena.jpg 图像。我在 TODO 部分添加了一些代码行,但不幸的是我在输出中得到了一个黑色图像。 我的内核:

#define CHANNELS 3
/*
 * Converts an RGB image to grayscale, one thread per pixel.
 *
 * Launch layout: 2D grid of 2D blocks; the x dimension spans image columns
 * (width) and the y dimension spans image rows (height). Threads that fall
 * outside the image do nothing, so the grid may overshoot the image size.
 *
 * grayImage : output, indexed as row * width + col (one float per pixel)
 * rgbImage  : input, CHANNELS consecutive floats per pixel read as r, g, b,
 *             pixels in the same row * width + col order
 * width     : number of pixels per row
 * height    : number of rows
 */
__global__ void colorConvert(float * grayImage,
                float * rgbImage,
                int width, int height) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads beyond the right/bottom edge have no pixel to process.
    if (col >= width || row >= height) {
        return;
    }

    // Flat index of this pixel in the grayscale image.
    const int pixel = row * width + col;

    // The RGB buffer holds CHANNELS floats for every grayscale pixel,
    // so this pixel's triple starts at pixel * CHANNELS.
    const float *rgb = rgbImage + pixel * CHANNELS;

    // Weighted sum of the three channels (r, g, b in that order).
    grayImage[pixel] = 0.21f * rgb[0] + 0.71f * rgb[1] + 0.07f * rgb[2];
}

这是我的 main 函数:

// Program entry point: reads a color image (argv[1]) as floats, runs the
// colorConvert kernel on the GPU, and writes the grayscale result to argv[2].
// Exits with status 3 when the argument count is wrong and 4 when the input
// does not have 3 channels.
// NOTE(review): none of the CUDA runtime calls below have their return value
// checked, so allocation, copy, or kernel failures pass silently.
int main(int argc, char **argv)
{
  if(argc!=3) {cout<<"Program takes two image filenames as parameters"<<endl;exit(3);}
  float *imgIn, *imgOut;
  int nCols, nRows, channels;

  // Allocate images and initialize from file
  imgIn = read_colored_image_asfloat(argv[1],&nCols, &nRows, &channels);
  if(channels!=3){cout<<"Input image is not a colored image"<<endl;exit(4);}
    // Allocate host images
  //imgIn = (float *)calloc(nCols*nRows, sizeof(float));
  imgOut = (float *)calloc(nCols*nRows, sizeof(float));

  // Allocates device images
  float *d_imgIn, *d_imgOut;
  //@TODO@ : Complete for device allocations
    // Byte count of ONE float per pixel, i.e. the grayscale image size.
    int size = (nCols*nRows)*sizeof(float);
    // allocate memory on device
    // NOTE(review): d_imgIn gets only `size` bytes, but colorConvert reads
    // 3 floats per pixel from it, so the kernel reads past this allocation.
    cudaMalloc((float**) &d_imgIn, size);
    cudaMalloc((float**) &d_imgOut, size);
  // Copy input data
  //@TODO@ : Complete for data copy
    // NOTE(review): copies `size` bytes, which is one third of the 3-channel
    // input that the kernel indexes.
    cudaMemcpy(d_imgIn, imgIn, size, cudaMemcpyHostToDevice);
  // Call the kernel
  //@TODO@ : Compute threads block and grid dimensions
    // 16x16 threads per block; the grid is sized from the image dimensions
    // divided by 16, plus one block in each direction.
    dim3 GridDim((nCols/16.0)+1, (nRows/16.0)+1, 1);
    dim3 BlockDim(16, 16, 1);
  //@TODO@ : Call the CUDA kernel
    // NOTE(review): colorConvert is declared as (gray, rgb, width, height),
    // but nRows is passed as width and nCols as height here.
    colorConvert<<<GridDim, BlockDim>>>(d_imgOut, d_imgIn, nRows, nCols);
  // Copy output data
  //@TODO@ : Complete for data copy
    cudaMemcpy(imgOut, d_imgOut, size, cudaMemcpyDeviceToHost);

  // Write gray image to file
  write_gray_image_fromfloat(argv[2], imgOut, nCols, nRows, 1);

  // Free memory
  //@TODO@ : Free host and device memory
    // free host
    free(imgIn); free(imgOut);
    // free device
    cudaFree(d_imgIn);cudaFree(d_imgOut);
  return 0;
}

您忘记将 RGB 矩阵的大小乘以 3。

应该是:cudaMalloc((float**) &d_imgIn, size*3);cudaMemcpy(d_imgIn, imgIn, size*3, cudaMemcpyHostToDevice);

您还把 nCols 和 nRows 的顺序传反了。
应该是:colorConvert<<<GridDim, BlockDim>>>(d_imgOut, d_imgIn, nCols, nRows);

以下代码应该有效:

// Prints a diagnostic and terminates when a CUDA runtime call reports failure.
// `what` names the operation for the error message.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        std::cerr << "CUDA error during " << what << ": "
                  << cudaGetErrorString(status) << std::endl;
        exit(EXIT_FAILURE);
    }
}

/*
 * Reads a color image (argv[1]) as floats, converts it to grayscale on the
 * GPU with colorConvert, and writes the result to argv[2].
 *
 * Exit status: 0 on success, 3 on wrong argument count, 4 when the input is
 * not a 3-channel image, EXIT_FAILURE on read/allocation/CUDA failures.
 */
int main(int argc, char **argv)
{
    if (argc != 3) {
        std::cout << "Program takes two image filenames as parameters" << std::endl;
        exit(3);
    }

    float *imgIn, *imgOut;
    int nCols = 0, nRows = 0, channels = 0;

    // Allocate the host input image and initialize it from file.
    imgIn = read_colored_image_asfloat(argv[1], &nCols, &nRows, &channels);
    // NOTE(review): presumably the reader returns NULL on failure - confirm
    // against its implementation. Empty dimensions would also produce an
    // invalid (zero-sized) launch grid below, so reject them here.
    if (imgIn == NULL || nCols <= 0 || nRows <= 0) {
        std::cerr << "Failed to read input image" << std::endl;
        exit(EXIT_FAILURE);
    }
    if (channels != 3) {
        std::cout << "Input image is not a colored image" << std::endl;
        exit(4);
    }

    // Byte counts are computed in size_t so large images cannot overflow int.
    const size_t numPixels = (size_t)nCols * (size_t)nRows;
    const size_t grayBytes = numPixels * sizeof(float);
    // The kernel reads 3 floats (r, g, b) per pixel from the input buffer,
    // so the RGB buffer is three times the grayscale size.
    const size_t rgbBytes = grayBytes * 3;

    imgOut = (float *)calloc(numPixels, sizeof(float));
    if (imgOut == NULL) {
        std::cerr << "Failed to allocate host output image" << std::endl;
        exit(EXIT_FAILURE);
    }

    // Allocate device images.
    float *d_imgIn = NULL, *d_imgOut = NULL;
    checkCuda(cudaMalloc((void **)&d_imgIn, rgbBytes), "cudaMalloc(d_imgIn)");
    checkCuda(cudaMalloc((void **)&d_imgOut, grayBytes), "cudaMalloc(d_imgOut)");

    // Copy the full 3-channel input to the device.
    checkCuda(cudaMemcpy(d_imgIn, imgIn, rgbBytes, cudaMemcpyHostToDevice),
              "host-to-device copy");

    // 16x16 threads per block; ceil-divide so the grid covers the whole image
    // without launching a spare row/column of blocks when the size is an
    // exact multiple of the block size.
    dim3 BlockDim(16, 16, 1);
    dim3 GridDim((nCols + BlockDim.x - 1) / BlockDim.x,
                 (nRows + BlockDim.y - 1) / BlockDim.y,
                 1);

    // colorConvert takes (gray, rgb, width, height): width is the column
    // count and height is the row count.
    colorConvert<<<GridDim, BlockDim>>>(d_imgOut, d_imgIn, nCols, nRows);
    // A kernel launch returns no status itself; launch-configuration errors
    // are reported through cudaGetLastError().
    checkCuda(cudaGetLastError(), "colorConvert launch");

    // This blocking copy also reports faults raised while the kernel ran
    // (e.g. an illegal address from an undersized buffer).
    checkCuda(cudaMemcpy(imgOut, d_imgOut, grayBytes, cudaMemcpyDeviceToHost),
              "device-to-host copy");

    // Write gray image to file.
    write_gray_image_fromfloat(argv[2], imgOut, nCols, nRows, 1);

    // Free host memory.
    free(imgIn);
    free(imgOut);

    // Free device memory.
    checkCuda(cudaFree(d_imgIn), "cudaFree(d_imgIn)");
    checkCuda(cudaFree(d_imgOut), "cudaFree(d_imgOut)");

    return 0;
}

哪个错误导致黑色图像?

更正代码的结果:

nRows 和 nCols 传反时的结果:

cudaMemcpy(d_imgIn, imgIn, size, cudaMemcpyHostToDevice);(而不是 size*3)的结果:

cudaMalloc((float**)&d_imgIn, size);(而不是 size*3)的结果:

结论:
cudaMalloc 分配的输入缓冲区过小(大小漏乘 3)是导致输出全黑的主要原因。


有CUDA错误提示吗?

读取 cudaMemcpy 的返回值:

cudaError_t err = cudaMemcpy(imgOut, d_imgOut, size, cudaMemcpyDeviceToHost);

返回的错误状态:cudaErrorIllegalAddress

结论:
检查 CUDA 调用的返回状态很重要 - 它往往能帮助发现代码中的错误。