在 WSL2 上使用 Cuda 给了我 "no kernel image is available for execution on the device."

Question

我正在尝试在 WSL2 上的 C++ 程序中使用 CUDA 和 Thrust，并已按照这里（here）的说明在 WSL2 上启用了 CUDA。下面是一个小示例程序：

首先，我定义：

export CUDA_LIBRARY_DIRECTORY=/usr/local/cuda-11.0/lib64
export CUDA_INCLUDE_DIRECTORY=/usr/local/cuda-11.0/include
export CUDACXX=/usr/local/cuda-11.0/bin/nvcc

CMakeLists.txt

# Build script for the `proj` executable: one CUDA source plus one C++ source.
# NOTE(review): first-class CUDA language support (LANGUAGES ... CUDA below)
# is documented as a CMake 3.8+ feature, so the 2.8 minimum declared here
# understates the real requirement -- confirm and raise it.
cmake_minimum_required(VERSION 2.8)
# Enable C++ and CUDA as project languages; the configure log shows the CUDA
# compiler resolving to the nvcc path exported in CUDACXX.
project(proj LANGUAGES CXX CUDA)

# Compile the C++ sources as C++14 (appears as -std=gnu++14 in the build log).
set (CMAKE_CXX_STANDARD 14)

#### use cuda ####
# NOTE(review): CUDA_NVCC_FLAGS belongs to the legacy FindCUDA workflow
# (find_package(CUDA) / cuda_add_executable), which this file does not use.
# The verbose build log shows nvcc being invoked for cudafile.cu with none of
# these options (no -gencode, no -lineinfo), so this line has no visible
# effect on the build as written.
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_50,code=sm_50;-lineinfo; -cudart=static; -Xptxas; -v)

# Header and library search paths come from the environment variables
# exported before running cmake.
include_directories($ENV{CUDA_INCLUDE_DIRECTORY})
link_directories($ENV{CUDA_LIBRARY_DIRECTORY})

# Single target built from the CUDA translation unit and the C++ driver.
ADD_EXECUTABLE(
proj 
src/cudafile.cu
src/main.cpp)

main.cpp

#include<thrust/host_vector.h>
#include<thrust/device_vector.h>
#include<thrust/device_ptr.h>

void func(int size, int* a1, int* a2, int* a3);
void FillWithValue(int* arr, int size, int val);

int main()
{

    int size=1000;
    int *arr1, *arr2, *arr3;
    
    cudaMalloc((void**)&arr1, size * sizeof(int));
    FillWithValue(arr1,size,1);

    cudaMalloc((void**)&arr2, size * sizeof(int));
    FillWithValue(arr2,size,2);

    cudaMalloc((void**)&arr3, size * sizeof(int));

    int* harr = new int [size];
    cudaMemcpy(harr,arr1,size*sizeof(int),cudaMemcpyDeviceToHost);
    fprintf(stdout, "%d\n",harr[0]);


    func(size, arr1, arr2, arr3);
    cudaError_t err = cudaGetLastError();
    if (cudaSuccess != err)
        fprintf(stderr, "Cuda error: %s.\n", cudaGetErrorString(err));
    

    return 1;

}

cudafile.cu

#include<thrust/host_vector.h>
#include<thrust/device_vector.h>
#include<thrust/device_ptr.h>

#define blocksize 512
#define maxblocks 65535

/*
 * Element-wise integer addition: a3[i] = a1[i] + a2[i] for every 0 <= i < size.
 *
 * Launch layout: 1-D grid of 1-D blocks. Each thread starts at its global
 * index and then advances by the total number of threads in the grid, so any
 * grid/block size covers the whole range (including grids smaller than the
 * data). No shared memory is used.
 *
 * size        number of elements to process; nothing is written when <= 0
 * a1, a2      device pointers to the input arrays (at least `size` ints each)
 * a3          device pointer to the output array (at least `size` ints)
 */
__global__ void funcKernel(int size, int* a1, int* a2, int* a3)
{
    // 64-bit arithmetic keeps index + stride from overflowing int when
    // `size` is close to INT_MAX.
    const long long stride = (long long)blockDim.x * gridDim.x;

    // Advancing the index each iteration is what lets the loop terminate;
    // without the increment every in-range thread would spin forever.
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         i < size;
         i += stride)
    {
        a3[i] = a1[i] + a2[i];
    }
}

/*
 * Host-side launcher for funcKernel.
 *
 * Picks a 1-D launch configuration of `blocksize` threads per block and
 * (size / blocksize + 1) blocks, limited to `maxblocks`, then launches the
 * kernel on the default stream. The launch is not checked or synchronised
 * here; callers query the runtime for errors themselves.
 *
 * size        element count forwarded to the kernel
 * a1, a2, a3  pointers forwarded unchanged to the kernel
 *             (presumably device memory -- verify against callers)
 */
void func(int size, int* a1, int* a2, int* a3)
{
    // One block more than the integer quotient, so a trailing partial block
    // of elements is always covered.
    const int wanted = size / blocksize + 1;

    // NOTE(review): once the cap applies there are fewer threads than
    // elements, so full coverage relies on the kernel handling more than one
    // element per thread -- confirm.
    const int gridsize = (wanted > maxblocks) ? maxblocks : wanted;

    funcKernel<<<gridsize, blocksize>>>(size, a1, a2, a3);
}

/*
 * Sets the first `size` ints starting at `arr` to `val` using thrust::fill.
 *
 * arr   raw pointer wrapped in a thrust::device_ptr
 *       (presumably device memory from cudaMalloc -- verify against callers)
 * size  number of elements to fill
 * val   value written to every element
 */
void FillWithValue(int* arr, int size, int val)
{
    // Wrapping the raw pointer lets Thrust treat the range as device memory.
    const thrust::device_ptr<int> first(arr);
    const thrust::device_ptr<int> last = first + size;

    thrust::fill(first, last, val);
}

输出

0
Cuda error: no kernel image is available for execution on the device.

第一个 fprintf 输出的是 0（而不是预期的 1），说明 Thrust 的 fill 函数没有成功填充数组；而 cudaGetLastError() 捕获到了错误，说明内核也执行失败了。

这是详细的 cmake 构建：

cmake ..

-- The CXX compiler identification is GNU 9.3.0
-- The CUDA compiler identification is NVIDIA 11.0.221
-- Check for working CXX compiler: /usr/bin/c++
-- Check for working CXX compiler: /usr/bin/c++ -- works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Check for working CUDA compiler: /usr/local/cuda-11.0/bin/nvcc
-- Check for working CUDA compiler: /usr/local/cuda-11.0/bin/nvcc -- works
-- Detecting CUDA compiler ABI info
-- Detecting CUDA compiler ABI info - done
-- Configuring done
-- Generating done
-- Build files have been written to: /mnt/d/work/wsl2-projects/tests/kernels/build

make

/usr/bin/cmake -S/mnt/d/work/wsl2-projects/tests/kernels -B/mnt/d/work/wsl2-projects/tests/kernels/build --check-build-system CMakeFiles/Makefile.cmake 0
/usr/bin/cmake -E cmake_progress_start /mnt/d/work/wsl2-projects/tests/kernels/build/CMakeFiles /mnt/d/work/wsl2-projects/tests/kernels/build/CMakeFiles/progress.marks
make -f CMakeFiles/Makefile2 all
make[1]: Entering directory '/mnt/d/work/wsl2-projects/tests/kernels/build'
make -f CMakeFiles/proj.dir/build.make CMakeFiles/proj.dir/depend
make[2]: Entering directory '/mnt/d/work/wsl2-projects/tests/kernels/build'
cd /mnt/d/work/wsl2-projects/tests/kernels/build && /usr/bin/cmake -E cmake_depends "Unix Makefiles" /mnt/d/work/wsl2-projects/tests/kernels /mnt/d/work/wsl2-projects/tests/kernels /mnt/d/work/wsl2-projects/tests/kernels/build /mnt/d/work/wsl2-projects/tests/kernels/build /mnt/d/work/wsl2-projects/tests/kernels/build/CMakeFiles/proj.dir/DependInfo.cmake --color=
Scanning dependencies of target proj
make[2]: Leaving directory '/mnt/d/work/wsl2-projects/tests/kernels/build'
make -f CMakeFiles/proj.dir/build.make CMakeFiles/proj.dir/build
make[2]: Entering directory '/mnt/d/work/wsl2-projects/tests/kernels/build'
[ 33%] Building CUDA object CMakeFiles/proj.dir/src/cudafile.cu.o
/usr/local/cuda-11.0/bin/nvcc     -x cu -c /mnt/d/work/wsl2-projects/tests/kernels/src/cudafile.cu -o CMakeFiles/proj.dir/src/cudafile.cu.o
[ 66%] Building CXX object CMakeFiles/proj.dir/src/main.cpp.o
/usr/bin/c++   -I/usr/local/cuda-11.0/include  -std=gnu++14 -o CMakeFiles/proj.dir/src/main.cpp.o -c /mnt/d/work/wsl2-projects/tests/kernels/src/main.cpp
[100%] Linking CXX executable proj
/usr/bin/cmake -E cmake_link_script CMakeFiles/proj.dir/link.txt --verbose=1
/usr/bin/c++    -rdynamic CMakeFiles/proj.dir/src/cudafile.cu.o CMakeFiles/proj.dir/src/main.cpp.o  -o proj   -L/usr/local/cuda-11.0/lib64  -L/usr/local/cuda-11.0/targets/x86_64-linux/lib/stubs  -L/usr/local/cuda-11.0/targets/x86_64-linux/lib  -lcudadevrt -lcudart_static -lrt -lpthread -ldl
make[2]: Leaving directory '/mnt/d/work/wsl2-projects/tests/kernels/build'
[100%] Built target proj
make[1]: Leaving directory '/mnt/d/work/wsl2-projects/tests/kernels/build'
/usr/bin/cmake -E cmake_progress_start /mnt/d/work/wsl2-projects/tests/kernels/build/CMakeFiles 0

这是否与我的 GPU 和 CUDA 版本不匹配有关？我想降级到 CUDA 10 或 9，但不知道怎样才能完全按照这里（here）介绍的方式安装，使安装过程不会用另一个 Nvidia 驱动程序替换现有的驱动程序。

附加信息：

GeForce GTX 950M
Windows 11 家庭版，内部版本 22000.51。
WSL2: Ubuntu-20.04
Cuda compilation tools, release 9.1, V9.1.85

Answer 1

根据 Robert Crovella 的评论，我设法使程序正确地运行，输出正确，没有错误。

在CMakeLists.txt中，我使用了

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_50,code=sm_50 -lineinfo -cudart=static -Xptxas -v")

而不是

set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_50,code=sm_50;-lineinfo; -cudart=static; -Xptxas; -v)

现在输出是 1（即被填充为 1 的 arr1 的第一个元素），并且不再出现 Cuda 错误。

在 WSL2 上使用 Cuda 给了我 "no kernel image is available for execution on the device."

Using Cuda on WSL2 gives me "no kernel image is available for execution on the device."

c++

cuda

cmake

thrust

wsl-2