如何将 cuda-memcheck 应用于具有来自标准 I/O 的管道输入的应用程序
How to apply cuda-memcheck to an app with piped inputs from standard I/O
我想将 cuda-memcheck 用于具有标准 I/O 的应用程序。
应用程序 dut 读取标准输入并写入标准输出。
cat input.txt | cuda-memcheck ./dut -dutoptions > output.txt
在这种情况下,dut 应用程序似乎已启动,但 cuda-memcheck 无法正常工作。
我该怎么做?
一种可能的方法:
将要执行的完整命令(包括管道和重定向)放入一个脚本中,然后对该脚本运行 cuda-memcheck。
$ cat input.txt
4
$ cat t2030.cu
#include <iostream>
// Kernel: stores the constant 5 into the int that d points to.
__global__ void k(int *d){
    d[0] = 5;
}
// Reads one integer from standard input, echoes it to standard output, then
// launches k with a single block of a single thread. Building with -DFAULT
// skips the allocation, so the kernel writes through a null pointer.
int main(){
    int *d = NULL;
#ifndef FAULT
    // Normal build: give d device storage for exactly one int.
    cudaMalloc(&d, sizeof(*d));
#endif
    int value;
    std::cin >> value;
    std::cout << "value read: " << value << std::endl;
    k<<<1,1>>>(d);
    // Wait for the kernel to finish before the program exits.
    cudaDeviceSynchronize();
    return 0;
}
$ nvcc -o t2030 t2030.cu
$ cat test
cat input.txt | ./t2030 > output.txt
$ chmod +x test
$ cuda-memcheck ./test
========= CUDA-MEMCHECK
========= ERROR SUMMARY: 0 errors
$ cat output.txt
value read: 4
$ nvcc -DFAULT -o t2030 t2030.cu
$ cuda-memcheck ./test
========= CUDA-MEMCHECK
========= Invalid __global__ write of size 4
========= at 0x00000050 in k(int*)
========= by thread (0,0,0) in block (0,0,0)
========= Address 0x00000000 is out of bounds
========= Device Frame:k(int*) (k(int*) : 0x50)
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/lib64/libcuda.so.1 [0x20d6ea]
========= Host Frame:./t2030 [0x864b]
========= Host Frame:./t2030 [0x5ec88]
========= Host Frame:./t2030 [0x3ea0]
========= Host Frame:./t2030 [0x3d81]
========= Host Frame:./t2030 [0x3da9]
========= Host Frame:./t2030 [0x3c2a]
========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x21b15]
========= Host Frame:./t2030 [0x3a91]
=========
========= Program hit cudaErrorLaunchFailure (error 719) due to "unspecified launch failure" on CUDA API call to cudaDeviceSynchronize.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/lib64/libcuda.so.1 [0x3bd253]
========= Host Frame:./t2030 [0x3a047]
========= Host Frame:./t2030 [0x3c2f]
========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x21b15]
========= Host Frame:./t2030 [0x3a91]
=========
========= ERROR SUMMARY: 2 errors
$
请注意,对于 Volta 或更新的设备,应使用 compute-sanitizer 代替 cuda-memcheck。
我想将 cuda-memcheck 用于具有标准 I/O 的应用程序。
应用程序 dut 读取标准输入并写入标准输出。
cat input.txt | cuda-memcheck ./dut -dutoptions > output.txt
在这种情况下,dut 应用程序似乎已启动,但 cuda-memcheck 无法正常工作。
我该怎么做?
一种可能的方法:
将要执行的完整命令(包括管道和重定向)放入一个脚本中,然后对该脚本运行 cuda-memcheck。
$ cat input.txt
4
$ cat t2030.cu
#include <iostream>
// Kernel: stores the constant 5 into the int that d points to.
__global__ void k(int *d){
    d[0] = 5;
}
// Reads one integer from standard input, echoes it to standard output, then
// launches k with a single block of a single thread. Building with -DFAULT
// skips the allocation, so the kernel writes through a null pointer.
int main(){
    int *d = NULL;
#ifndef FAULT
    // Normal build: give d device storage for exactly one int.
    cudaMalloc(&d, sizeof(*d));
#endif
    int value;
    std::cin >> value;
    std::cout << "value read: " << value << std::endl;
    k<<<1,1>>>(d);
    // Wait for the kernel to finish before the program exits.
    cudaDeviceSynchronize();
    return 0;
}
$ nvcc -o t2030 t2030.cu
$ cat test
cat input.txt | ./t2030 > output.txt
$ chmod +x test
$ cuda-memcheck ./test
========= CUDA-MEMCHECK
========= ERROR SUMMARY: 0 errors
$ cat output.txt
value read: 4
$ nvcc -DFAULT -o t2030 t2030.cu
$ cuda-memcheck ./test
========= CUDA-MEMCHECK
========= Invalid __global__ write of size 4
========= at 0x00000050 in k(int*)
========= by thread (0,0,0) in block (0,0,0)
========= Address 0x00000000 is out of bounds
========= Device Frame:k(int*) (k(int*) : 0x50)
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:/lib64/libcuda.so.1 [0x20d6ea]
========= Host Frame:./t2030 [0x864b]
========= Host Frame:./t2030 [0x5ec88]
========= Host Frame:./t2030 [0x3ea0]
========= Host Frame:./t2030 [0x3d81]
========= Host Frame:./t2030 [0x3da9]
========= Host Frame:./t2030 [0x3c2a]
========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x21b15]
========= Host Frame:./t2030 [0x3a91]
=========
========= Program hit cudaErrorLaunchFailure (error 719) due to "unspecified launch failure" on CUDA API call to cudaDeviceSynchronize.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/lib64/libcuda.so.1 [0x3bd253]
========= Host Frame:./t2030 [0x3a047]
========= Host Frame:./t2030 [0x3c2f]
========= Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x21b15]
========= Host Frame:./t2030 [0x3a91]
=========
========= ERROR SUMMARY: 2 errors
$
请注意,对于 Volta 或更新的设备,应使用 compute-sanitizer 代替 cuda-memcheck。