为什么 cuda-gdb 显示意外的内存值?
Why does cuda-gdb show unexpected memory values?
我正在用 nvcc -g -G gdbfail.cu 编译以下代码片段:
#include <cstdio>
#include <cinttypes>
/**
 * Demo kernel for inspecting in-kernel heap memory from a debugger.
 *
 * Allocates 8 bytes with in-kernel malloc, fills them with 7, 6, ..., 0,
 * prints each byte in hex followed by a newline, then frees the buffer.
 * Every thread executing this body performs its own allocation; main()
 * launches it as <<<1,1>>>.
 */
__global__ void mykernel() {
    uint8_t* ptr = (uint8_t*) malloc(8);
    // In-kernel malloc yields NULL when the device heap cannot satisfy the
    // request; bail out instead of writing through a null pointer.
    if (!ptr) {
        printf("mykernel: device malloc failed\n");
        return;
    }
    for (int i = 0; i < 8; i++) {
        ptr[i] = 7 - i;
    }
    for (int i = 0; i < 8; i++) { // PUT BREAKPOINT HERE
        printf("%" PRIx8 " ", ptr[i]);
    }
    printf("\n");
    // Hand the block back to the device heap; the original never released it.
    free(ptr);
}
/**
 * Host-side counterpart of mykernel: fills an 8-byte heap buffer with
 * 7, 6, ..., 0, prints it in hex, then launches mykernel<<<1,1>>> and waits
 * for it to finish.
 *
 * Returns 0 on success and 1 if the host allocation, the kernel launch, or
 * the kernel execution fails.
 */
int main() {
    uint8_t* ptr = (uint8_t*) malloc(8);
    if (!ptr) {
        fprintf(stderr, "host malloc failed\n");
        return 1;
    }
    for (int i = 0; i < 8; i++) {
        ptr[i] = 7 - i;
    }
    for (int i = 0; i < 8; i++) { // PUT BREAKPOINT HERE
        printf("%" PRIx8 " ", ptr[i]);
    }
    printf("\n");
    // The host buffer is not used past this point.
    free(ptr);

    mykernel<<<1,1>>>();
    // The launch statement itself yields no status: query the launch error
    // first, then synchronize to surface errors raised while the kernel ran.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    return 0;
}
当我运行 cuda-gdb ./a.out,在第 10 行设置断点(b 10),运行代码(r),并尝试打印 ptr 所指地址处的值时,得到了令人意外的结果:
(cuda-gdb) x/8b ptr
0x7fffcddff920: 7 6 5 4 3 2 1 0
(cuda-gdb) x/8b 0x7fffcddff920
0x7fffcddff920: 0 0 0 0 0 0 0 0
当我在主机代码中做同样的操作(b 23、r)时,得到了预期的结果:
(cuda-gdb) x/8b ptr
0x5555556000a0: 7 6 5 4 3 2 1 0
(cuda-gdb) x/8b 0x5555556000a0
0x5555556000a0: 7 6 5 4 3 2 1 0
为什么以数字形式(0x7fffcddff920)而不是以符号形式(ptr)提供地址时,cuda-gdb 不显示正确的内存值?
显然,并非所有可在主机代码中使用的 gdb 命令功能都能在设备代码中使用。在设备代码中使用时,受支持的命令可能有不同的语法或要求。这一点在 cuda-gdb 文档中有说明。
这些文档指出,检查内存的方法是使用 print 命令,并说明“裸”地址/指针需要一些额外的解码语法。以下是针对您的示例的演示:
$ cuda-gdb ./t1869
NVIDIA (R) CUDA Debugger
11.4 release
Portions Copyright (C) 2007-2021 NVIDIA Corporation
GNU gdb (GDB) 10.1
Copyright (C) 2020 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-pc-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<https://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from ./t1869...
(cuda-gdb) b 10
Breakpoint 1 at 0x403b05: file t1869.cu, line 14.
(cuda-gdb) r
Starting program: /home/user2/misc/t1869
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
7 6 5 4 3 2 1 0
[Detaching after fork from child process 25822]
[New Thread 0x7fffef475700 (LWP 25829)]
[New Thread 0x7fffeec74700 (LWP 25830)]
[Switching focus to CUDA kernel 0, grid 1, block (0,0,0), thread (0,0,0), device 0, sm 0, warp 0, lane 0]
Thread 1 "t1869" hit Breakpoint 1, mykernel<<<(1,1,1),(1,1,1)>>> () at t1869.cu:10
10 for (int i = 0; i < 8; i++) { // PUT BREAKPOINT HERE
(cuda-gdb) x/8b ptr
0x7fffbcdff920: 7 6 5 4 3 2 1 0
(cuda-gdb) p/x *(@global unsigned char *)0x7fffbcdff920@8
$1 = {0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0}
(cuda-gdb)
请注意,上面的 print 命令需要一些帮助,以说明您期望该内存地址位于哪个“空间”(例如 @shared、@global 等)。
如果我们为您的 x 命令提供同样的“帮助”,就会得到预期的结果:
(cuda-gdb) x/8b ptr
0x7fffbcdff920: 7 6 5 4 3 2 1 0
(cuda-gdb) x/8b (@global unsigned char *)0x7fffbcdff920
0x7fffbcdff920: 7 6 5 4 3 2 1 0
(cuda-gdb)
我正在用 nvcc -g -G gdbfail.cu 编译以下代码片段:
#include <cstdio>
#include <cinttypes>
/**
 * Demo kernel for inspecting in-kernel heap memory from a debugger.
 *
 * Allocates 8 bytes with in-kernel malloc, fills them with 7, 6, ..., 0,
 * prints each byte in hex followed by a newline, then frees the buffer.
 * Every thread executing this body performs its own allocation; main()
 * launches it as <<<1,1>>>.
 */
__global__ void mykernel() {
    uint8_t* ptr = (uint8_t*) malloc(8);
    // In-kernel malloc yields NULL when the device heap cannot satisfy the
    // request; bail out instead of writing through a null pointer.
    if (!ptr) {
        printf("mykernel: device malloc failed\n");
        return;
    }
    for (int i = 0; i < 8; i++) {
        ptr[i] = 7 - i;
    }
    for (int i = 0; i < 8; i++) { // PUT BREAKPOINT HERE
        printf("%" PRIx8 " ", ptr[i]);
    }
    printf("\n");
    // Hand the block back to the device heap; the original never released it.
    free(ptr);
}
/**
 * Host-side counterpart of mykernel: fills an 8-byte heap buffer with
 * 7, 6, ..., 0, prints it in hex, then launches mykernel<<<1,1>>> and waits
 * for it to finish.
 *
 * Returns 0 on success and 1 if the host allocation, the kernel launch, or
 * the kernel execution fails.
 */
int main() {
    uint8_t* ptr = (uint8_t*) malloc(8);
    if (!ptr) {
        fprintf(stderr, "host malloc failed\n");
        return 1;
    }
    for (int i = 0; i < 8; i++) {
        ptr[i] = 7 - i;
    }
    for (int i = 0; i < 8; i++) { // PUT BREAKPOINT HERE
        printf("%" PRIx8 " ", ptr[i]);
    }
    printf("\n");
    // The host buffer is not used past this point.
    free(ptr);

    mykernel<<<1,1>>>();
    // The launch statement itself yields no status: query the launch error
    // first, then synchronize to surface errors raised while the kernel ran.
    cudaError_t err = cudaGetLastError();
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    return 0;
}
当我运行 cuda-gdb ./a.out,在第 10 行设置断点(b 10),运行代码(r),并尝试打印 ptr 所指地址处的值时,得到了令人意外的结果:
(cuda-gdb) x/8b ptr
0x7fffcddff920: 7 6 5 4 3 2 1 0
(cuda-gdb) x/8b 0x7fffcddff920
0x7fffcddff920: 0 0 0 0 0 0 0 0
当我在主机代码中做同样的操作(b 23、r)时,得到了预期的结果:
(cuda-gdb) x/8b ptr
0x5555556000a0: 7 6 5 4 3 2 1 0
(cuda-gdb) x/8b 0x5555556000a0
0x5555556000a0: 7 6 5 4 3 2 1 0
为什么以数字形式(0x7fffcddff920)而不是以符号形式(ptr)提供地址时,cuda-gdb 不显示正确的内存值?
显然,并非所有可在主机代码中使用的 gdb 命令功能都能在设备代码中使用。在设备代码中使用时,受支持的命令可能有不同的语法或要求。这一点在 cuda-gdb 文档中有说明。
这些文档指出,检查内存的方法是使用 print 命令,并说明“裸”地址/指针需要一些额外的解码语法。以下是针对您的示例的演示:
$ cuda-gdb ./t1869
NVIDIA (R) CUDA Debugger
11.4 release
Portions Copyright (C) 2007-2021 NVIDIA Corporation
GNU gdb (GDB) 10.1
Copyright (C) 2020 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.
Type "show copying" and "show warranty" for details.
This GDB was configured as "x86_64-pc-linux-gnu".
Type "show configuration" for configuration details.
For bug reporting instructions, please see:
<https://www.gnu.org/software/gdb/bugs/>.
Find the GDB manual and other documentation resources online at:
<http://www.gnu.org/software/gdb/documentation/>.
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from ./t1869...
(cuda-gdb) b 10
Breakpoint 1 at 0x403b05: file t1869.cu, line 14.
(cuda-gdb) r
Starting program: /home/user2/misc/t1869
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
7 6 5 4 3 2 1 0
[Detaching after fork from child process 25822]
[New Thread 0x7fffef475700 (LWP 25829)]
[New Thread 0x7fffeec74700 (LWP 25830)]
[Switching focus to CUDA kernel 0, grid 1, block (0,0,0), thread (0,0,0), device 0, sm 0, warp 0, lane 0]
Thread 1 "t1869" hit Breakpoint 1, mykernel<<<(1,1,1),(1,1,1)>>> () at t1869.cu:10
10 for (int i = 0; i < 8; i++) { // PUT BREAKPOINT HERE
(cuda-gdb) x/8b ptr
0x7fffbcdff920: 7 6 5 4 3 2 1 0
(cuda-gdb) p/x *(@global unsigned char *)0x7fffbcdff920@8
$1 = {0x7, 0x6, 0x5, 0x4, 0x3, 0x2, 0x1, 0x0}
(cuda-gdb)
请注意,上面的 print 命令需要一些帮助,以说明您期望该内存地址位于哪个“空间”(例如 @shared、@global 等)。
如果我们为您的 x 命令提供同样的“帮助”,就会得到预期的结果:
(cuda-gdb) x/8b ptr
0x7fffbcdff920: 7 6 5 4 3 2 1 0
(cuda-gdb) x/8b (@global unsigned char *)0x7fffbcdff920
0x7fffbcdff920: 7 6 5 4 3 2 1 0
(cuda-gdb)