gdb - 如何使用 gdb 捕获损坏

gdb - how to trap corruption using gdb

我正在尝试查明一个新程序中的内存损坏发生在何处、何时。该程序只有 495 行,但 gdb 并没有帮我调试出问题。(至少以我目前的知识水平还做不到。)考虑以下内容:

> gdb psgrep-2020
(comments omitted)
Reading symbols from psgrep-2020...
(gdb) b 466
Breakpoint 1 at 0x3073: file psgrep-2020.c, line 466.
(gdb) run -F dnsmasq
Starting program: /usr/local/src/psgrep-2022/psgrep-2020 -F dnsmasq

Breakpoint 1, showProcess (pid=893) at psgrep-2020.c:466
466         if (printCmdline) {
(gdb) step
467             procNameFromCmdline(pid, strWork, sizeof(strWork), TRUE) ;
(gdb) p pid
$1 = 893
(gdb) p strWork
$2 = '\000' <repeats 1023 times>
(gdb) print sizeof(strWork)
$3 = 1024
(gdb) step
procNameFromCmdline (pid=0, result=0x0, resultLen=0, fullCmd=0 '\000') at psgrep-2020.c:58
58  int procNameFromCmdline(pid_t pid, char *result, int resultLen, BOOL fullCmd) {
(gdb) 

在刚进入被调用函数 (procNameFromCmdline) 时,我们可以看到每个参数都不正确(TRUE 通过 #define 定义为 1)。有时 gdb 会显示如下内容:

procNameFromCmdline (pid=0, result=0x19c5b4 <error: Cannot access memory at address 0x19c5b4>, resultLen=1689012, fullCmd=0 '\000')

我不是要让别人帮我找出问题;我想做的是找到一种可以检测程序何时损坏的方法。我相信我所有的 memset、snprintf() 等都得到了正确的约束;显然有些事情出了差错。

如果有帮助的话,为了便于理解上下文,下面是该调用之前的相关代码...

    fpProcFile = fopen(sProcPath, "rt") ; // Open the stat file for reading text
    if (fpProcFile) {
        fscanf(fpProcFile
            , "%d %s %c %d %d %d %d %d %u %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld "
              "%ld %llu %lu %ld %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u "
              "%u %llu %lu %ld %lu %lu %lu %lu %lu %lu %lu %d"
            , &(s->pid),           s->comm,       &(s->state),       &(s->ppid),        &(s->pgrp)
            , &(s->session),     &(s->tty_nr),    &(s->tpgid),       &(s->flags),       &(s->minflt)
            , &(s->cminflt),     &(s->majflt),    &(s->cmajflt),     &(s->utime),       &(s->stime)
            , &(s->cutime),      &(s->cstime),    &(s->priority),    &(s->nice),        &(s->num_threads)
            , &(s->itrealvalue), &(s->starttime), &(s->vsize),       &(s->rss),         &(s->rsslim)
            , &(s->startcode),   &(s->endcode),   &(s->startstack),  &(s->kstkesp),     &(s->kstkeip)
            , &(s->signal),      &(s->blocked),   &(s->sigignore),   &(s->sigcatch),    &(s->wchan)
            , &(s->nswap),       &(s->cnswap),    &(s->exit_signal), &(s->processor),   &(s->rt_priority)
            , &(s->policy),      &(s->delayacct_blkio_ticks)
                                               ,  &(s->guest_time),  &(s->cguest_time), &(s->start_data)
            , &(s->end_data),    &(s->start_brk), &(s->arg_start),   &(s->arg_end),     &(s->env_start)
            , &(s->env_end),     &(s->exit_code)
             ) ;
        fclose(fpProcFile) ;
        processName = s->comm ;
        memset(strWork, 0x00, sizeof(strWork)) ;
        if (printCmdline) {
            procNameFromCmdline(pid, strWork, sizeof(strWork), TRUE) ;

(fscanf 中唯一的 %s 指向一个 char[65535],而 /proc/893/stat 中该字段的值长度为 9,再加上一个终止符。根据文档,16 字节就足够了。但这无论如何都不是重点。)

有办法吗?我需要更专业的调试器吗?

(虽然我并不是想找人来解决这个程序的问题,但它似乎引起了一些兴趣。鉴于此,我贴出上面引用的代码中所使用的结构。)该结构的字段记录在 Linux 内核源代码 (fs/proc/array.c) 中,(并非我的版本)可以在[这里][1]以及许多其他地方看到。

/*
 * One parsed record of /proc/[pid]/stat.
 *
 * Members are declared in the same order as the whitespace-separated
 * fields of the stat file, so they can be filled by a single scanf-style
 * call. Field meanings follow the kernel's fs/proc/array.c.
 */
struct myProcStat {
    int pid;                    /* Process ID */
    char comm[65535];           /* Command name (documented as at most 16 bytes;
                                   this buffer is far larger than required) */
    char state;                 /* R=Running S=Sleeping D=WaitingDisk Z=Zombie
                                   T=Stopped t=TracingStopped W=Paging X=Dead
                                   x=Dead K=Wakekill W=Waking P=Parked */
    int ppid;                   /* Parent process ID */
    int pgrp;                   /* Process group ID */
    int session;                /* Session ID */
    int tty_nr;                 /* Controlling terminal */
    int tpgid;                  /* Foreground process group */
    unsigned flags;             /* Kernel flags */
    unsigned long minflt;       /* Minor fault count */
    unsigned long cminflt;      /* Minor fault count of children */
    unsigned long majflt;       /* Major fault count */
    unsigned long cmajflt;      /* Major fault count of children */
    unsigned long utime;        /* Time scheduled in user mode */
    unsigned long stime;        /* Time scheduled in kernel mode */
    long cutime;                /* User-mode time of waited-for children */
    long cstime;                /* Kernel-mode time of waited-for children */
    long priority;              /* Priority under real-time scheduling policy */
    long nice;                  /* Nice value */
    long num_threads;           /* Number of threads in this process */
    long itrealvalue;           /* Jiffies until the next SIGALRM is sent */
    /* Fields 1-21 are above; field 22 follows. */
    unsigned long long starttime; /* Start time after system boot, in clock
                                     ticks (divide by sysconf(_SC_CLK_TCK)) */
    unsigned long vsize;        /* Virtual memory size in bytes */
    long rss;                   /* Resident set size */
    unsigned long rsslim;       /* Current soft limit on rss, in bytes */
    unsigned long startcode;    /* Address above which text can be run */
    unsigned long endcode;      /* Address below which text can be run */
    unsigned long startstack;   /* Address of the start (bottom) of the stack */
    unsigned long kstkesp;      /* Current stack pointer, kernel's view */
    unsigned long kstkeip;      /* Current EIP (instruction pointer) */
    unsigned long signal;       /* Pending-signal bitmap as a decimal number.
                                   Obsolete; use /proc/[pid]/status instead */
    unsigned long blocked;      /* Blocked-signal bitmap.
                                   Obsolete; use /proc/[pid]/status instead */
    unsigned long sigignore;    /* Ignored-signal bitmap.
                                   Obsolete; use /proc/[pid]/status instead */
    unsigned long sigcatch;     /* Caught-signal bitmap.
                                   Use /proc/[pid]/status instead */
    unsigned long wchan;        /* Channel in which the process is waiting.
                                   Use with /proc/[pid]/wchan */
    unsigned long nswap;        /* Pages swapped (not maintained - ignore) */
    unsigned long cnswap;       /* Child pages swapped (not maintained - ignore) */
    int exit_signal;            /* Signal sent to the parent upon death */
    int processor;              /* CPU last executed on */
    unsigned rt_priority;       /* Real-time scheduling priority */
    unsigned policy;            /* Scheduling policy for real-time scheduling */
    unsigned long long delayacct_blkio_ticks; /* Aggregated block I/O delays,
                                                 in clock ticks */
    unsigned long guest_time;   /* Guest time (running a virtual CPU for a
                                   guest OS) */
    unsigned long cguest_time;  /* Guest time of the process's children */
    unsigned long start_data;   /* Address above which program BSS data are placed */
    unsigned long end_data;     /* Address below which program BSS data are placed */
    unsigned long start_brk;    /* Address above which the heap can be expanded */
    unsigned long arg_start;    /* Address above which argv is placed */
    unsigned long arg_end;      /* Address below which argv is placed */
    unsigned long env_start;    /* Address above which the environment is placed */
    unsigned long env_end;      /* Address below which the environment is placed */
    int exit_code;              /* Thread's exit status, in waitpid(2) form */
};


  [1]: https://elixir.bootlin.com/linux/latest/source/fs/proc/array.c

At the inception of the called process (procNameFromCmdline) we can see that every parameter is incorrect

这很可能意味着 GDB 没有(像它本应做的那样)跳过函数序言。这很可能是由一个已知的 GDB 错误导致的。

如果你再执行一次 step 或 next,参数就会突然恢复正确。

请注意,上述错误已在较新的 GDB 版本中修复,因此更新 GDB 是另一种解决方案。