汇编语言中的 getchar()

getchar() in assembly language

这是一个简单的 C 程序:

#include <stdio.h>

/* Reads one character from standard input, discards it, and exits with status 0. */
int main()
{
    (void)getchar();    /* the character read is deliberately ignored */
    return 0;
}

这是 gcc 命令的输出 "gcc prog.c -S":

    # Compiler-generated assembly for prog.c (32-bit x86, AT&T syntax).
    # main sets up a stack frame, calls getchar, and returns 0 in %eax.
    .file   "prog.c"
    .text
    .globl  main
    .type   main, @function
main:
.LFB0:
    .cfi_startproc
    pushl   %ebp                    # save the caller's frame pointer
    .cfi_def_cfa_offset 8
    .cfi_offset 5, -8
    movl    %esp, %ebp              # establish main's own frame pointer
    .cfi_def_cfa_register 5
    andl    $-16, %esp              # round %esp down to a 16-byte boundary before the call
    call getchar                    # getchar(); the value it leaves in %eax is not used
    movl    $0, %eax                # return value of main: 0
    leave                           # restore %esp from %ebp and pop the saved %ebp
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE0:
    .size   main, .-main
    .ident  "GCC: (Debian 4.8.2-1) 4.8.2"
    .section    .note.GNU-stack,"",@progbits

在"main"中调用了"getchar"函数。是否可以查看此函数的代码(当然是汇编代码)?

我尤其想了解 "getchar()" 背后的键盘中断机制。

如果你想要一个 ASM 输出,你可以简单地要求 gdb 加载你的 C 程序并为你反汇编 getchar。 这会给出这样的东西(在我的系统上):

$ gdb /bin/cat
(gdb) run
Starting program: /bin/cat 
^C
Program received signal SIGINT, Interrupt.
0x00007ffff7b0c5c0 in __read_nocancel () at ../sysdeps/unix/syscall-template.S:81
(gdb) disas getchar 
Dump of assembler code for function getchar:
   0x00007ffff7aa4b30 <+0>:     push   %rbx
   0x00007ffff7aa4b31 <+1>:     mov    0x332be0(%rip),%rbx        # 0x7ffff7dd7718 <stdin>
   0x00007ffff7aa4b38 <+8>:     mov    (%rbx),%eax
   0x00007ffff7aa4b3a <+10>:    mov    %rbx,%rdi
   0x00007ffff7aa4b3d <+13>:    and    $0x8000,%eax
   0x00007ffff7aa4b42 <+18>:    jne    0x7ffff7aa4b9e <getchar+110>
   0x00007ffff7aa4b44 <+20>:    mov    0x88(%rbx),%rdx
   0x00007ffff7aa4b4b <+27>:    mov    %fs:0x10,%r8
   0x00007ffff7aa4b54 <+36>:    cmp    0x8(%rdx),%r8
   0x00007ffff7aa4b58 <+40>:    je     0x7ffff7aa4bf0 <getchar+192>
   0x00007ffff7aa4b5e <+46>:    mov    $0x1,%esi
   0x00007ffff7aa4b63 <+51>:    cmpl   $0x0,0x336c6a(%rip)        # 0x7ffff7ddb7d4 <__libc_multiple_threads>
   0x00007ffff7aa4b6a <+58>:    je     0x7ffff7aa4b78 <getchar+72>
   0x00007ffff7aa4b6c <+60>:    lock cmpxchg %esi,(%rdx)
   0x00007ffff7aa4b70 <+64>:    jne    0x7ffff7aa4c41 <_L_lock_30>
   0x00007ffff7aa4b76 <+70>:    jmp    0x7ffff7aa4b81 <getchar+81>
   0x00007ffff7aa4b78 <+72>:    cmpxchg %esi,(%rdx)
   0x00007ffff7aa4b7b <+75>:    jne    0x7ffff7aa4c41 <_L_lock_30>
   0x00007ffff7aa4b81 <+81>:    mov    0x88(%rbx),%rax
   0x00007ffff7aa4b88 <+88>:    mov    0x88(%rbx),%rdx
   0x00007ffff7aa4b8f <+95>:    mov    0x332b82(%rip),%rdi        # 0x7ffff7dd7718 <stdin>
   0x00007ffff7aa4b96 <+102>:   mov    %r8,0x8(%rax)
   0x00007ffff7aa4b9a <+106>:   addl   $0x1,0x4(%rdx)
   0x00007ffff7aa4b9e <+110>:   mov    0x8(%rdi),%rax
   0x00007ffff7aa4ba2 <+114>:   cmp    0x10(%rdi),%rax
   0x00007ffff7aa4ba6 <+118>:   jae    0x7ffff7aa4bf5 <getchar+197>
   0x00007ffff7aa4ba8 <+120>:   lea    0x1(%rax),%rdx
   0x00007ffff7aa4bac <+124>:   mov    %rdx,0x8(%rdi)
   0x00007ffff7aa4bb0 <+128>:   movzbl (%rax),%edx
   0x00007ffff7aa4bb3 <+131>:   testl  $0x8000,(%rbx)
   0x00007ffff7aa4bb9 <+137>:   jne    0x7ffff7aa4be4 <getchar+180>
   0x00007ffff7aa4bbb <+139>:   mov    0x88(%rbx),%rsi
   0x00007ffff7aa4bc2 <+146>:   subl   $0x1,0x4(%rsi)
   0x00007ffff7aa4bc6 <+150>:   jne    0x7ffff7aa4be4 <getchar+180>
   0x00007ffff7aa4bc8 <+152>:   movq   $0x0,0x8(%rsi)
   0x00007ffff7aa4bd0 <+160>:   cmpl   $0x0,0x336bfd(%rip)        # 0x7ffff7ddb7d4 <__libc_multiple_threads>
   0x00007ffff7aa4bd7 <+167>:   je     0x7ffff7aa4be0 <getchar+176>
   0x00007ffff7aa4bd9 <+169>:   lock decl (%rsi)
   0x00007ffff7aa4bdc <+172>:   jne    0x7ffff7aa4c5c <_L_unlock_79>
   0x00007ffff7aa4bde <+174>:   jmp    0x7ffff7aa4be4 <getchar+180>
   0x00007ffff7aa4be0 <+176>:   decl   (%rsi)
   0x00007ffff7aa4be2 <+178>:   jne    0x7ffff7aa4c5c <_L_unlock_79>
   0x00007ffff7aa4be4 <+180>:   mov    %edx,%eax
   0x00007ffff7aa4be6 <+182>:   pop    %rbx
   0x00007ffff7aa4be7 <+183>:   retq   
   0x00007ffff7aa4be8 <+184>:   nopl   0x0(%rax,%rax,1)
   0x00007ffff7aa4bf0 <+192>:   mov    %rbx,%rdi
   0x00007ffff7aa4bf3 <+195>:   jmp    0x7ffff7aa4b9a <getchar+106>
   0x00007ffff7aa4bf5 <+197>:   callq  0x7ffff7aa9060 <__GI___uflow>
   0x00007ffff7aa4bfa <+202>:   mov    %eax,%edx
   0x00007ffff7aa4bfc <+204>:   jmp    0x7ffff7aa4bb3 <getchar+131>
   0x00007ffff7aa4bfe <+206>:   testl  $0x8000,(%rbx)
   0x00007ffff7aa4c04 <+212>:   mov    %rax,%rsi
   0x00007ffff7aa4c07 <+215>:   jne    0x7ffff7aa4c39 <getchar+265>
   0x00007ffff7aa4c09 <+217>:   mov    0x88(%rbx),%rdx
   0x00007ffff7aa4c10 <+224>:   mov    0x4(%rdx),%eax
   0x00007ffff7aa4c13 <+227>:   lea    -0x1(%rax),%ecx
   0x00007ffff7aa4c16 <+230>:   test   %ecx,%ecx
   0x00007ffff7aa4c18 <+232>:   mov    %ecx,0x4(%rdx)
   0x00007ffff7aa4c1b <+235>:   jne    0x7ffff7aa4c39 <getchar+265>
   0x00007ffff7aa4c1d <+237>:   movq   $0x0,0x8(%rdx)
   0x00007ffff7aa4c25 <+245>:   cmpl   $0x0,0x336ba8(%rip)        # 0x7ffff7ddb7d4 <__libc_multiple_threads>
   0x00007ffff7aa4c2c <+252>:   je     0x7ffff7aa4c35 <getchar+261>
   0x00007ffff7aa4c2e <+254>:   lock decl (%rdx)
   0x00007ffff7aa4c31 <+257>:   jne    0x7ffff7aa4c77 <_L_unlock_150>
   0x00007ffff7aa4c33 <+259>:   jmp    0x7ffff7aa4c39 <getchar+265>
   0x00007ffff7aa4c35 <+261>:   decl   (%rdx)
   0x00007ffff7aa4c37 <+263>:   jne    0x7ffff7aa4c77 <_L_unlock_150>
   0x00007ffff7aa4c39 <+265>:   mov    %rsi,%rdi
   0x00007ffff7aa4c3c <+268>:   callq  0x7ffff7a54d50 <_Unwind_Resume>
End of assembler dump.

您也可以直接查看 libc 的源代码以了解 getchar 是如何实现的,但这样得到的是(有时可能相当晦涩的)C 代码。

getchar() 实现存在于 libc.so - 您系统上的标准 C 库实现(适用于类 Unix 系统),因此您需要对该库文件应用反汇编程序。

最简单的方法是使用 gdb 调试器(同样,特定于 Unix):

  1. 编译你的程序并将其与 libc.so 链接:

    $ gcc -o prog prog.c
    
  2. 运行 GDB:

    $ gdb prog
    
  3. 运行您的程序(这样 libc.so 的 PLT 表才会被填充),然后立即用 Ctrl + C 停止它:

    ...
    (gdb) r
    ^C
    
  4. 对 getchar() 使用 gdb 的 disas 子命令:

    (gdb) disas getchar
    
  5. 祝你阅读底层汇编代码愉快!


In particular I would understand the mechanism of keyboard interrupt behind the "getchar()".

不,这样是做不到的。getchar() 和硬件键盘中断之间隔着好几层,其中大部分由 Linux 内核实现:

  • 标准 C 库
  • 系统调用 - 从用户空间到 OS 内核的一扇门
  • tty 子系统或管道
  • 输入子系统
  • (可选)USB 驱动程序,包括 usbhid 和 usbkbd
  • 基本内核,包括 IRQ 基础结构

要通过反汇编读完这些层次的全部代码需要好几年时间。你需要先从顶层了解整体架构。

这里是 getchar() 的源代码,在 C 中。

你可以编译它,并让编译器输出中间的汇编文件(例如使用 gcc -S),从而得到你想要的内容。

/* Copyright (C) 1993-2015 Free Software Foundation, Inc.
2  This file is part of the GNU C Library.
3 
4  The GNU C Library is free software; you can redistribute it and/or
5  modify it under the terms of the GNU Lesser General Public
6  License as published by the Free Software Foundation; either
7  version 2.1 of the License, or (at your option) any later version.
8 
9  The GNU C Library is distributed in the hope that it will be useful,
10  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  Lesser General Public License for more details.
13 
14  You should have received a copy of the GNU Lesser General Public
15  License along with the GNU C Library; if not, see
16  <http://www.gnu.org/licenses/>.
17 
18  As a special exception, if you link the code in this file with
19  files compiled with a GNU compiler to produce an executable,
20  that does not cause the resulting executable to be covered by
21  the GNU Lesser General Public License. This exception does not
22  however invalidate any other reasons why the executable file
23  might be covered by the GNU Lesser General Public License.
24  This exception applies to code released by its copyright holders
25  in files containing the exception. */
26 
27 #include "libioP.h"
28 #include "stdio.h"
29 
30 #undef getchar
31 
32 int
33 getchar (void)
34 {
35  int result;
36  _IO_acquire_lock (_IO_stdin);
37  result = _IO_getc_unlocked (_IO_stdin);
38  _IO_release_lock (_IO_stdin);
39  return result;
40 }
41 
42 #if defined weak_alias && !defined _IO_MTSAFE_IO
43 #undef getchar_unlocked
44 weak_alias (getchar, getchar_unlocked)
45 #endif