汇编语言中的 getchar()
getchar() in assembly language
这是一个简单的 C 程序:
#include <stdio.h>
/* Minimal demo: read one character from standard input, discard it, and exit. */
int main(void)
{
    (void)getchar();   /* the value read is deliberately ignored */
    return 0;
}
这是 gcc 命令的输出 "gcc prog.c -S":
.file "prog.c"
.text
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
andl $-16, %esp
call getchar
movl $0, %eax
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (Debian 4.8.2-1) 4.8.2"
.section .note.GNU-stack,"",@progbits
在"main"中调用了"getchar"函数。是否可以查看此函数的代码(当然是汇编代码)?
特别是,我想了解 "getchar()" 背后的键盘中断机制。
如果你想要汇编输出,可以直接让 gdb 加载你的 C 程序,并为你反汇编 getchar。
这会给出这样的东西(在我的系统上):
$ gdb /bin/cat
(gdb) run
Starting program: /bin/cat
^C
Program received signal SIGINT, Interrupt.
0x00007ffff7b0c5c0 in __read_nocancel () at ../sysdeps/unix/syscall-template.S:81
(gdb) disas getchar
Dump of assembler code for function getchar:
0x00007ffff7aa4b30 <+0>: push %rbx
0x00007ffff7aa4b31 <+1>: mov 0x332be0(%rip),%rbx # 0x7ffff7dd7718 <stdin>
0x00007ffff7aa4b38 <+8>: mov (%rbx),%eax
0x00007ffff7aa4b3a <+10>: mov %rbx,%rdi
0x00007ffff7aa4b3d <+13>: and $0x8000,%eax
0x00007ffff7aa4b42 <+18>: jne 0x7ffff7aa4b9e <getchar+110>
0x00007ffff7aa4b44 <+20>: mov 0x88(%rbx),%rdx
0x00007ffff7aa4b4b <+27>: mov %fs:0x10,%r8
0x00007ffff7aa4b54 <+36>: cmp 0x8(%rdx),%r8
0x00007ffff7aa4b58 <+40>: je 0x7ffff7aa4bf0 <getchar+192>
0x00007ffff7aa4b5e <+46>: mov $0x1,%esi
0x00007ffff7aa4b63 <+51>: cmpl $0x0,0x336c6a(%rip) # 0x7ffff7ddb7d4 <__libc_multiple_threads>
0x00007ffff7aa4b6a <+58>: je 0x7ffff7aa4b78 <getchar+72>
0x00007ffff7aa4b6c <+60>: lock cmpxchg %esi,(%rdx)
0x00007ffff7aa4b70 <+64>: jne 0x7ffff7aa4c41 <_L_lock_30>
0x00007ffff7aa4b76 <+70>: jmp 0x7ffff7aa4b81 <getchar+81>
0x00007ffff7aa4b78 <+72>: cmpxchg %esi,(%rdx)
0x00007ffff7aa4b7b <+75>: jne 0x7ffff7aa4c41 <_L_lock_30>
0x00007ffff7aa4b81 <+81>: mov 0x88(%rbx),%rax
0x00007ffff7aa4b88 <+88>: mov 0x88(%rbx),%rdx
0x00007ffff7aa4b8f <+95>: mov 0x332b82(%rip),%rdi # 0x7ffff7dd7718 <stdin>
0x00007ffff7aa4b96 <+102>: mov %r8,0x8(%rax)
0x00007ffff7aa4b9a <+106>: addl $0x1,0x4(%rdx)
0x00007ffff7aa4b9e <+110>: mov 0x8(%rdi),%rax
0x00007ffff7aa4ba2 <+114>: cmp 0x10(%rdi),%rax
0x00007ffff7aa4ba6 <+118>: jae 0x7ffff7aa4bf5 <getchar+197>
0x00007ffff7aa4ba8 <+120>: lea 0x1(%rax),%rdx
0x00007ffff7aa4bac <+124>: mov %rdx,0x8(%rdi)
0x00007ffff7aa4bb0 <+128>: movzbl (%rax),%edx
0x00007ffff7aa4bb3 <+131>: testl $0x8000,(%rbx)
0x00007ffff7aa4bb9 <+137>: jne 0x7ffff7aa4be4 <getchar+180>
0x00007ffff7aa4bbb <+139>: mov 0x88(%rbx),%rsi
0x00007ffff7aa4bc2 <+146>: subl $0x1,0x4(%rsi)
0x00007ffff7aa4bc6 <+150>: jne 0x7ffff7aa4be4 <getchar+180>
0x00007ffff7aa4bc8 <+152>: movq $0x0,0x8(%rsi)
0x00007ffff7aa4bd0 <+160>: cmpl $0x0,0x336bfd(%rip) # 0x7ffff7ddb7d4 <__libc_multiple_threads>
0x00007ffff7aa4bd7 <+167>: je 0x7ffff7aa4be0 <getchar+176>
0x00007ffff7aa4bd9 <+169>: lock decl (%rsi)
0x00007ffff7aa4bdc <+172>: jne 0x7ffff7aa4c5c <_L_unlock_79>
0x00007ffff7aa4bde <+174>: jmp 0x7ffff7aa4be4 <getchar+180>
0x00007ffff7aa4be0 <+176>: decl (%rsi)
0x00007ffff7aa4be2 <+178>: jne 0x7ffff7aa4c5c <_L_unlock_79>
0x00007ffff7aa4be4 <+180>: mov %edx,%eax
0x00007ffff7aa4be6 <+182>: pop %rbx
0x00007ffff7aa4be7 <+183>: retq
0x00007ffff7aa4be8 <+184>: nopl 0x0(%rax,%rax,1)
0x00007ffff7aa4bf0 <+192>: mov %rbx,%rdi
0x00007ffff7aa4bf3 <+195>: jmp 0x7ffff7aa4b9a <getchar+106>
0x00007ffff7aa4bf5 <+197>: callq 0x7ffff7aa9060 <__GI___uflow>
0x00007ffff7aa4bfa <+202>: mov %eax,%edx
0x00007ffff7aa4bfc <+204>: jmp 0x7ffff7aa4bb3 <getchar+131>
0x00007ffff7aa4bfe <+206>: testl $0x8000,(%rbx)
0x00007ffff7aa4c04 <+212>: mov %rax,%rsi
0x00007ffff7aa4c07 <+215>: jne 0x7ffff7aa4c39 <getchar+265>
0x00007ffff7aa4c09 <+217>: mov 0x88(%rbx),%rdx
0x00007ffff7aa4c10 <+224>: mov 0x4(%rdx),%eax
0x00007ffff7aa4c13 <+227>: lea -0x1(%rax),%ecx
0x00007ffff7aa4c16 <+230>: test %ecx,%ecx
0x00007ffff7aa4c18 <+232>: mov %ecx,0x4(%rdx)
0x00007ffff7aa4c1b <+235>: jne 0x7ffff7aa4c39 <getchar+265>
0x00007ffff7aa4c1d <+237>: movq $0x0,0x8(%rdx)
0x00007ffff7aa4c25 <+245>: cmpl $0x0,0x336ba8(%rip) # 0x7ffff7ddb7d4 <__libc_multiple_threads>
0x00007ffff7aa4c2c <+252>: je 0x7ffff7aa4c35 <getchar+261>
0x00007ffff7aa4c2e <+254>: lock decl (%rdx)
0x00007ffff7aa4c31 <+257>: jne 0x7ffff7aa4c77 <_L_unlock_150>
0x00007ffff7aa4c33 <+259>: jmp 0x7ffff7aa4c39 <getchar+265>
0x00007ffff7aa4c35 <+261>: decl (%rdx)
0x00007ffff7aa4c37 <+263>: jne 0x7ffff7aa4c77 <_L_unlock_150>
0x00007ffff7aa4c39 <+265>: mov %rsi,%rdi
0x00007ffff7aa4c3c <+268>: callq 0x7ffff7a54d50 <_Unwind_Resume>
End of assembler dump.
您也可以直接查看 libc 的源代码来了解 getchar 是如何实现的,不过那样您看到的将是(有时可能晦涩难懂的)C 代码。
getchar() 的实现位于 libc.so 中——它是您系统上的标准 C 库实现(适用于类 Unix 系统),因此您需要对该库文件使用反汇编程序。
最简单的方法是使用 gdb 调试器(同样,仅适用于 Unix):
编译你的程序并将其与 libc.so 链接:
$ gcc -o prog prog.c
运行 GDB:
$ gdb prog
运行您的程序(这样 libc.so 的 PLT 表才会被填充),然后立即按 Ctrl + C 将其停止:
...
(gdb) r
^C
对 getchar() 使用 gdb 的 disas 子命令:
(gdb) disas getchar
祝你阅读底层汇编代码愉快!
In particular I would understand the mechanism of keyboard interrupt behind the "getchar()".
不,这样做并不能让你理解它。getchar() 和硬件键盘中断之间隔着好几层,其中大部分由 Linux 内核实现:
- 标准 C 库
- 系统调用 - 从用户空间到 OS 内核的一扇门
- tty 子系统或管道
- 输入子系统
- (可选)USB 驱动程序,包括 usbhid 或 usbkbd
- 基本内核,包括 IRQ 基础结构
通过反汇编去阅读所有这些层的代码需要数年时间。你需要先从顶层了解整体架构。
下面是 getchar() 的 C 语言源代码。
你可以编译它,并让编译器输出中间的汇编(.asm)文件,从而得到你想要的内容。
/* Copyright (C) 1993-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.

   As a special exception, if you link the code in this file with
   files compiled with a GNU compiler to produce an executable,
   that does not cause the resulting executable to be covered by
   the GNU Lesser General Public License.  This exception does not
   however invalidate any other reasons why the executable file
   might be covered by the GNU Lesser General Public License.
   This exception applies to code released by its copyright holders
   in files containing the exception.  */

#include "libioP.h"
#include "stdio.h"

#undef getchar

int
getchar (void)
{
  int result;
  _IO_acquire_lock (_IO_stdin);
  result = _IO_getc_unlocked (_IO_stdin);
  _IO_release_lock (_IO_stdin);
  return result;
}

#if defined weak_alias && !defined _IO_MTSAFE_IO
#undef getchar_unlocked
weak_alias (getchar, getchar_unlocked)
#endif
这是一个简单的 C 程序:
#include <stdio.h>
/* Minimal demo: read one character from standard input, discard it, and exit. */
int main(void)
{
    (void)getchar();   /* the value read is deliberately ignored */
    return 0;
}
这是 gcc 命令的输出 "gcc prog.c -S":
.file "prog.c"
.text
.globl main
.type main, @function
main:
.LFB0:
.cfi_startproc
pushl %ebp
.cfi_def_cfa_offset 8
.cfi_offset 5, -8
movl %esp, %ebp
.cfi_def_cfa_register 5
andl $-16, %esp
call getchar
movl $0, %eax
leave
.cfi_restore 5
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE0:
.size main, .-main
.ident "GCC: (Debian 4.8.2-1) 4.8.2"
.section .note.GNU-stack,"",@progbits
在"main"中调用了"getchar"函数。是否可以查看此函数的代码(当然是汇编代码)?
特别是,我想了解 "getchar()" 背后的键盘中断机制。
如果你想要汇编输出,可以直接让 gdb 加载你的 C 程序,并为你反汇编 getchar。
这会给出这样的东西(在我的系统上):
$ gdb /bin/cat
(gdb) run
Starting program: /bin/cat
^C
Program received signal SIGINT, Interrupt.
0x00007ffff7b0c5c0 in __read_nocancel () at ../sysdeps/unix/syscall-template.S:81
(gdb) disas getchar
Dump of assembler code for function getchar:
0x00007ffff7aa4b30 <+0>: push %rbx
0x00007ffff7aa4b31 <+1>: mov 0x332be0(%rip),%rbx # 0x7ffff7dd7718 <stdin>
0x00007ffff7aa4b38 <+8>: mov (%rbx),%eax
0x00007ffff7aa4b3a <+10>: mov %rbx,%rdi
0x00007ffff7aa4b3d <+13>: and $0x8000,%eax
0x00007ffff7aa4b42 <+18>: jne 0x7ffff7aa4b9e <getchar+110>
0x00007ffff7aa4b44 <+20>: mov 0x88(%rbx),%rdx
0x00007ffff7aa4b4b <+27>: mov %fs:0x10,%r8
0x00007ffff7aa4b54 <+36>: cmp 0x8(%rdx),%r8
0x00007ffff7aa4b58 <+40>: je 0x7ffff7aa4bf0 <getchar+192>
0x00007ffff7aa4b5e <+46>: mov $0x1,%esi
0x00007ffff7aa4b63 <+51>: cmpl $0x0,0x336c6a(%rip) # 0x7ffff7ddb7d4 <__libc_multiple_threads>
0x00007ffff7aa4b6a <+58>: je 0x7ffff7aa4b78 <getchar+72>
0x00007ffff7aa4b6c <+60>: lock cmpxchg %esi,(%rdx)
0x00007ffff7aa4b70 <+64>: jne 0x7ffff7aa4c41 <_L_lock_30>
0x00007ffff7aa4b76 <+70>: jmp 0x7ffff7aa4b81 <getchar+81>
0x00007ffff7aa4b78 <+72>: cmpxchg %esi,(%rdx)
0x00007ffff7aa4b7b <+75>: jne 0x7ffff7aa4c41 <_L_lock_30>
0x00007ffff7aa4b81 <+81>: mov 0x88(%rbx),%rax
0x00007ffff7aa4b88 <+88>: mov 0x88(%rbx),%rdx
0x00007ffff7aa4b8f <+95>: mov 0x332b82(%rip),%rdi # 0x7ffff7dd7718 <stdin>
0x00007ffff7aa4b96 <+102>: mov %r8,0x8(%rax)
0x00007ffff7aa4b9a <+106>: addl $0x1,0x4(%rdx)
0x00007ffff7aa4b9e <+110>: mov 0x8(%rdi),%rax
0x00007ffff7aa4ba2 <+114>: cmp 0x10(%rdi),%rax
0x00007ffff7aa4ba6 <+118>: jae 0x7ffff7aa4bf5 <getchar+197>
0x00007ffff7aa4ba8 <+120>: lea 0x1(%rax),%rdx
0x00007ffff7aa4bac <+124>: mov %rdx,0x8(%rdi)
0x00007ffff7aa4bb0 <+128>: movzbl (%rax),%edx
0x00007ffff7aa4bb3 <+131>: testl $0x8000,(%rbx)
0x00007ffff7aa4bb9 <+137>: jne 0x7ffff7aa4be4 <getchar+180>
0x00007ffff7aa4bbb <+139>: mov 0x88(%rbx),%rsi
0x00007ffff7aa4bc2 <+146>: subl $0x1,0x4(%rsi)
0x00007ffff7aa4bc6 <+150>: jne 0x7ffff7aa4be4 <getchar+180>
0x00007ffff7aa4bc8 <+152>: movq $0x0,0x8(%rsi)
0x00007ffff7aa4bd0 <+160>: cmpl $0x0,0x336bfd(%rip) # 0x7ffff7ddb7d4 <__libc_multiple_threads>
0x00007ffff7aa4bd7 <+167>: je 0x7ffff7aa4be0 <getchar+176>
0x00007ffff7aa4bd9 <+169>: lock decl (%rsi)
0x00007ffff7aa4bdc <+172>: jne 0x7ffff7aa4c5c <_L_unlock_79>
0x00007ffff7aa4bde <+174>: jmp 0x7ffff7aa4be4 <getchar+180>
0x00007ffff7aa4be0 <+176>: decl (%rsi)
0x00007ffff7aa4be2 <+178>: jne 0x7ffff7aa4c5c <_L_unlock_79>
0x00007ffff7aa4be4 <+180>: mov %edx,%eax
0x00007ffff7aa4be6 <+182>: pop %rbx
0x00007ffff7aa4be7 <+183>: retq
0x00007ffff7aa4be8 <+184>: nopl 0x0(%rax,%rax,1)
0x00007ffff7aa4bf0 <+192>: mov %rbx,%rdi
0x00007ffff7aa4bf3 <+195>: jmp 0x7ffff7aa4b9a <getchar+106>
0x00007ffff7aa4bf5 <+197>: callq 0x7ffff7aa9060 <__GI___uflow>
0x00007ffff7aa4bfa <+202>: mov %eax,%edx
0x00007ffff7aa4bfc <+204>: jmp 0x7ffff7aa4bb3 <getchar+131>
0x00007ffff7aa4bfe <+206>: testl $0x8000,(%rbx)
0x00007ffff7aa4c04 <+212>: mov %rax,%rsi
0x00007ffff7aa4c07 <+215>: jne 0x7ffff7aa4c39 <getchar+265>
0x00007ffff7aa4c09 <+217>: mov 0x88(%rbx),%rdx
0x00007ffff7aa4c10 <+224>: mov 0x4(%rdx),%eax
0x00007ffff7aa4c13 <+227>: lea -0x1(%rax),%ecx
0x00007ffff7aa4c16 <+230>: test %ecx,%ecx
0x00007ffff7aa4c18 <+232>: mov %ecx,0x4(%rdx)
0x00007ffff7aa4c1b <+235>: jne 0x7ffff7aa4c39 <getchar+265>
0x00007ffff7aa4c1d <+237>: movq $0x0,0x8(%rdx)
0x00007ffff7aa4c25 <+245>: cmpl $0x0,0x336ba8(%rip) # 0x7ffff7ddb7d4 <__libc_multiple_threads>
0x00007ffff7aa4c2c <+252>: je 0x7ffff7aa4c35 <getchar+261>
0x00007ffff7aa4c2e <+254>: lock decl (%rdx)
0x00007ffff7aa4c31 <+257>: jne 0x7ffff7aa4c77 <_L_unlock_150>
0x00007ffff7aa4c33 <+259>: jmp 0x7ffff7aa4c39 <getchar+265>
0x00007ffff7aa4c35 <+261>: decl (%rdx)
0x00007ffff7aa4c37 <+263>: jne 0x7ffff7aa4c77 <_L_unlock_150>
0x00007ffff7aa4c39 <+265>: mov %rsi,%rdi
0x00007ffff7aa4c3c <+268>: callq 0x7ffff7a54d50 <_Unwind_Resume>
End of assembler dump.
您也可以直接查看 libc 的源代码来了解 getchar 是如何实现的,不过那样您看到的将是(有时可能晦涩难懂的)C 代码。
getchar() 的实现位于 libc.so 中——它是您系统上的标准 C 库实现(适用于类 Unix 系统),因此您需要对该库文件使用反汇编程序。
最简单的方法是使用 gdb 调试器(同样,仅适用于 Unix):
编译你的程序并将其与 libc.so 链接:
$ gcc -o prog prog.c
运行 GDB:
$ gdb prog
运行您的程序(这样 libc.so 的 PLT 表才会被填充),然后立即按 Ctrl + C 将其停止:
...
(gdb) r
^C
对 getchar() 使用 gdb 的 disas 子命令:
(gdb) disas getchar
祝你阅读底层汇编代码愉快!
In particular I would understand the mechanism of keyboard interrupt behind the "getchar()".
不,这样做并不能让你理解它。getchar() 和硬件键盘中断之间隔着好几层,其中大部分由 Linux 内核实现:
- 标准 C 库
- 系统调用 - 从用户空间到 OS 内核的一扇门
- tty 子系统或管道
- 输入子系统
- (可选)USB 驱动程序,包括 usbhid 或 usbkbd
- 基本内核,包括 IRQ 基础结构
通过反汇编去阅读所有这些层的代码需要数年时间。你需要先从顶层了解整体架构。
下面是 getchar() 的 C 语言源代码。
你可以编译它,并让编译器输出中间的汇编(.asm)文件,从而得到你想要的内容。
/* Copyright (C) 1993-2015 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.

   As a special exception, if you link the code in this file with
   files compiled with a GNU compiler to produce an executable,
   that does not cause the resulting executable to be covered by
   the GNU Lesser General Public License.  This exception does not
   however invalidate any other reasons why the executable file
   might be covered by the GNU Lesser General Public License.
   This exception applies to code released by its copyright holders
   in files containing the exception.  */

#include "libioP.h"
#include "stdio.h"

#undef getchar

int
getchar (void)
{
  int result;
  _IO_acquire_lock (_IO_stdin);
  result = _IO_getc_unlocked (_IO_stdin);
  _IO_release_lock (_IO_stdin);
  return result;
}

#if defined weak_alias && !defined _IO_MTSAFE_IO
#undef getchar_unlocked
weak_alias (getchar, getchar_unlocked)
#endif