从汇编代码 (x86/64) 调用 read 系统调用会产生段错误(编译器构造)
Calling read syscall from assembly (x86/64) yields segmentation fault (compiler construction)
我正在为一种类 C 语言构建编译器,并尝试把一个用汇编实现的基本函数 "void readString(int, char*)" 与我的编译器生成的汇编代码链接(link)在一起。
待编译的类 C 源文件为:
// Test program in the C-like source language: declares a 20-byte buffer
// and calls the library routine readString on it.
void main () {
char t[20];
readString(7,t); // Read 7 bytes and place them in t buffer
}
编译后生成的文件是 out.s(注意调用约定:参数通过堆栈传递;另外,在这种语言中整数的大小为 2 个字节):
# out.s -- compiler output for main() (GAS, AT&T syntax).
# Convention used by this compiler: every argument is passed on the stack,
# integers are 2 bytes wide, the caller pushes an "access link" last and
# removes everything it pushed after the call returns.
# NOTE(review): the `$` immediates and the numeric `.$N` labels were lost in
# this copy of the listing. The values below are reconstructed from
# `char t[20]`, `readString(7,t)` and the [rbp+32]/[rbp+40] offsets the
# corrected callee uses -- confirm against the compiler's real output.
.$0:
    .globl main
main:
    pushq %rbp
    movq %rsp,%rbp
    subq $20,%rsp               # room for char t[20] at -20(%rbp)
.$1:
    movw $7,%ax                 # Push first argument in the stack
    pushw %ax                   # 2-byte integer argument
.$2:
    leaq -20(%rbp),%rax         # Push address of the second arg in the stack
    pushq %rax
.$3:
    subq $8,%rsp                # this is not important, needed for the convention being followed
    pushq 16(%rbp)              # pushing "access link",
    call _readString
    addq $26,%rsp               # caller clears the "leftovers": 2 + 8 + 8 + 8 bytes
.$4:
.$main_0_11:
    movq %rbp,%rsp
    popq %rbp
    ret
reads.asm 中库函数的代码:
.intel_syntax noprefix
.global _readString

# void readString(int n, char *buf)  -- reads.asm as posted in the question.
# Arguments arrive on the stack (caller-defined convention, not System V).
# Issues read(0, buf, n), then terminates the data with a zero byte,
# overwriting a trailing '\n' if there is one.
# Saves/restores rbp, rdi and rsi; rax and rdx are overwritten, and the
# syscall instruction itself clobbers rcx and r11.
_readString:
    push rbp
    mov rbp, rsp
    push rdi
    push rsi
    # NOTE: the two loads below are the question's ORIGINAL argument fetch and
    # are the fault being asked about, kept here on purpose. The caller pushes
    # the 2-byte integer before the 8-byte pointer, so [rbp+32] actually holds
    # the buffer pointer and [rbp+34] is an 8-byte load straddling both
    # arguments; rsi then does not point at the buffer and the store at doRead
    # can fault. The corrected loads are `movzx edi, word ptr [rbp+40]` and
    # `mov rsi, [rbp+32]`.
    mov rdi, [rbp+32]                   # First argument
    mov rsi, [rbp+34]                   # Second Argument
    mov rdx, rdi                        # rdx = byte count for read
doRead:
    mov byte ptr [rsi], 0x00            # start with an empty string
    xor rax, rax                        # rax = 0: syscall number of read
    mov rdi, rax                        # rdi = 0: file descriptor stdin
    syscall                             # read syscall, reads up to $rdx bytes
    or rax, rax                         # nothing read
    jz finish                           # (a negative -errno result is not checked)
    add rsi, rax                        # rsi = one past the last byte read
    cmp byte ptr [rsi-1], 0x0a          # check if last character read was '\n'
    jne addZero
    sub rsi, 1                          # if so, replace with '\0'
addZero:
    mov byte ptr [rsi], 0x00
finish:
    pop rsi
    pop rdi
    pop rbp
    ret
接下来是链接和运行:
$ gcc -c out.s
$ gcc -c reads.s
$ gcc out.o reads.o
$ ./a.out
[2] Segmentation fault ./a.out
描述调用约定的图片
好的,正如 @David Hoelzer 建议的那样,我重新检查了 "readString" 从堆栈中取参数的方式,结果发现两个参数的顺序弄反了!
所以 reads.asm 的开头几行应改为:
# Corrected entry sequence of readString(int n, char *buf).
# The caller pushes n (2 bytes) before buf (8 bytes), so buf lies at the
# lower stack address and n directly above it.
_readString:
push rbp
mov rbp, rsp
push rdi # rdi and rsi are saved because they are overwritten below
push rsi
movzx edi, word ptr [rbp+40] # n: 2-byte first argument, zero-extended (writing edi clears the upper half of rdi)
mov rsi, [rbp+32] # buf: 8-byte second argument
mov rdx, rdi # rdx = n
我正在为一种类 C 语言构建编译器,并尝试把一个用汇编实现的基本函数 "void readString(int, char*)" 与我的编译器生成的汇编代码链接(link)在一起。
待编译的类 C 源文件为:
// Test program in the C-like source language: declares a 20-byte buffer
// and calls the library routine readString on it.
void main () {
char t[20];
readString(7,t); // Read 7 bytes and place them in t buffer
}
编译后生成的文件是 out.s(注意调用约定:参数通过堆栈传递;另外,在这种语言中整数的大小为 2 个字节):
# out.s -- compiler output for main() (GAS, AT&T syntax).
# Convention used by this compiler: every argument is passed on the stack,
# integers are 2 bytes wide, the caller pushes an "access link" last and
# removes everything it pushed after the call returns.
# NOTE(review): the `$` immediates and the numeric `.$N` labels were lost in
# this copy of the listing. The values below are reconstructed from
# `char t[20]`, `readString(7,t)` and the [rbp+32]/[rbp+40] offsets the
# corrected callee uses -- confirm against the compiler's real output.
.$0:
    .globl main
main:
    pushq %rbp
    movq %rsp,%rbp
    subq $20,%rsp               # room for char t[20] at -20(%rbp)
.$1:
    movw $7,%ax                 # Push first argument in the stack
    pushw %ax                   # 2-byte integer argument
.$2:
    leaq -20(%rbp),%rax         # Push address of the second arg in the stack
    pushq %rax
.$3:
    subq $8,%rsp                # this is not important, needed for the convention being followed
    pushq 16(%rbp)              # pushing "access link",
    call _readString
    addq $26,%rsp               # caller clears the "leftovers": 2 + 8 + 8 + 8 bytes
.$4:
.$main_0_11:
    movq %rbp,%rsp
    popq %rbp
    ret
reads.asm 中库函数的代码:
.intel_syntax noprefix
.global _readString

# void readString(int n, char *buf)  -- reads.asm as posted in the question.
# Arguments arrive on the stack (caller-defined convention, not System V).
# Issues read(0, buf, n), then terminates the data with a zero byte,
# overwriting a trailing '\n' if there is one.
# Saves/restores rbp, rdi and rsi; rax and rdx are overwritten, and the
# syscall instruction itself clobbers rcx and r11.
_readString:
    push rbp
    mov rbp, rsp
    push rdi
    push rsi
    # NOTE: the two loads below are the question's ORIGINAL argument fetch and
    # are the fault being asked about, kept here on purpose. The caller pushes
    # the 2-byte integer before the 8-byte pointer, so [rbp+32] actually holds
    # the buffer pointer and [rbp+34] is an 8-byte load straddling both
    # arguments; rsi then does not point at the buffer and the store at doRead
    # can fault. The corrected loads are `movzx edi, word ptr [rbp+40]` and
    # `mov rsi, [rbp+32]`.
    mov rdi, [rbp+32]                   # First argument
    mov rsi, [rbp+34]                   # Second Argument
    mov rdx, rdi                        # rdx = byte count for read
doRead:
    mov byte ptr [rsi], 0x00            # start with an empty string
    xor rax, rax                        # rax = 0: syscall number of read
    mov rdi, rax                        # rdi = 0: file descriptor stdin
    syscall                             # read syscall, reads up to $rdx bytes
    or rax, rax                         # nothing read
    jz finish                           # (a negative -errno result is not checked)
    add rsi, rax                        # rsi = one past the last byte read
    cmp byte ptr [rsi-1], 0x0a          # check if last character read was '\n'
    jne addZero
    sub rsi, 1                          # if so, replace with '\0'
addZero:
    mov byte ptr [rsi], 0x00
finish:
    pop rsi
    pop rdi
    pop rbp
    ret
接下来是链接和运行:
$ gcc -c out.s
$ gcc -c reads.s
$ gcc out.o reads.o
$ ./a.out
[2] Segmentation fault ./a.out
描述调用约定的图片
好的,正如 @David Hoelzer 建议的那样,我重新检查了 "readString" 从堆栈中取参数的方式,结果发现两个参数的顺序弄反了!
所以 reads.asm 的开头几行应改为:
# Corrected entry sequence of readString(int n, char *buf).
# The caller pushes n (2 bytes) before buf (8 bytes), so buf lies at the
# lower stack address and n directly above it.
_readString:
push rbp
mov rbp, rsp
push rdi # rdi and rsi are saved because they are overwritten below
push rsi
movzx edi, word ptr [rbp+40] # n: 2-byte first argument, zero-extended (writing edi clears the upper half of rdi)
mov rsi, [rbp+32] # buf: 8-byte second argument
mov rdx, rdi # rdx = n