在 x64 ASM 中循环并打印 argv[]

Question

我一直在研究 while 循环来遍历所有 CLI 参数。在研究仅打印 1 个元素的解决方案时，我注意到了一些事情；这就是让我来到这里的思考过程。

我注意到如果我这样做 lea 16(%rsp), %someRegisterToWrite，我能够 get/print argv[1]。接下来我尝试了 lea 24(%rsp), %someRTW，这让我可以访问 argv[2]。我一直往上看它是否会继续工作并且确实如此。

我的想法是继续将 8 添加到 %someRTW 并递增 "counter" 直到计数器等于 argc。下面的代码在输入单个参数时效果很好，但在输入 2 个参数时不打印任何内容，当我输入 3 个参数时，它将打印前 2 个，中间没有 space。

# Asker's attempt, as posted: walk argv[1..] using a counter compared against
# argc, writing each argument followed by a space with the write syscall.
# The logic errors are intentionally left in place; the answers discuss them.
.section __DATA,__data
.section __TEXT,__text
.globl _main
_main:
    lea (%rsp), %rbx        #argc
    lea 16(%rsp), %rcx      #argv[1]
    mov $0x2, %r14          #counter
    L1:
    mov (%rcx), %rsi        #%rsi = user_addr_t cbuf
    mov (%rcx), %r10
    mov 16(%rcx), %r11
    sub %r10, %r11          #Get number of bytes until next arg
    mov $0x2000004, %eax    #4 = write
    mov $1, %edi            #edi = file descriptor
    mov %r11, %rdx          #user_size_t nbyte
    syscall
    cmp (%rbx), %r14        #if counter < argc
    jb L2
    jge L3
    L2:
    inc %r14
    mov 8(%rcx), %rcx       #mov 24(%rsp) back into %rcx
    mov $0x2000004, %eax
    mov $0x20, %rsi         #0x20 = space
    mov $1, %rdx
    syscall
    jmp L1
    L3:
    xor %rax, %rax
    xor %edi, %edi
    mov $0x2000001, %eax
    syscall

Answer 1

你已经猜对了，堆栈上的第一个参数是参数的数量，第三个和后面的是cli-arguments。第二个是程序的实际名称。您不必关心 argc，因为您可以弹出堆栈直到值为零。一个简单的解决方案是：

# Walk the argument pointers by popping them off the initial process stack
# until the terminating NULL pointer is reached; argc is not needed.
add $0x10, %rsp           # skip the argument count and the program-name pointer
L0:
  pop %rsi                # next argument pointer
  test %rsi, %rsi
  jz L2                   # a NULL pointer ends the argument list
  mov %rsi, %rdi          # keep the start of the argument
  xor %edx, %edx          # byte counter = 0
  L1:
    mov (%rsi), %al
    inc %rsi
    inc %rdx
    test %al, %al
  jnz L1
  dec %rdx                # the loop also counted the NUL terminator
  #%rdx - len(argument)
  #%rdi - argument
  #< do something with the argument >
  jmp L0
L2:

如果你想在每个参数后有一个 space 或换行符，只需打印它 :).

# Write the two line-ending bytes after an argument.
# NOTE(review): STDOUT and sys_write are assumed to be assembler constants
# defined elsewhere; this fragment does not show their definitions.
lea newline(%rip), %rsi   # address of the line-ending bytes (RIP-relative)
mov $0x02, %rdx           # length: the CR and LF bytes, not the trailing 0
mov $STDOUT, %rdi
mov $sys_write, %rax
[...]
newline: .byte 13, 10, 0

我对 %rax 中的系统调用编号有点困惑，但我猜这是 OSX 特有的做法？正如 Jester 和 Peter Cordes 已经提到的，syscall 指令会覆盖两个寄存器：%rcx 被写入返回地址（%rip），%r11 被写入标志（%rflags）。我建议查看英特尔 x86_64 文档：http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-manual-325462.pdf

此代码的另一件事：

jb L2                   # unsigned test: taken when counter is below argc
jge L3                  # signed test, mixed with the unsigned jb above
L2:

argc 和 counter 是无符号的，所以这看起来好一点，我猜：

jae L3                  # unsigned: leave the loop once counter >= argc, else fall through to L2

抱歉，如果代码不起作用。我通常使用 intel-syntax 并且我没有测试它，但我想你明白了:)

Answer 2

我假设您在 64 位 OS/X 上进行汇编和链接时有意绕过了 C 运行时代码。一个例子是在没有 C 运行时启动文件和系统库的情况下进行静态构建，并且您指定 _main 作为程序入口点。_start 通常是进程入口点，除非被覆盖。

在这种情况下，64 位内核会将 macho64 程序加载到内存中，并使用程序参数和环境变量等设置进程堆栈。 Apple OS/X 启动时的进程堆栈状态与 第 3.4 节 中的 System V x86-64 ABI 中记录的相同：

一个观察结果是参数指针列表以 NULL(0) 地址结尾。您可以利用这一点遍历所有参数，直到遇到 NULL(0) 地址为止，以此替代依赖 argc 中的值的做法。

问题

一个问题是您的代码假定所有寄存器的值在 SYSCALL 之后都保持不变。SYSCALL 指令本身会破坏 RCX 和 R11 的内容：

SYSCALL invokes an OS system-call handler at privilege level 0. It does so by loading RIP from the IA32_LSTAR MSR (after saving the address of the instruction following SYSCALL into RCX). (The WRMSR instruction ensures that the IA32_LSTAR MSR always contain a canonical address.)

SYSCALL also saves RFLAGS into R11 and then masks RFLAGS using the IA32_FMASK MSR (MSR address C0000084H); specifically, the processor clears in RFLAGS every bit corresponding to a bit that is set in the IA32_FMASK MSR

避免这种情况的一种方法是改用 RCX 和 R11 以外的寄存器。否则，如果需要保持它们的值不变，就必须在 SYSCALL 前后保存并恢复它们。内核还会用返回值覆盖 RAX。

Apple OS/X 系统调用列表提供了所有可用内核函数的详细信息。在 64 位 OS/X 代码中，每个系统调用编号都要加上 0x2000000：

In 64-bit systems, Mach system calls are positive, but are prefixed with 0x2000000 — which clearly separates and disambiguates them from the POSIX calls, which are prefixed with 0x1000000

您计算命令行参数长度的方法无效。一个参数的地址不一定要放在前一个参数之后的内存中。正确的方法是编写从您感兴趣的参数开头开始的代码，并搜索 NUL(0) 终止字符。

此打印 space 或分隔符的代码将不起作用：

# Quoted from the question: puts the character value 0x20 itself in %rsi,
# where the write syscall expects the address of a buffer.
mov 8(%rcx), %rcx       #mov 24(%rsp) back into %rcx
mov $0x2000004, %eax
mov $0x20, %rsi         #0x20 = space
mov $1, %rdx
syscall

当使用 sys_write 系统调用时，RSI 寄存器是指向字符缓冲区的指针。您不能传递像 0x20（空格）这样的立即值。您需要将空格或其他分隔符（如换行符）放入缓冲区，并通过 RSI 传递该缓冲区的地址。

修改后的代码

此代码采用了前面信息和额外清理中的一些想法，并将每个命令行参数（不包括程序名称）写入标准输出。每个将由换行符分隔。 Darwin OS/X 上的换行符是 0x0a (\n).

# Print every command-line argument (excluding the program name) to standard
# output, one per line. AT&T syntax, 64-bit OS/X, entered directly at _main
# without the C runtime, so 0(%rsp) holds the argument count on entry.
#
# In 64-bit OSX syscall numbers = 0x2000000+(32-bit syscall #)
SYS_EXIT  = 0x2000001
SYS_WRITE = 0x2000004

STDOUT    = 1

.section __DATA, __const
newline: .ascii "\n"
newline_end: NEWLINE_LEN = newline_end-newline

.section __TEXT, __text
.globl _main
_main:
    mov (%rsp), %r8             # 0(%rsp) = # args. This code doesn't use it
                                #    Only save it to R8 as an example.
    lea 16(%rsp), %rbx          # 8(%rsp)=pointer to prog name
                                # 16(%rsp)=pointer to 1st parameter
.argloop:
    mov (%rbx), %rsi            # Get current cmd line parameter pointer
    test %rsi, %rsi
    jz .exit                    # If it's zero we are finished

    # Compute length of current cmd line parameter
    # Starting at the address in RSI (current parameter) search until
    # we find a NUL(0) terminating character.
    # rdx = length not including terminating NUL character

    xor %edx, %edx              # RDX = character index = 0
    mov %edx, %eax              # RAX = terminating character NUL(0) to look for
.strlenloop:
         inc %rdx               # advance to next character index
         cmpb %al, -1(%rsi,%rdx)# Is character at previous char index
                                #     a NUL(0) character?
         jne .strlenloop        # If it isn't a NUL(0) char then loop again
    dec %rdx                    # We don't want strlen to include NUL(0)

    # Display the cmd line argument
    # sys_write requires:
    #    rdi = output device number
    #    rsi = pointer to string (command line argument)
    #    rdx = length
    #
    mov $STDOUT, %edi
    mov $SYS_WRITE, %eax
    syscall

    # display a new line
    # (%edi is not reloaded here; this relies on it still holding STDOUT
    #  after the previous write)
    mov $NEWLINE_LEN, %edx
    lea newline(%rip), %rsi     # We use RIP addressing for the
                                #     string address
    mov $SYS_WRITE, %eax
    syscall

    add $8, %rbx                # Go to next cmd line argument pointer
                                #     In 64-bit pointers are 8 bytes
    # lea 8(%rbx), %rbx         # This LEA instruction can replace the
                                #     ADD since we don't care about the flags
                                #     rbx = 8 + rbx (flags unaltered)
    jmp .argloop

.exit:
    # Exit the program
    # sys_exit requires:
    #    rdi = return value
    #
    xor %edi, %edi
    mov $SYS_EXIT, %eax
    syscall

如果您打算在不同的地方使用像 strlen 这样的代码，那么我建议您创建一个函数来执行该操作。为简单起见，我已将 strlen 硬编码到代码中。如果您希望提高 strlen 实现的效率，那么 Agner Fog 的 Optimizing subroutines in assembly language 是一个不错的起点。

此代码应编译并 link 为静态可执行文件，无需 C 运行时使用：

gcc -e _main progargs.s -o progargs -nostartfiles -static

在 x64 ASM 中循环并打印 argv[]

Cycle Through and Print argv[] in x64 ASM

macos

64-bit

assembly

x86-64

att

问题

修改后的代码