使用 GCC 进行堆栈保护和粉碎

Question

我正在阅读 Smashing the Stack for Fun and Profit（特别是 post 指的是 "Buffer Overflows" 部分）。文章是为一台 32 位机器，但是我正在开发一台 64 位机器，为此我考虑到了我的例子。一个特定的例子导致了一些我无法解决的问题解释。 example3.c有覆盖return地址的功能跳过主函数中的指令。这是我的代码：

#include <stdio.h>

void function(int a, int b, int c)
{
  char buf1[5];
  char buf2[10];
  int *retptr;

  retptr = (void*)(buf2 + 40);
  (*retptr) += 8;
}

int main(void)
{
  int x;

  x = 0;
  function(1,2,3);
  x = 1;
  printf("%d\n", x);
  return 0;
}

我用 gcc v4.8.2 编译这个程序，命令如下：

gcc example3.c -o example3

请注意，默认情况下 gcc 编译器似乎实现了一些堆栈地址 space 布局随机化和堆栈金丝雀等保护。我在计算 ret 时已考虑这些安全措施指针值。这里是由 gcc example3.c -S -fverbose-asm -o stack-protection.s:

    .file   "example3.c"
# GNU C (Ubuntu 4.8.2-19ubuntu1) version 4.8.2 (x86_64-linux-gnu)
#   compiled by GNU C version 4.8.2, GMP version 5.1.3, MPFR version 3.1.2-p3, MPC version 1.0.1
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed:  -imultiarch x86_64-linux-gnu example3.c -mtune=generic
# -march=x86-64 -auxbase-strip verbose-stack-pro.s -fverbose-asm
# -fstack-protector -Wformat -Wformat-security
# options enabled:  -faggressive-loop-optimizations
# -fasynchronous-unwind-tables -fauto-inc-dec -fbranch-count-reg -fcommon
# -fdelete-null-pointer-checks -fdwarf2-cfi-asm -fearly-inlining
# -feliminate-unused-debug-types -ffunction-cse -fgcse-lm -fgnu-runtime
# -fident -finline-atomics -fira-hoist-pressure -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -fmath-errno -fmerge-debug-strings
# -fmove-loop-invariants -fpeephole -fprefetch-loop-arrays
# -freg-struct-return -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fshow-column
# -fsigned-zeros -fsplit-ivs-in-unroller -fstack-protector
# -fstrict-volatile-bitfields -fsync-libcalls -ftrapping-math
# -ftree-coalesce-vars -ftree-cselim -ftree-forwprop -ftree-loop-if-convert
# -ftree-loop-im -ftree-loop-ivcanon -ftree-loop-optimize
# -ftree-parallelize-loops= -ftree-phiprop -ftree-pta -ftree-reassoc
# -ftree-scev-cprop -ftree-slp-vectorize -ftree-vect-loop-version
# -funit-at-a-time -funwind-tables -fverbose-asm -fzero-initialized-in-bss
# -m128bit-long-double -m64 -m80387 -maccumulate-outgoing-args
# -malign-stringops -mfancy-math-387 -mfp-ret-in-387 -mfxsr -mglibc
# -mieee-fp -mlong-double-80 -mmmx -mno-sse4 -mpush-args -mred-zone -msse
# -msse2 -mtls-direct-seg-refs

    .text
    .globl  function
    .type   function, @function
function:
.LFB0:
    .cfi_startproc
    pushq   %rbp    #
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp  #,
    .cfi_def_cfa_register 6
    subq    , %rsp   #,
    movl    %edi, -52(%rbp) # a, a
    movl    %esi, -56(%rbp) # b, b
    movl    %edx, -60(%rbp) # c, c
    movq    %fs:40, %rax    #, tmp65
    movq    %rax, -8(%rbp)  # tmp65, D.2197
    xorl    %eax, %eax  # tmp65
    leaq    -32(%rbp), %rax #, tmp61
    addq    , %rax   #, tmp62
    movq    %rax, -40(%rbp) # tmp62, ret
    movq    -40(%rbp), %rax # ret, tmp63
    movl    (%rax), %eax    # *ret_1, D.2195
    leal    8(%rax), %edx   #, D.2195
    movq    -40(%rbp), %rax # ret, tmp64
    movl    %edx, (%rax)    # D.2195, *ret_1
    movq    -8(%rbp), %rax  # D.2197, tmp66
    xorq    %fs:40, %rax    #, tmp66
    je  .L2 #,
    call    __stack_chk_fail    #
.L2:
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   function, .-function
    .section    .rodata
.LC0:
    .string "%d\n"
    .text
    .globl  main
    .type   main, @function
main:
.LFB1:
    .cfi_startproc
    pushq   %rbp    #
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp  #,
    .cfi_def_cfa_register 6
    subq    , %rsp   #,
    movl    [=12=], -4(%rbp)    #, x
    movl    , %edx    #,
    movl    , %esi    #,
    movl    , %edi    #,
    call    function    #
    movl    , -4(%rbp)    #, x
    movl    -4(%rbp), %eax  # x, tmp61
    movl    %eax, %esi  # tmp61,
    movl    $.LC0, %edi #,
    movl    [=12=], %eax    #,
    call    printf  #
    movl    [=12=], %eax    #, D.2200
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1:
    .size   main, .-main
    .ident  "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
    .section    .note.GNU-stack,"",@progbits

执行示例 3 具有跳过对 x 的第二个赋值的预期效果，并且程序输出 0.

但是，如果我使用 -fno-stack-protector 选项编译：

gcc -fno-stack-protector example3.c -S -fverbose-asm -o no-stack-protection.s

我收到以下汇编文件：

    .file   "example3.c"
# GNU C (Ubuntu 4.8.2-19ubuntu1) version 4.8.2 (x86_64-linux-gnu)
#   compiled by GNU C version 4.8.2, GMP version 5.1.3, MPFR version 3.1.2-p3, MPC version 1.0.1
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed:  -imultiarch x86_64-linux-gnu example3.c -mtune=generic
# -march=x86-64 -auxbase-strip verbose-no-stack-pro.s -fno-stack-protector
# -fverbose-asm -Wformat -Wformat-security
# options enabled:  -faggressive-loop-optimizations
# -fasynchronous-unwind-tables -fauto-inc-dec -fbranch-count-reg -fcommon
# -fdelete-null-pointer-checks -fdwarf2-cfi-asm -fearly-inlining
# -feliminate-unused-debug-types -ffunction-cse -fgcse-lm -fgnu-runtime
# -fident -finline-atomics -fira-hoist-pressure -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -fmath-errno -fmerge-debug-strings
# -fmove-loop-invariants -fpeephole -fprefetch-loop-arrays
# -freg-struct-return -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fshow-column
# -fsigned-zeros -fsplit-ivs-in-unroller -fstrict-volatile-bitfields
# -fsync-libcalls -ftrapping-math -ftree-coalesce-vars -ftree-cselim
# -ftree-forwprop -ftree-loop-if-convert -ftree-loop-im -ftree-loop-ivcanon
# -ftree-loop-optimize -ftree-parallelize-loops= -ftree-phiprop -ftree-pta
# -ftree-reassoc -ftree-scev-cprop -ftree-slp-vectorize
# -ftree-vect-loop-version -funit-at-a-time -funwind-tables -fverbose-asm
# -fzero-initialized-in-bss -m128bit-long-double -m64 -m80387
# -maccumulate-outgoing-args -malign-stringops -mfancy-math-387
# -mfp-ret-in-387 -mfxsr -mglibc -mieee-fp -mlong-double-80 -mmmx -mno-sse4
# -mpush-args -mred-zone -msse -msse2 -mtls-direct-seg-refs

    .text
    .globl  function
    .type   function, @function
function:
.LFB0:
    .cfi_startproc
    pushq   %rbp    #
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp  #,
    .cfi_def_cfa_register 6
    movl    %edi, -36(%rbp) # a, a
    movl    %esi, -40(%rbp) # b, b
    movl    %edx, -44(%rbp) # c, c
    leaq    -32(%rbp), %rax #, tmp61
    addq    , %rax   #, tmp62
    movq    %rax, -8(%rbp)  # tmp62, ret
    movq    -8(%rbp), %rax  # ret, tmp63
    movl    (%rax), %eax    # *ret_1, D.2195
    leal    8(%rax), %edx   #, D.2195
    movq    -8(%rbp), %rax  # ret, tmp64
    movl    %edx, (%rax)    # D.2195, *ret_1
    popq    %rbp    #
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   function, .-function
    .section    .rodata
.LC0:
    .string "%d\n"
    .text
    .globl  main
    .type   main, @function
main:
.LFB1:
    .cfi_startproc
    pushq   %rbp    #
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp  #,
    .cfi_def_cfa_register 6
    subq    , %rsp   #,
    movl    [=14=], -4(%rbp)    #, x
    movl    , %edx    #,
    movl    , %esi    #,
    movl    , %edi    #,
    call    function    #
    movl    , -4(%rbp)    #, x
    movl    -4(%rbp), %eax  # x, tmp61
    movl    %eax, %esi  # tmp61,
    movl    $.LC0, %edi #,
    movl    [=14=], %eax    #,
    call    printf  #
    movl    [=14=], %eax    #, D.2196
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1:
    .size   main, .-main
    .ident  "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
    .section    .note.GNU-stack,"",@progbits

并且相应的可执行文件不会生成所需的值 0，而是生成一个我无法与程序集文件协调的随机值。

我对 -fno-stack-protector 情况下堆栈帧的印象是（sfp = 保存的帧指针，ret = return 地址）：

low memory address     buf2 (16 bytes)  buf1 (8 bytes)  retptr (8 bytes)  sfp (8 bytes) ret       high memory address
<---                  [              ][              ][                ][             ][    ] ...
top of stack                                                                                      bottom of stack

我的问题：

我是不是算错了未保护情况下return地址的位置？

Answer 1

Am I miscalculating the position of the return address in the unprotected case?

那部分是正确的，至少只要地址适合 int。 retptr 的正确类型应该是 long x86-64 asm，这样指针可以保存 64 位地址。

您可以通过运行以下程序仔细检查：

#include <stdio.h>

void function(int a, int b, int c)
{
  char buf1[5];
  char buf2[10];
  int *retptr;

  retptr = (void*)(buf2 + 40);
  printf("retptr points to: %p\n", (long*)(long)*retptr);
  (*retptr) += 8;
}

int main(void)
{
  int x;


  printf("ret address is %p\n", &&label);
  x = 0;
  function(1,2,3);
label:
  x = 1;
  printf("%d\n", x);

  return 0;
}

通过运行这个，你应该可以确认function后面的地址也是retptr.

我认为您没有得到预期 0 的原因在于这一行：

(*retptr) += 8;

在我的 64 位系统上，x = 1 被编译为：

  40058a:   c7 45 fc 01 00 00 00    movl   [=12=]x1,-0x4(%rbp)
  400591:   8b 45 fc                mov    -0x4(%rbp),%eax
  400594:   89 c6                   mov    %eax,%esi

第一行在 x 中加载 1，另外两行将 x 的值作为参数传递给 printf()。请注意这是 7 个字节，而不是 8 个字节。如果将增量更改为 7，您应该会看到 0，如您所料。

实际上，通过加 8，ret 指令将指令指针设置为指向 45，而不是 8b。然后该代码变为：

  45 fc                 rex.RB cld 
  89 c6                 mov    %eax,%esi

我不完全确定此时会发生什么，我怀疑这取决于 CPU 模型。我的似乎跳过了 mov %eax,%esi 之前的指令，因此 printf 显示了 %eax 的任何值。如果你看function()的反汇编，原来%rax是用来存储retptr的值的，也就是打印出来的看似随机的值

使用 GCC 进行堆栈保护和粉碎

Stack Protection and Smashing using GCC

c

stack-smash