为什么在 macOS 上使用 clang -O2 编译 C 程序时 "empty" loop 导致总线错误？

Question

我在 macOS High Sierra 上。

$ uname -v
Darwin Kernel Version 17.2.0: Fri Sep 29 18:27:05 PDT 2017; root:xnu-4570.20.62~3/RELEASE_X86_64

我有以下人为构造的示例程序。

/* Spins forever in an empty `for (;;)` loop; control never reaches the end of the function. */
void nop1() {
  for (;;);  /* no init, condition, or step, and an empty body: loops unconditionally */
}

/* Spins forever in an empty `while (1)` loop; control never reaches the end of the function. */
void nop2() {
  while (1);  /* constant-true condition with an empty body */
}

/*
 * Loops forever, incrementing a local counter whose value is never read
 * by anything else in the function.
 * NOTE(review): `i` is a plain int that grows without bound, so it would
 * eventually pass INT_MAX; signed overflow is undefined behaviour in C.
 */
void nop3() {
  int i = 0;  /* automatic (per-call) counter */
  while(1) {
    i++;
  }
}

/*
 * Loops forever, incrementing a function-local `static` counter.
 * Unlike the automatic variable in nop3(), `i` has static storage
 * duration, so it is a single object that persists across calls.
 */
void nop4() {
  static int i = 0;  /* initialised once, not on every call */
  while(1) {
    i++;
  };  /* the trailing `;` is just an empty statement after the loop */
}

/*
 * Entry point: calls nop1(), which loops forever, so at the source level
 * the `return 0` below is never reached.
 */
int main() {
  nop1();
  return 0;
}

编辑 2： 我现在已经在下面的示例中使用 clang 进行了显式编译。

当我用 clang -O2 编译并运行上述 C 程序时，如果 main() 调用的是 nop1()、nop2() 或 nop3()，就会出现总线错误；但调用 nop4() 时不会。

$ ./a.out
[1]    93029 bus error (core dumped)  ./a.out

在没有 -O2 的情况下编译时所有版本运行没有总线错误。我猜优化器将 nop3() 转换为 nop2()。我想了解在每种情况下导致总线错误的原因以及为什么在 nop4() 中使用静态变量不会导致总线错误。

这是我的 clang 版本：

$ clang -v
Apple LLVM version 8.1.0 (clang-802.0.42)
Target: x86_64-apple-darwin17.2.0
Thread model: posix
InstalledDir: /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin

我还在 Linux 上使用 gcc 进行了测试：

$ uname -a
Linux trygger 4.4.0-112-generic #135-Ubuntu SMP Fri Jan 19 11:48:36 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux

在这台机器上，无论是否使用 -O2，调用任何一个 nop 函数的程序都能运行（不会出现总线错误）。

这是我在 Linux 上的 gcc 版本。

$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/5/lto-wrapper
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu 5.4.0-6ubuntu1~16.04.6' --with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs --enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr --program-suffix=-5 --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-libmpx --enable-plugin --with-system-zlib --disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo --with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-amd64/jre --enable-java-home --with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-amd64 --with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-amd64 --with-arch-directory=amd64 --with-ecj-jar=/usr/share/java/eclipse-ecj.jar --enable-objc-gc --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.6)

编辑 4

也许 otool 的输出更容易分析。首先是 -O2.

$ clang -O2 segfault.c
$ otool -vt a.out
a.out:
(__TEXT,__text) section
_nop1:
0000000100000f30    pushq   %rbp
0000000100000f31    movq    %rsp, %rbp
0000000100000f34    nopw    %cs:(%rax,%rax)
0000000100000f40    jmp 0x100000f40
0000000100000f42    nopw    %cs:(%rax,%rax)
_nop2:
0000000100000f50    pushq   %rbp
0000000100000f51    movq    %rsp, %rbp
0000000100000f54    nopw    %cs:(%rax,%rax)
0000000100000f60    jmp 0x100000f60
0000000100000f62    nopw    %cs:(%rax,%rax)
_nop3:
0000000100000f70    pushq   %rbp
0000000100000f71    movq    %rsp, %rbp
0000000100000f74    nopw    %cs:(%rax,%rax)
0000000100000f80    jmp 0x100000f80
0000000100000f82    nopw    %cs:(%rax,%rax)
_nop4:
0000000100000f90    pushq   %rbp
0000000100000f91    movq    %rsp, %rbp
0000000100000f94    nopw    %cs:(%rax,%rax)
0000000100000fa0    jmp 0x100000fa0
0000000100000fa2    nopw    %cs:(%rax,%rax)
_main:
0000000100000fb0    pushq   %rbp
0000000100000fb1    movq    %rsp, %rbp

没有-O2。

$ clang segfault.c
$ otool -vt a.out
a.out:
(__TEXT,__text) section
_nop1:
0000000100000f30    pushq   %rbp
0000000100000f31    movq    %rsp, %rbp
0000000100000f34    jmp 0x100000f39
0000000100000f39    jmp 0x100000f39
0000000100000f3e    nop
_nop2:
0000000100000f40    pushq   %rbp
0000000100000f41    movq    %rsp, %rbp
0000000100000f44    jmp 0x100000f49
0000000100000f49    jmp 0x100000f49
0000000100000f4e    nop
_nop3:
0000000100000f50    pushq   %rbp
0000000100000f51    movq    %rsp, %rbp
0000000100000f54    movl    $0x0, -0x4(%rbp)
0000000100000f5b    movl    -0x4(%rbp), %eax
0000000100000f5e    addl    $0x1, %eax
0000000100000f61    movl    %eax, -0x4(%rbp)
0000000100000f64    jmp 0x100000f5b
0000000100000f69    nopl    (%rax)
_nop4:
0000000100000f70    pushq   %rbp
0000000100000f71    movq    %rsp, %rbp
0000000100000f74    jmp 0x100000f79
0000000100000f79    movl    0x81(%rip), %eax
0000000100000f7f    addl    $0x1, %eax
0000000100000f82    movl    %eax, 0x78(%rip)
0000000100000f88    jmp 0x100000f79
0000000100000f8d    nopl    (%rax)
_main:
0000000100000f90    pushq   %rbp
0000000100000f91    movq    %rsp, %rbp
0000000100000f94    subq    $0x10, %rsp
0000000100000f98    movl    $0x0, -0x4(%rbp)
0000000100000f9f    callq   0x100000f40
0000000100000fa4    xorl    %eax, %eax
0000000100000fa6    addq    $0x10, %rsp
0000000100000faa    popq    %rbp
0000000100000fab    retq

编辑 3

应 @Olaf 的要求，我添加了由 clang -S 生成的汇编代码。

    .section    __TEXT,__text,regular,pure_instructions
    .macosx_version_min 10, 12
    ## void nop1(void), as emitted by clang -S (AT&T syntax, Mach-O).
    ## Sets up a frame with %rbp and then enters a one-instruction loop that
    ## jumps to itself. There is no epilogue and no ret.
    .globl  _nop1
    .p2align    4, 0x90
_nop1:                                  ## @nop1
    .cfi_startproc
## BB#0:
    pushq   %rbp                        ## save the caller's frame pointer
Ltmp0:
    .cfi_def_cfa_offset 16
Ltmp1:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp                  ## establish this function's frame pointer
Ltmp2:
    .cfi_def_cfa_register %rbp
    jmp LBB0_1                          ## enter the loop; the target is the very next instruction
LBB0_1:                                 ## =>This Inner Loop Header: Depth=1
    jmp LBB0_1                          ## branch to self forever (the `for (;;);` body)
    .cfi_endproc

    ## void nop2(void), as emitted by clang -S (AT&T syntax, Mach-O).
    ## Sets up a frame with %rbp and then enters a one-instruction loop that
    ## jumps to itself. There is no epilogue and no ret.
    .globl  _nop2
    .p2align    4, 0x90
_nop2:                                  ## @nop2
    .cfi_startproc
## BB#0:
    pushq   %rbp                        ## save the caller's frame pointer
Ltmp3:
    .cfi_def_cfa_offset 16
Ltmp4:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp                  ## establish this function's frame pointer
Ltmp5:
    .cfi_def_cfa_register %rbp
    jmp LBB1_1                          ## enter the loop; the target is the very next instruction
LBB1_1:                                 ## =>This Inner Loop Header: Depth=1
    jmp LBB1_1                          ## branch to self forever (the `while (1);` body)
    .cfi_endproc

    ## void nop3(void), as emitted by clang -S (AT&T syntax, Mach-O).
    ## Keeps a 4-byte counter in the stack slot at -4(%rbp) and increments it
    ## in an endless load/add/store loop. There is no epilogue and no ret.
    ## The immediate operands ($0 and $1) had been lost from this listing and
    ## are restored to match the C source (`int i = 0; ... i++;`).
    .globl  _nop3
    .p2align    4, 0x90
_nop3:                                  ## @nop3
    .cfi_startproc
## BB#0:
    pushq   %rbp                        ## save the caller's frame pointer
Ltmp6:
    .cfi_def_cfa_offset 16
Ltmp7:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp                  ## establish this function's frame pointer
Ltmp8:
    .cfi_def_cfa_register %rbp
    movl    $0, -4(%rbp)                ## i = 0 (no subq: the slot lies below %rsp)
LBB2_1:                                 ## =>This Inner Loop Header: Depth=1
    movl    -4(%rbp), %eax              ## eax = i
    addl    $1, %eax                    ## eax = i + 1
    movl    %eax, -4(%rbp)              ## i = eax
    jmp LBB2_1                          ## repeat forever
    .cfi_endproc

    ## void nop4(void), as emitted by clang -S (AT&T syntax, Mach-O).
    ## Increments the 4-byte static counter _nop4.i, addressed RIP-relatively,
    ## in an endless load/add/store loop. There is no epilogue and no ret.
    ## The immediate operand ($1) had been lost from this listing and is
    ## restored to match the C source (`i++`).
    .globl  _nop4
    .p2align    4, 0x90
_nop4:                                  ## @nop4
    .cfi_startproc
## BB#0:
    pushq   %rbp                        ## save the caller's frame pointer
Ltmp9:
    .cfi_def_cfa_offset 16
Ltmp10:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp                  ## establish this function's frame pointer
Ltmp11:
    .cfi_def_cfa_register %rbp
    jmp LBB3_1                          ## enter the loop; the target is the very next instruction
LBB3_1:                                 ## =>This Inner Loop Header: Depth=1
    movl    _nop4.i(%rip), %eax         ## eax = i (static storage)
    addl    $1, %eax                    ## eax = i + 1
    movl    %eax, _nop4.i(%rip)         ## i = eax
    jmp LBB3_1                          ## repeat forever
    .cfi_endproc

    ## int main(void), as emitted by clang -S (AT&T syntax, Mach-O).
    ## Reserves 16 bytes of stack, zeroes the slot at -4(%rbp), calls _nop1,
    ## then returns 0 in %eax. The code after the call is only reached if
    ## _nop1 returns.
    ## The immediate operands ($16, $0, $16) had been lost from this listing
    ## and are restored; 16 keeps %rsp 16-byte aligned at the call site.
    .globl  _main
    .p2align    4, 0x90
_main:                                  ## @main
    .cfi_startproc
## BB#0:
    pushq   %rbp                        ## save the caller's frame pointer
Ltmp12:
    .cfi_def_cfa_offset 16
Ltmp13:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp                  ## establish this function's frame pointer
Ltmp14:
    .cfi_def_cfa_register %rbp
    subq    $16, %rsp                   ## reserve 16 bytes of locals
    movl    $0, -4(%rbp)                ## zero the slot at -4(%rbp) (presumably clang's return-value slot)
    callq   _nop1                       ## nop1();
    xorl    %eax, %eax                  ## return value = 0
    addq    $16, %rsp                   ## release the locals
    popq    %rbp                        ## restore the caller's frame pointer
    retq
    .cfi_endproc

.zerofill __DATA,__bss,_nop4.i,4,2      ## @nop4.i

.subsections_via_symbols

Answer 1

看起来 clang 只为该函数生成了序言（prologue），之后什么都没有，于是执行流直接落入了后面不相关的字节中。在我的机器上，它产生：

0000000100000fa0 <_main>:
   100000fa0:   55                      push   rbp
   100000fa1:   48 89 e5                mov    rbp,rsp

Disassembly of section __TEXT.__unwind_info:

0000000100000fa4 <__TEXT.__unwind_info>:
   100000fa4:   01 00                   add    DWORD PTR [rax],eax
   100000fa6:   00 00                   add    BYTE PTR [rax],al
   100000fa8:   1c 00                   sbb    al,0x0
   100000faa:   00 00                   add    BYTE PTR [rax],al
   100000fac:   00 00                   add    BYTE PTR [rax],al

总线错误是由第一条 add 指令引起的：此时 rax 指向 _main，而该指令试图写入只读内存。

有趣的是，在 nop1 的第一行加入 __asm__ volatile("nop\n"); 之后，程序的行为就正确了。

Answer 2

这是 LLVM 中的一个已知错误。您看到的行为对 C++ 有效，但对 C 无效。

查看 2006 年的错误报告 #965 here。

最近，由于 Rust 也受到了这个问题的影响，它再次被提了出来。

有一个补丁 here，已于 2017 年 11 月合并，但我不知道它将在哪个版本中发布。

另请参阅邮件列表中的讨论 here。

为什么在 macOS 上使用 clang -O2 编译 C 程序时 "empty" loop 导致总线错误？

Why does "empty" loop cause bus error when compiling C program with clang -O2 on macOS?

c

macos

llvm

clang

bus-error