-O2 优化器标志导致基本 for 循环失败

-O2 optimizer flag causes basic for loop to fail

在编译一个简单程序时,加上 -O2 优化标志后出现了非常奇怪的行为。

#include <iostream>

// Minimal reproducer: streams the integers 0 through 9 to std::cout, one per line.
int main() {
    for (int i = 0; i < 10; i++) {
        // std::endl writes a newline and then flushes the stream on every iteration.
        std::cout << i << std::endl;
    }
    return 0;
}

程序输出:

1
2
3
4
5
6
7
...
1228881
...

该程序在 -O3 和 -Ofast 标志下按预期工作。

运行 clang++ -O2 -v test.cpp 的输出:

vm-hw06{mrussell}125: clang++ -O2 -v test.cpp
clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /comp/15/bin
Found candidate GCC installation: /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0
Selected GCC installation: /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0
Candidate multilib: .;@m64
Selected multilib: .;@m64
 "/h/mrussell/clang/bin/clang-10" -cc1 -triple x86_64-unknown-linux-gnu -emit-obj -disable-free -main-file-name test.cpp -mrelocation-model static -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -v -resource-dir /h/mrussell/clang/lib/clang/10.0.0 -internal-isystem /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0/../../../../include/c++/11.0.0 -internal-isystem /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0/../../../../include/c++/11.0.0/x86_64-pc-linux-gnu -internal-isystem /h/mrussell

奇怪的是,删除 << endl 后问题就消失了。同样,把 std::cout 那一行替换为 printf 也能按预期工作。更新:删除 << i 但保留 << std::endl 同样有效——会打印 10 个空行!我对此感到很困惑,任何帮助都将不胜感激!

PS:请注意,我是按照这篇文档构建 clang++ 的:https://btorpey.github.io/blog/2015/01/02/building-clang/ 。我已经用它编译基本程序一年多了,从未出现过此类问题。

更新:clang++ -O2 -S test.cpp 的输出:

        .text
        .file   "test.cpp"
        .globl  main                    # -- Begin function main
        .p2align        4, 0x90
        .type   main,@function
#-----------------------------------------------------------------------
# int main()  -- clang 10 -O2 listing (AT&T syntax, x86-64 System V).
# Each iteration performs `std::cout << i << std::endl`; std::endl has
# been inlined into a widen('\n') step followed by put() and flush().
# Register roles (callee-saved, so they stay live across the calls):
#   %r14d  loop counter i
#   %rbx   ostream returned by operator<<(int)
#   %rbp   pointer loaded from 240(%rbx,%rax); the block labels below
#          name it as the stream's std::ctype<char> facet
# NOTE(review): the immediates of the widen argument, the counter
# increment and the cache-flag test were missing or garbled in the
# posted listing; they are restored here as $10, $1 and $0 based on the
# C++ source -- confirm against a fresh `clang++ -O2 -S` run.
#-----------------------------------------------------------------------
main:                                   # @main
        .cfi_startproc
# %bb.0:                                # %entry
        # Three pushes plus the return address occupy 32 bytes, so %rsp
        # is 16-byte aligned at every call below.
        pushq   %rbp
        .cfi_def_cfa_offset 16
        pushq   %r14
        .cfi_def_cfa_offset 24
        pushq   %rbx
        .cfi_def_cfa_offset 32
        .cfi_offset %rbx, -32
        .cfi_offset %r14, -24
        .cfi_offset %rbp, -16
        xorl    %r14d, %r14d            # i = 0
        jmp     .LBB0_1
        .p2align        4, 0x90
.LBB0_4:                                # %if.end.i
                                        #   in Loop: Header=BB0_1 Depth=1
        # Slow path: the flag byte at 56(%rbp) was zero. Call
        # std::ctype<char>::_M_widen_init(), then make an indirect call
        # through slot +48 of the table pointed to by (%rbp).
        movq    %rbp, %rdi
        callq   _ZNKSt5ctypeIcE13_M_widen_initEv
        movq    (%rbp), %rax
        movq    %rbp, %rdi
        movl    $10, %esi               # '\n' (ASCII 10), the character to widen
        callq   *48(%rax)
.LBB0_5:                                # %_ZNKSt5ctypeIcE5widenEc.exit
                                        #   in Loop: Header=BB0_1 Depth=1
        movsbl  %al, %esi               # widened char, sign-extended, is put()'s argument
        movq    %rbx, %rdi
        callq   _ZNSo3putEc             # std::ostream::put(char)
        movq    %rax, %rdi
        callq   _ZNSo5flushEv           # std::ostream::flush() on the stream put() returned
        addl    $1, %r14d               # i++
        # NOTE(review): the C++ source bound is `i < 10`; the -101 below is
        # reproduced exactly as posted -- it may be the miscompile under
        # discussion or a transcription artifact. Confirm before relying on it.
        cmpl    $-101, %r14d
        je      .LBB0_6
.LBB0_1:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
        movl    $_ZSt4cout, %edi        # absolute 32-bit address of std::cout (static relocation model)
        movl    %r14d, %esi
        callq   _ZNSolsEi               # std::ostream::operator<<(int)
        movq    %rax, %rbx
        # Read the offset stored 24 bytes before the address held in the
        # object's first qword, then load the pointer at +240 from the
        # object adjusted by that offset.
        # presumably the virtual-base offset to basic_ios followed by its
        # ctype facet member -- verify against the libstdc++ layout in use
        movq    (%rax), %rax
        movq    -24(%rax), %rax
        movq    240(%rbx,%rax), %rbp
        testq   %rbp, %rbp
        je      .LBB0_7                 # null pointer -> __throw_bad_cast
# %bb.2:                                # %_ZSt13__check_facetISt5ctypeIcEERKT_PS3_.exit
                                        #   in Loop: Header=BB0_1 Depth=1
        cmpb    $0, 56(%rbp)            # flag byte zero -> take the slow path at .LBB0_4
        je      .LBB0_4
# %bb.3:                                # %if.then.i
                                        #   in Loop: Header=BB0_1 Depth=1
        # Fast path: read the already-widened byte directly.
        # presumably a table starting at offset 57 indexed by '\n' (57 + 10) -- confirm
        movzbl  67(%rbp), %eax
        jmp     .LBB0_5
.LBB0_6:                                # %for.cond.cleanup
        xorl    %eax, %eax              # return 0
        popq    %rbx
        .cfi_def_cfa_offset 24
        popq    %r14
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        retq
.LBB0_7:                                # %if.then.i10
        # Reached with all three registers still pushed, hence the CFA
        # offset is restated as 32; no return path follows the call.
        .cfi_def_cfa_offset 32
        callq   _ZSt16__throw_bad_castv
.Lfunc_end0:
        .size   main, .Lfunc_end0-main
        .cfi_endproc
                                        # -- End function
        .section        .text.startup,"ax",@progbits
        .p2align        4, 0x90         # -- Begin function _GLOBAL__sub_I_test.cpp
        .type   _GLOBAL__sub_I_test.cpp,@function
#-----------------------------------------------------------------------
# Static initializer for test.cpp (-O2 build); its address is the entry
# emitted into .init_array further down in this listing.
# Calls std::ios_base::Init::Init() on _ZStL8__ioinit, then tail-calls
# __cxa_atexit with
#   %edi = address of std::ios_base::Init::~Init()
#   %esi = address of _ZStL8__ioinit
#   %edx = address of __dso_handle
#-----------------------------------------------------------------------
_GLOBAL__sub_I_test.cpp:                # @_GLOBAL__sub_I_test.cpp
        .cfi_startproc
# %bb.0:                                # %entry
        # The pushed value is never used (it is popped again before the
        # tail call); the 8-byte push keeps %rsp 16-byte aligned at the call.
        pushq   %rax
        .cfi_def_cfa_offset 16
        movl    $_ZStL8__ioinit, %edi
        callq   _ZNSt8ios_base4InitC1Ev
        movl    $_ZNSt8ios_base4InitD1Ev, %edi
        movl    $_ZStL8__ioinit, %esi
        movl    $__dso_handle, %edx
        popq    %rax
        .cfi_def_cfa_offset 8
        jmp     __cxa_atexit            # TAILCALL
.Lfunc_end1:
        .size   _GLOBAL__sub_I_test.cpp, .Lfunc_end1-_GLOBAL__sub_I_test.cpp
        .cfi_endproc
                                        # -- End function
        # _ZStL8__ioinit: file-local common object, 1 byte in size with
        # 1-byte alignment.
        .type   _ZStL8__ioinit,@object  # @_ZStL8__ioinit
        .local  _ZStL8__ioinit
        .comm   _ZStL8__ioinit,1,1
        .hidden __dso_handle
        # One 8-byte-aligned pointer to this file's static initializer,
        # placed in the .init_array section.
        .section        .init_array,"aw",@init_array
        .p2align        3
        .quad   _GLOBAL__sub_I_test.cpp

        # Compiler identification string, an empty .note.GNU-stack section,
        # and the address-significance table emitted by clang.
        .ident  "clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)"
        .section        ".note.GNU-stack","",@progbits
        .addrsig
        .addrsig_sym _GLOBAL__sub_I_test.cpp
        .addrsig_sym _ZStL8__ioinit
        .addrsig_sym __dso_handle
        .addrsig_sym _ZSt4cout

并且,作为参考,以下是 clang++ -S test.cpp 的输出:

        .text
        .file   "test.cpp"
        .section        .text.startup,"ax",@progbits
        .p2align        4, 0x90         # -- Begin function __cxx_global_var_init
        .type   __cxx_global_var_init,@function
#-----------------------------------------------------------------------
# Unoptimized initializer for _ZStL8__ioinit.
# Calls std::ios_base::Init::Init() on the object, then calls
# __cxa_atexit with
#   %rdi = address of std::ios_base::Init::~Init()
#   %rsi = address of _ZStL8__ioinit
#   %rdx = address of __dso_handle
# Uses a conventional %rbp frame; no locals are allocated.
#-----------------------------------------------------------------------
__cxx_global_var_init:                  # @__cxx_global_var_init
        .cfi_startproc
# %bb.0:                                # %entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        movabsq $_ZStL8__ioinit, %rdi
        callq   _ZNSt8ios_base4InitC1Ev
        # The destructor address is materialised in %rax first and then
        # copied into the first argument register.
        movabsq $_ZNSt8ios_base4InitD1Ev, %rax
        movq    %rax, %rdi
        movabsq $_ZStL8__ioinit, %rsi
        movabsq $__dso_handle, %rdx
        callq   __cxa_atexit
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end0:
        .size   __cxx_global_var_init, .Lfunc_end0-__cxx_global_var_init
        .cfi_endproc
                                        # -- End function
        .text
        .globl  main                    # -- Begin function main
        .p2align        4, 0x90
        .type   main,@function
#-----------------------------------------------------------------------
# int main()  -- clang 10 unoptimized listing (AT&T syntax, x86-64
# System V). Straight translation of
#   for (int i = 0; i < 10; i++) std::cout << i << std::endl;
# Frame-pointer based frame with 16 bytes of locals:
#   -4(%rbp)  zero-initialised and not read again in this function
#             (presumably clang's return-value slot -- confirm)
#   -8(%rbp)  loop counter i
# NOTE(review): the numeric immediates ($16, $0, $10, $1) were missing
# or garbled in the posted listing and are restored here from the C++
# source and the frame layout -- confirm against a fresh `clang++ -S` run.
#-----------------------------------------------------------------------
main:                                   # @main
        .cfi_startproc
# %bb.0:                                # %entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        subq    $16, %rsp               # locals; keeps %rsp 16-byte aligned at the calls
        movl    $0, -4(%rbp)
        movl    $0, -8(%rbp)            # i = 0
.LBB1_1:                                # %for.cond
                                        # =>This Inner Loop Header: Depth=1
        cmpl    $10, -8(%rbp)           # signed compare: leave the loop once i >= 10
        jge     .LBB1_4
# %bb.2:                                # %for.body
                                        #   in Loop: Header=BB1_1 Depth=1
        movl    -8(%rbp), %esi
        movabsq $_ZSt4cout, %rdi
        callq   _ZNSolsEi               # std::ostream::operator<<(int)
        # Pass the returned stream and the address of std::endl to the
        # operator<< overload that takes a manipulator function pointer.
        movq    %rax, %rdi
        movabsq $_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_, %rsi
        callq   _ZNSolsEPFRSoS_E
# %bb.3:                                # %for.inc
                                        #   in Loop: Header=BB1_1 Depth=1
        movl    -8(%rbp), %eax
        addl    $1, %eax                # i++
        movl    %eax, -8(%rbp)
        jmp     .LBB1_1
.LBB1_4:                                # %for.end
        xorl    %eax, %eax              # return 0
        addq    $16, %rsp
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end1:
        .size   main, .Lfunc_end1-main
        .cfi_endproc
                                        # -- End function
        .section        .text.startup,"ax",@progbits
        .p2align        4, 0x90         # -- Begin function _GLOBAL__sub_I_test.cpp
        .type   _GLOBAL__sub_I_test.cpp,@function
#-----------------------------------------------------------------------
# Static initializer for test.cpp (unoptimized build); its address is
# the entry emitted into .init_array further down in this listing.
# Sets up an %rbp frame and does nothing but call __cxx_global_var_init.
#-----------------------------------------------------------------------
_GLOBAL__sub_I_test.cpp:                # @_GLOBAL__sub_I_test.cpp
        .cfi_startproc
# %bb.0:                                # %entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        callq   __cxx_global_var_init
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end2:
        .size   _GLOBAL__sub_I_test.cpp, .Lfunc_end2-_GLOBAL__sub_I_test.cpp
        .cfi_endproc
                                        # -- End function
        # _ZStL8__ioinit: file-local common object, 1 byte in size with
        # 1-byte alignment.
        .type   _ZStL8__ioinit,@object  # @_ZStL8__ioinit
        .local  _ZStL8__ioinit
        .comm   _ZStL8__ioinit,1,1
        .hidden __dso_handle
        # One 8-byte-aligned pointer to this file's static initializer,
        # placed in the .init_array section.
        .section        .init_array,"aw",@init_array
        .p2align        3
        .quad   _GLOBAL__sub_I_test.cpp

        # Compiler identification string, an empty .note.GNU-stack section,
        # and the address-significance table emitted by clang.
        .ident  "clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)"
        .section        ".note.GNU-stack","",@progbits
        .addrsig
        .addrsig_sym __cxx_global_var_init
        .addrsig_sym __cxa_atexit
        .addrsig_sym _ZNSolsEi
        .addrsig_sym _ZNSolsEPFRSoS_E
        .addrsig_sym _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
        .addrsig_sym _GLOBAL__sub_I_test.cpp
        .addrsig_sym _ZStL8__ioinit
        .addrsig_sym __dso_handle
        .addrsig_sym _ZSt4cout

写给来到这个帖子寻找答案的人:@AlanBirtles 给了我一些很好的提示。首先,我最初的构建来自主干(trunk),而不是带标签的发布分支,因此它本身可能就有 bug。然而,从源代码重新构建 clang++ 12、13 和 14 之后,问题依旧。正如他后来提到的,所使用的 libstdc++ 可能存在兼容性问题。在重新构建 gcc,并基于新的 gcc 重新构建 clang 之后,一切恢复正常。从某种意义上说,这个答案并不令人满意,因为我并不清楚这种不兼容的具体原因。不过,如果有人因为同样的问题找到这个帖子,现在你知道该怎么做了。