-O2 优化器标志导致基本 for 循环失败
-O2 optimizer flag causes basic for loop to fail
在编译一个简单程序时,加上 -O2
优化器标志后,我注意到了非常奇怪的行为。
#include <iostream>
// Minimal reproducer: streams the integers 0 through 9 to std::cout, one per iteration.
int main() {
for (int i = 0; i < 10; i++) {
// Streams i followed by std::endl on every iteration.
std::cout << i << std::endl;
}
return 0;
}
程序输出:
1
2
3
4
5
6
7
...
1228881
...
该程序在 -O3
或 -Ofast
标志下按预期工作。
运行 clang++ -O2 -v test.cpp
的输出:
vm-hw06{mrussell}125: clang++ -O2 -v test.cpp
clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.co\
m/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /comp/15/bin
Found candidate GCC installation: /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0
Selected GCC installation: /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0
Candidate multilib: .;@m64
Selected multilib: .;@m64
"/h/mrussell/clang/bin/clang-10" -cc1 -triple x86_64-unknown-linux-gnu -emit-obj -disable-free -main-file-name test.cpp -mrelocation-model static -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -v -resource-dir /h/mrussell/clang/lib/clang/10.0.0 -internal-isystem /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0/../../../../include/c++/11.0.0 -internal-isystem /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0/../../../../include/c++/11.0.0/x86_64-pc-linux-gnu -internal-isystem /h/mrussell
奇怪的是,删除 << endl
就能消除所有错误。同样,把 std::cout
这一行替换为 printf
也能按预期工作。更新:删除 << i
但保留 << std::endl
同样正常——会打印 10 个空行!我对此十分困惑,非常感谢任何帮助!
PS:请注意,我是按照这篇文档构建 clang++ 的:https://btorpey.github.io/blog/2015/01/02/building-clang/ - 一年多来我一直用它编译基础程序,从未出现过此类问题。
更新:
clang++ -O2 -S test.cpp
的输出
.text
.file "test.cpp"
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
# int main() as emitted by clang++ -O2 (x86-64, AT&T syntax).
# Register roles visible in this listing:
#   %r14d = loop counter; passed as the int argument to operator<<(int)
#   %rbx  = stream pointer returned by operator<<(int)
#   %rbp  = pointer loaded from the stream object; null-checked before use
# %rbp, %r14 and %rbx are saved on entry and restored before retq.
# The immediates of `movl $10`, `addl $1` and `cmpb $0` were lost when this
# listing was extracted and are restored here; each is noted where it occurs.
main: # @main
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
pushq %r14
.cfi_def_cfa_offset 24
pushq %rbx
.cfi_def_cfa_offset 32
.cfi_offset %rbx, -32
.cfi_offset %r14, -24
.cfi_offset %rbp, -16
xorl %r14d, %r14d # counter = 0
jmp .LBB0_1
.p2align 4, 0x90
.LBB0_4: # %if.end.i
# in Loop: Header=BB0_1 Depth=1
# Slow path: call ctype<char>::_M_widen_init(), then widen the character
# through an indirect call read from the object's first word (+48).
movq %rbp, %rdi
callq _ZNKSt5ctypeIcE13_M_widen_initEv
movq (%rbp), %rax
movq %rbp, %rdi
# Restored immediate: 10 is '\n', the character std::endl writes.
# Presumably the same table entry the fast path reads at 67(%rbp) - confirm.
movl $10, %esi
callq *48(%rax)
.LBB0_5: # %_ZNKSt5ctypeIcE5widenEc.exit
# in Loop: Header=BB0_1 Depth=1
# %al holds the widened character from either path.
movsbl %al, %esi
movq %rbx, %rdi
callq _ZNSo3putEc # std::ostream::put(char)
movq %rax, %rdi
callq _ZNSo5flushEv # std::ostream::flush()
# Restored immediate: the reported output advances by one per line, so the
# step is taken to be $1 - confirm against the original compiler output.
addl $1, %r14d
# NOTE(review): the bound -101 is reproduced exactly as it appears in the
# post. It does not correspond to `i < 10` in the C++ source; whether it is
# genuine compiler output or further extraction damage cannot be told here.
cmpl $-101, %r14d
je .LBB0_6
.LBB0_1: # %for.body
# =>This Inner Loop Header: Depth=1
movl $_ZSt4cout, %edi
movl %r14d, %esi
callq _ZNSolsEi # std::ostream::operator<<(int)
movq %rax, %rbx
# Follow the offset stored at -24 from the object's first word, then load
# the pointer at +240 (per the block labels below, the ctype<char> facet).
movq (%rax), %rax
movq -24(%rax), %rax
movq 240(%rbx,%rax), %rbp
testq %rbp, %rbp
je .LBB0_7 # null pointer -> __throw_bad_cast
# %bb.2: # %_ZSt13__check_facetISt5ctypeIcEERKT_PS3_.exit
# in Loop: Header=BB0_1 Depth=1
# Restored immediate ($0): a zero byte at +56 selects the slow path.
# Presumably this byte is the "widen table initialized" flag - confirm.
cmpb $0, 56(%rbp)
je .LBB0_4
# %bb.3: # %if.then.i
# in Loop: Header=BB0_1 Depth=1
# Fast path: read the widened character directly from the object.
movzbl 67(%rbp), %eax
jmp .LBB0_5
.LBB0_6: # %for.cond.cleanup
xorl %eax, %eax # return 0
popq %rbx
.cfi_def_cfa_offset 24
popq %r14
.cfi_def_cfa_offset 16
popq %rbp
.cfi_def_cfa_offset 8
retq
.LBB0_7: # %if.then.i10
.cfi_def_cfa_offset 32
callq _ZSt16__throw_bad_castv # std::__throw_bad_cast(); nothing follows the call
.Lfunc_end0:
.size main, .Lfunc_end0-main
.cfi_endproc
# -- End function
.section .text.startup,"ax",@progbits
.p2align 4, 0x90 # -- Begin function _GLOBAL__sub_I_test.cpp
.type _GLOBAL__sub_I_test.cpp,@function
# Initializer for test.cpp (-O2 build): constructs the std::ios_base::Init
# object _ZStL8__ioinit, then tail-calls __cxa_atexit with that object's
# destructor, the object address and __dso_handle as arguments.
_GLOBAL__sub_I_test.cpp: # @_GLOBAL__sub_I_test.cpp
.cfi_startproc
# %bb.0: # %entry
pushq %rax # 8-byte push, undone by the popq below (presumably call-site stack alignment)
.cfi_def_cfa_offset 16
movl $_ZStL8__ioinit, %edi # object address
callq _ZNSt8ios_base4InitC1Ev # std::ios_base::Init::Init()
movl $_ZNSt8ios_base4InitD1Ev, %edi # arg 1: std::ios_base::Init::~Init()
movl $_ZStL8__ioinit, %esi # arg 2: object address
movl $__dso_handle, %edx # arg 3: __dso_handle
popq %rax
.cfi_def_cfa_offset 8
jmp __cxa_atexit # TAILCALL
.Lfunc_end1:
.size _GLOBAL__sub_I_test.cpp, .Lfunc_end1-_GLOBAL__sub_I_test.cpp
.cfi_endproc
# -- End function
# File-local common symbol (size 1, alignment 1) used as the
# std::ios_base::Init object by the initializer in this listing.
.type _ZStL8__ioinit,@object # @_ZStL8__ioinit
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.hidden __dso_handle
# Place the address of _GLOBAL__sub_I_test.cpp in .init_array (aligned to 2^3 bytes).
.section .init_array,"aw",@init_array
.p2align 3
.quad _GLOBAL__sub_I_test.cpp
.ident "clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)"
.section ".note.GNU-stack","",@progbits
.addrsig
.addrsig_sym _GLOBAL__sub_I_test.cpp
.addrsig_sym _ZStL8__ioinit
.addrsig_sym __dso_handle
.addrsig_sym _ZSt4cout
并且,作为参考,clang++ -S test.cpp
的输出
.text
.file "test.cpp"
.section .text.startup,"ax",@progbits
.p2align 4, 0x90 # -- Begin function __cxx_global_var_init
.type __cxx_global_var_init,@function
# Unoptimized initializer: constructs the std::ios_base::Init object
# _ZStL8__ioinit and passes its destructor, the object address and
# __dso_handle to __cxa_atexit. Uses a standard %rbp frame.
__cxx_global_var_init: # @__cxx_global_var_init
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movabsq $_ZStL8__ioinit, %rdi # object address
callq _ZNSt8ios_base4InitC1Ev # std::ios_base::Init::Init()
movabsq $_ZNSt8ios_base4InitD1Ev, %rax # std::ios_base::Init::~Init()
movq %rax, %rdi # arg 1: destructor address
movabsq $_ZStL8__ioinit, %rsi # arg 2: object address
movabsq $__dso_handle, %rdx # arg 3: __dso_handle
callq __cxa_atexit
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end0:
.size __cxx_global_var_init, .Lfunc_end0-__cxx_global_var_init
.cfi_endproc
# -- End function
.text
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
# int main() as emitted by clang++ without optimization (x86-64, AT&T syntax).
# Stack slots in this listing:
#   -4(%rbp) = zeroed on entry, not read again here
#   -8(%rbp) = loop counter i
# Six immediates ($16 twice, $0 twice, $10, $1) were lost when this listing
# was extracted and are restored here from the C++ loop `i = 0; i < 10; i++`.
main: # @main
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
# Restored immediate: 16 bytes cover both 4-byte slots and keep %rsp
# 16-byte aligned at the calls below; must match the addq in the epilogue.
subq $16, %rsp
movl $0, -4(%rbp) # restored immediate ($0)
movl $0, -8(%rbp) # restored immediate ($0): i = 0
.LBB1_1: # %for.cond
# =>This Inner Loop Header: Depth=1
cmpl $10, -8(%rbp) # restored immediate: loop bound from `i < 10`
jge .LBB1_4 # signed compare: leave the loop once i >= 10
# %bb.2: # %for.body
# in Loop: Header=BB1_1 Depth=1
movl -8(%rbp), %esi
movabsq $_ZSt4cout, %rdi
callq _ZNSolsEi # std::ostream::operator<<(int)
movq %rax, %rdi
movabsq $_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_, %rsi
callq _ZNSolsEPFRSoS_E # operator<<(ostream& (*)(ostream&)), applied to std::endl
# %bb.3: # %for.inc
# in Loop: Header=BB1_1 Depth=1
movl -8(%rbp), %eax
addl $1, %eax # restored immediate: i++
movl %eax, -8(%rbp)
jmp .LBB1_1
.LBB1_4: # %for.end
xorl %eax, %eax # return 0
addq $16, %rsp # restored immediate: releases the frame reserved by subq
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end1:
.size main, .Lfunc_end1-main
.cfi_endproc
# -- End function
.section .text.startup,"ax",@progbits
.p2align 4, 0x90 # -- Begin function _GLOBAL__sub_I_test.cpp
.type _GLOBAL__sub_I_test.cpp,@function
# Unoptimized initializer for test.cpp: sets up an %rbp frame and does
# nothing but call __cxx_global_var_init.
_GLOBAL__sub_I_test.cpp: # @_GLOBAL__sub_I_test.cpp
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
callq __cxx_global_var_init
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end2:
.size _GLOBAL__sub_I_test.cpp, .Lfunc_end2-_GLOBAL__sub_I_test.cpp
.cfi_endproc
# -- End function
# File-local common symbol (size 1, alignment 1) used as the
# std::ios_base::Init object by __cxx_global_var_init in this listing.
.type _ZStL8__ioinit,@object # @_ZStL8__ioinit
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.hidden __dso_handle
# Place the address of _GLOBAL__sub_I_test.cpp in .init_array (aligned to 2^3 bytes).
.section .init_array,"aw",@init_array
.p2align 3
.quad _GLOBAL__sub_I_test.cpp
.ident "clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)"
.section ".note.GNU-stack","",@progbits
.addrsig
.addrsig_sym __cxx_global_var_init
.addrsig_sym __cxa_atexit
.addrsig_sym _ZNSolsEi
.addrsig_sym _ZNSolsEPFRSoS_E
.addrsig_sym _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
.addrsig_sym _GLOBAL__sub_I_test.cpp
.addrsig_sym _ZStL8__ioinit
.addrsig_sym __dso_handle
.addrsig_sym _ZSt4cout
对于任何来到这个帖子寻找答案的人:@AlanBirtles 给了我一些很好的提示。首先,我最初的构建来自主干(trunk),而不是带标签的发布分支,因此它本身可能存在缺陷。然而,即使从源代码重新构建了 clang++
12、13 和 14,问题依旧存在。不过,正如他后来提到的,所使用的 libstdc++
可能存在兼容性问题。在重新构建 gcc
,并用新的 gcc
重新构建 clang
之后,一切恢复正常。从某种意义上说,这个答案并不令人满意,因为我并不清楚这种不兼容的具体原因。但是,如果有人遇到同样的问题并偶然看到这个帖子,你就知道该怎么做了。
在编译一个简单程序时,加上 -O2
优化器标志后,我注意到了非常奇怪的行为。
#include <iostream>
// Minimal reproducer: streams the integers 0 through 9 to std::cout, one per iteration.
int main() {
for (int i = 0; i < 10; i++) {
// Streams i followed by std::endl on every iteration.
std::cout << i << std::endl;
}
return 0;
}
程序输出:
1
2
3
4
5
6
7
...
1228881
...
该程序在 -O3
或 -Ofast
标志下按预期工作。
运行 clang++ -O2 -v test.cpp
的输出:
vm-hw06{mrussell}125: clang++ -O2 -v test.cpp
clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.co\
m/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /comp/15/bin
Found candidate GCC installation: /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0
Selected GCC installation: /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0
Candidate multilib: .;@m64
Selected multilib: .;@m64
"/h/mrussell/clang/bin/clang-10" -cc1 -triple x86_64-unknown-linux-gnu -emit-obj -disable-free -main-file-name test.cpp -mrelocation-model static -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -v -resource-dir /h/mrussell/clang/lib/clang/10.0.0 -internal-isystem /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0/../../../../include/c++/11.0.0 -internal-isystem /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0/../../../../include/c++/11.0.0/x86_64-pc-linux-gnu -internal-isystem /h/mrussell
奇怪的是,删除 << endl
就能消除所有错误。同样,把 std::cout
这一行替换为 printf
也能按预期工作。更新:删除 << i
但保留 << std::endl
同样正常——会打印 10 个空行!我对此十分困惑,非常感谢任何帮助!
PS:请注意,我是按照这篇文档构建 clang++ 的:https://btorpey.github.io/blog/2015/01/02/building-clang/ - 一年多来我一直用它编译基础程序,从未出现过此类问题。
更新:
clang++ -O2 -S test.cpp
的输出:
.text
.file "test.cpp"
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
# int main() as emitted by clang++ -O2 (x86-64, AT&T syntax).
# Register roles visible in this listing:
#   %r14d = loop counter; passed as the int argument to operator<<(int)
#   %rbx  = stream pointer returned by operator<<(int)
#   %rbp  = pointer loaded from the stream object; null-checked before use
# %rbp, %r14 and %rbx are saved on entry and restored before retq.
# The immediates of `movl $10`, `addl $1` and `cmpb $0` were lost when this
# listing was extracted and are restored here; each is noted where it occurs.
main: # @main
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
pushq %r14
.cfi_def_cfa_offset 24
pushq %rbx
.cfi_def_cfa_offset 32
.cfi_offset %rbx, -32
.cfi_offset %r14, -24
.cfi_offset %rbp, -16
xorl %r14d, %r14d # counter = 0
jmp .LBB0_1
.p2align 4, 0x90
.LBB0_4: # %if.end.i
# in Loop: Header=BB0_1 Depth=1
# Slow path: call ctype<char>::_M_widen_init(), then widen the character
# through an indirect call read from the object's first word (+48).
movq %rbp, %rdi
callq _ZNKSt5ctypeIcE13_M_widen_initEv
movq (%rbp), %rax
movq %rbp, %rdi
# Restored immediate: 10 is '\n', the character std::endl writes.
# Presumably the same table entry the fast path reads at 67(%rbp) - confirm.
movl $10, %esi
callq *48(%rax)
.LBB0_5: # %_ZNKSt5ctypeIcE5widenEc.exit
# in Loop: Header=BB0_1 Depth=1
# %al holds the widened character from either path.
movsbl %al, %esi
movq %rbx, %rdi
callq _ZNSo3putEc # std::ostream::put(char)
movq %rax, %rdi
callq _ZNSo5flushEv # std::ostream::flush()
# Restored immediate: the reported output advances by one per line, so the
# step is taken to be $1 - confirm against the original compiler output.
addl $1, %r14d
# NOTE(review): the bound -101 is reproduced exactly as it appears in the
# post. It does not correspond to `i < 10` in the C++ source; whether it is
# genuine compiler output or further extraction damage cannot be told here.
cmpl $-101, %r14d
je .LBB0_6
.LBB0_1: # %for.body
# =>This Inner Loop Header: Depth=1
movl $_ZSt4cout, %edi
movl %r14d, %esi
callq _ZNSolsEi # std::ostream::operator<<(int)
movq %rax, %rbx
# Follow the offset stored at -24 from the object's first word, then load
# the pointer at +240 (per the block labels below, the ctype<char> facet).
movq (%rax), %rax
movq -24(%rax), %rax
movq 240(%rbx,%rax), %rbp
testq %rbp, %rbp
je .LBB0_7 # null pointer -> __throw_bad_cast
# %bb.2: # %_ZSt13__check_facetISt5ctypeIcEERKT_PS3_.exit
# in Loop: Header=BB0_1 Depth=1
# Restored immediate ($0): a zero byte at +56 selects the slow path.
# Presumably this byte is the "widen table initialized" flag - confirm.
cmpb $0, 56(%rbp)
je .LBB0_4
# %bb.3: # %if.then.i
# in Loop: Header=BB0_1 Depth=1
# Fast path: read the widened character directly from the object.
movzbl 67(%rbp), %eax
jmp .LBB0_5
.LBB0_6: # %for.cond.cleanup
xorl %eax, %eax # return 0
popq %rbx
.cfi_def_cfa_offset 24
popq %r14
.cfi_def_cfa_offset 16
popq %rbp
.cfi_def_cfa_offset 8
retq
.LBB0_7: # %if.then.i10
.cfi_def_cfa_offset 32
callq _ZSt16__throw_bad_castv # std::__throw_bad_cast(); nothing follows the call
.Lfunc_end0:
.size main, .Lfunc_end0-main
.cfi_endproc
# -- End function
.section .text.startup,"ax",@progbits
.p2align 4, 0x90 # -- Begin function _GLOBAL__sub_I_test.cpp
.type _GLOBAL__sub_I_test.cpp,@function
# Initializer for test.cpp (-O2 build): constructs the std::ios_base::Init
# object _ZStL8__ioinit, then tail-calls __cxa_atexit with that object's
# destructor, the object address and __dso_handle as arguments.
_GLOBAL__sub_I_test.cpp: # @_GLOBAL__sub_I_test.cpp
.cfi_startproc
# %bb.0: # %entry
pushq %rax # 8-byte push, undone by the popq below (presumably call-site stack alignment)
.cfi_def_cfa_offset 16
movl $_ZStL8__ioinit, %edi # object address
callq _ZNSt8ios_base4InitC1Ev # std::ios_base::Init::Init()
movl $_ZNSt8ios_base4InitD1Ev, %edi # arg 1: std::ios_base::Init::~Init()
movl $_ZStL8__ioinit, %esi # arg 2: object address
movl $__dso_handle, %edx # arg 3: __dso_handle
popq %rax
.cfi_def_cfa_offset 8
jmp __cxa_atexit # TAILCALL
.Lfunc_end1:
.size _GLOBAL__sub_I_test.cpp, .Lfunc_end1-_GLOBAL__sub_I_test.cpp
.cfi_endproc
# -- End function
# File-local common symbol (size 1, alignment 1) used as the
# std::ios_base::Init object by the initializer in this listing.
.type _ZStL8__ioinit,@object # @_ZStL8__ioinit
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.hidden __dso_handle
# Place the address of _GLOBAL__sub_I_test.cpp in .init_array (aligned to 2^3 bytes).
.section .init_array,"aw",@init_array
.p2align 3
.quad _GLOBAL__sub_I_test.cpp
.ident "clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)"
.section ".note.GNU-stack","",@progbits
.addrsig
.addrsig_sym _GLOBAL__sub_I_test.cpp
.addrsig_sym _ZStL8__ioinit
.addrsig_sym __dso_handle
.addrsig_sym _ZSt4cout
并且,作为参考,clang++ -S test.cpp
的输出:
.text
.file "test.cpp"
.section .text.startup,"ax",@progbits
.p2align 4, 0x90 # -- Begin function __cxx_global_var_init
.type __cxx_global_var_init,@function
# Unoptimized initializer: constructs the std::ios_base::Init object
# _ZStL8__ioinit and passes its destructor, the object address and
# __dso_handle to __cxa_atexit. Uses a standard %rbp frame.
__cxx_global_var_init: # @__cxx_global_var_init
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movabsq $_ZStL8__ioinit, %rdi # object address
callq _ZNSt8ios_base4InitC1Ev # std::ios_base::Init::Init()
movabsq $_ZNSt8ios_base4InitD1Ev, %rax # std::ios_base::Init::~Init()
movq %rax, %rdi # arg 1: destructor address
movabsq $_ZStL8__ioinit, %rsi # arg 2: object address
movabsq $__dso_handle, %rdx # arg 3: __dso_handle
callq __cxa_atexit
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end0:
.size __cxx_global_var_init, .Lfunc_end0-__cxx_global_var_init
.cfi_endproc
# -- End function
.text
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
# int main() as emitted by clang++ without optimization (x86-64, AT&T syntax).
# Stack slots in this listing:
#   -4(%rbp) = zeroed on entry, not read again here
#   -8(%rbp) = loop counter i
# Six immediates ($16 twice, $0 twice, $10, $1) were lost when this listing
# was extracted and are restored here from the C++ loop `i = 0; i < 10; i++`.
main: # @main
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
# Restored immediate: 16 bytes cover both 4-byte slots and keep %rsp
# 16-byte aligned at the calls below; must match the addq in the epilogue.
subq $16, %rsp
movl $0, -4(%rbp) # restored immediate ($0)
movl $0, -8(%rbp) # restored immediate ($0): i = 0
.LBB1_1: # %for.cond
# =>This Inner Loop Header: Depth=1
cmpl $10, -8(%rbp) # restored immediate: loop bound from `i < 10`
jge .LBB1_4 # signed compare: leave the loop once i >= 10
# %bb.2: # %for.body
# in Loop: Header=BB1_1 Depth=1
movl -8(%rbp), %esi
movabsq $_ZSt4cout, %rdi
callq _ZNSolsEi # std::ostream::operator<<(int)
movq %rax, %rdi
movabsq $_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_, %rsi
callq _ZNSolsEPFRSoS_E # operator<<(ostream& (*)(ostream&)), applied to std::endl
# %bb.3: # %for.inc
# in Loop: Header=BB1_1 Depth=1
movl -8(%rbp), %eax
addl $1, %eax # restored immediate: i++
movl %eax, -8(%rbp)
jmp .LBB1_1
.LBB1_4: # %for.end
xorl %eax, %eax # return 0
addq $16, %rsp # restored immediate: releases the frame reserved by subq
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end1:
.size main, .Lfunc_end1-main
.cfi_endproc
# -- End function
.section .text.startup,"ax",@progbits
.p2align 4, 0x90 # -- Begin function _GLOBAL__sub_I_test.cpp
.type _GLOBAL__sub_I_test.cpp,@function
# Unoptimized initializer for test.cpp: sets up an %rbp frame and does
# nothing but call __cxx_global_var_init.
_GLOBAL__sub_I_test.cpp: # @_GLOBAL__sub_I_test.cpp
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
callq __cxx_global_var_init
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end2:
.size _GLOBAL__sub_I_test.cpp, .Lfunc_end2-_GLOBAL__sub_I_test.cpp
.cfi_endproc
# -- End function
# File-local common symbol (size 1, alignment 1) used as the
# std::ios_base::Init object by __cxx_global_var_init in this listing.
.type _ZStL8__ioinit,@object # @_ZStL8__ioinit
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.hidden __dso_handle
# Place the address of _GLOBAL__sub_I_test.cpp in .init_array (aligned to 2^3 bytes).
.section .init_array,"aw",@init_array
.p2align 3
.quad _GLOBAL__sub_I_test.cpp
.ident "clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)"
.section ".note.GNU-stack","",@progbits
.addrsig
.addrsig_sym __cxx_global_var_init
.addrsig_sym __cxa_atexit
.addrsig_sym _ZNSolsEi
.addrsig_sym _ZNSolsEPFRSoS_E
.addrsig_sym _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
.addrsig_sym _GLOBAL__sub_I_test.cpp
.addrsig_sym _ZStL8__ioinit
.addrsig_sym __dso_handle
.addrsig_sym _ZSt4cout
对于任何来到这个帖子寻找答案的人:@AlanBirtles 给了我一些很好的提示。首先,我最初的构建来自主干(trunk),而不是带标签的发布分支,因此它本身可能存在缺陷。然而,即使从源代码重新构建了 clang++
12、13 和 14,问题依旧存在。不过,正如他后来提到的,所使用的 libstdc++
可能存在兼容性问题。在重新构建 gcc
,并用新的 gcc
重新构建 clang
之后,一切恢复正常。从某种意义上说,这个答案并不令人满意,因为我并不清楚这种不兼容的具体原因。但是,如果有人遇到同样的问题并偶然看到这个帖子,你就知道该怎么做了。