如何强制 g++ 内联 STL 函数
How to force g++ to inline STL functions
测试代码:
#include <array>
// Returns the element at index 9 (the last one) of a 10-element std::array,
// read through std::array::operator[].
int test(const std::array<int, 10> &arr) {
return arr[9];
}
我想让 arr[9] 的访问和 C 风格数组一样高效,这意味着需要内联 std::array 的 operator[] 函数。
我检查了生成的汇编代码:
$ g++ --std=c++17 -c test.cpp && objdump -d -C test.o
test.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <test(std::array<int, 10ul> const&)>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 83 ec 10 sub $0x10,%rsp
8: 48 89 7d f8 mov %rdi,0xfffffffffffffff8(%rbp)
c: 48 8b 45 f8 mov 0xfffffffffffffff8(%rbp),%rax
10: be 09 00 00 00 mov $0x9,%esi
15: 48 89 c7 mov %rax,%rdi
18: e8 00 00 00 00 callq 1d <test(std::array<int, 10ul> const&)+0x1d>
1d: 8b 00 mov (%rax),%eax
1f: c9 leaveq
20: c3 retq
Disassembly of section .text._ZNKSt5arrayIiLm10EEixEm:
0000000000000000 <std::array<int, 10ul>::operator[](unsigned long) const>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 83 ec 10 sub $0x10,%rsp
8: 48 89 7d f8 mov %rdi,0xfffffffffffffff8(%rbp)
c: 48 89 75 f0 mov %rsi,0xfffffffffffffff0(%rbp)
10: 48 8b 45 f8 mov 0xfffffffffffffff8(%rbp),%rax
14: 48 8b 55 f0 mov 0xfffffffffffffff0(%rbp),%rdx
18: 48 89 d6 mov %rdx,%rsi
1b: 48 89 c7 mov %rax,%rdi
1e: e8 00 00 00 00 callq 23 <std::array<int, 10ul>::operator[](unsigned long) const+0x23>
23: c9 leaveq
24: c3 retq
Disassembly of section .text._ZNSt14__array_traitsIiLm10EE6_S_refERA10_Kim:
0000000000000000 <std::__array_traits<int, 10ul>::_S_ref(int const (&) [10], unsigned long)>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 89 7d f8 mov %rdi,0xfffffffffffffff8(%rbp)
8: 48 89 75 f0 mov %rsi,0xfffffffffffffff0(%rbp)
c: 48 8b 45 f0 mov 0xfffffffffffffff0(%rbp),%rax
10: 48 8d 14 85 00 00 00 lea 0x0(,%rax,4),%rdx
17: 00
18: 48 8b 45 f8 mov 0xfffffffffffffff8(%rbp),%rax
1c: 48 01 d0 add %rdx,%rax
1f: 5d pop %rbp
20: c3 retq
arr[9] 在生成的代码中是一次函数调用:callq 1d <test(std::array<int, 10ul> const&)+0x1d>。
如果我指定了优化级别,STL 函数将按预期内联:
$ g++ --std=c++17 -Og -c test.cpp && objdump -d -C test.o
test.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <test(std::array<int, 10ul> const&)>:
0: 8b 47 24 mov 0x24(%rdi),%eax
3: c3 retq
但是我的真实项目是一个大项目,我无法更改全局编译优化标志。所以我想为一些文件指定优化标志。
所以我在我的程序中添加#pragma GCC optimize ("string"...)
:
#pragma GCC optimize ("-Og")
#include <array>
// Returns the element at index 9 (the last one) of a 10-element std::array,
// read through std::array::operator[].
int test(const std::array<int, 10> &arr) {
return arr[9];
}
这个选项确实起了一些作用:
$ g++ --std=c++17 -c test.cpp && objdump -d -C test.o
test.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <test(std::array<int, 10ul> const&)>:
0: 48 83 ec 08 sub $0x8,%rsp
4: be 09 00 00 00 mov $0x9,%esi
9: e8 00 00 00 00 callq e <test(std::array<int, 10ul> const&)+0xe>
e: 8b 00 mov (%rax),%eax
10: 48 83 c4 08 add $0x8,%rsp
14: c3 retq
Disassembly of section .text._ZNKSt5arrayIiLm10EEixEm:
0000000000000000 <std::array<int, 10ul>::operator[](unsigned long) const>:
0: 48 83 ec 08 sub $0x8,%rsp
4: e8 00 00 00 00 callq 9 <std::array<int, 10ul>::operator[](unsigned long) const+0x9>
9: 48 83 c4 08 add $0x8,%rsp
d: c3 retq
Disassembly of section .text._ZNSt14__array_traitsIiLm10EE6_S_refERA10_Kim:
0000000000000000 <std::__array_traits<int, 10ul>::_S_ref(int const (&) [10], unsigned long)>:
0: 48 8d 04 b7 lea (%rdi,%rsi,4),%rax
4: c3 retq
std::array<int, 10ul>::operator[] 和 __array_traits 函数本身被优化了,但是我们可以看到,仍然存在一次函数调用:callq e <test(std::array<int, 10ul> const&)+0xe>。
所以我想知道为什么 #pragma GCC optimize ("-Og") 没有像我预期的那样生效。我想知道如何为指定文件强制内联 STL 函数?
注:GCC 版本:8.2
#pragma GCC optimize ("Og") 默认不会启用内联,因为在未开启优化时默认使用 -fno-inline。
使用 #pragma GCC optimize ("Og,inline") 可以启用内联。
测试代码:
#include <array>
// Returns the element at index 9 (the last one) of a 10-element std::array,
// read through std::array::operator[].
int test(const std::array<int, 10> &arr) {
return arr[9];
}
我想让 arr[9] 的访问和 C 风格数组一样高效,这意味着需要内联 std::array 的 operator[] 函数。
我检查了生成的汇编代码:
$ g++ --std=c++17 -c test.cpp && objdump -d -C test.o
test.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <test(std::array<int, 10ul> const&)>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 83 ec 10 sub $0x10,%rsp
8: 48 89 7d f8 mov %rdi,0xfffffffffffffff8(%rbp)
c: 48 8b 45 f8 mov 0xfffffffffffffff8(%rbp),%rax
10: be 09 00 00 00 mov $0x9,%esi
15: 48 89 c7 mov %rax,%rdi
18: e8 00 00 00 00 callq 1d <test(std::array<int, 10ul> const&)+0x1d>
1d: 8b 00 mov (%rax),%eax
1f: c9 leaveq
20: c3 retq
Disassembly of section .text._ZNKSt5arrayIiLm10EEixEm:
0000000000000000 <std::array<int, 10ul>::operator[](unsigned long) const>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 83 ec 10 sub $0x10,%rsp
8: 48 89 7d f8 mov %rdi,0xfffffffffffffff8(%rbp)
c: 48 89 75 f0 mov %rsi,0xfffffffffffffff0(%rbp)
10: 48 8b 45 f8 mov 0xfffffffffffffff8(%rbp),%rax
14: 48 8b 55 f0 mov 0xfffffffffffffff0(%rbp),%rdx
18: 48 89 d6 mov %rdx,%rsi
1b: 48 89 c7 mov %rax,%rdi
1e: e8 00 00 00 00 callq 23 <std::array<int, 10ul>::operator[](unsigned long) const+0x23>
23: c9 leaveq
24: c3 retq
Disassembly of section .text._ZNSt14__array_traitsIiLm10EE6_S_refERA10_Kim:
0000000000000000 <std::__array_traits<int, 10ul>::_S_ref(int const (&) [10], unsigned long)>:
0: 55 push %rbp
1: 48 89 e5 mov %rsp,%rbp
4: 48 89 7d f8 mov %rdi,0xfffffffffffffff8(%rbp)
8: 48 89 75 f0 mov %rsi,0xfffffffffffffff0(%rbp)
c: 48 8b 45 f0 mov 0xfffffffffffffff0(%rbp),%rax
10: 48 8d 14 85 00 00 00 lea 0x0(,%rax,4),%rdx
17: 00
18: 48 8b 45 f8 mov 0xfffffffffffffff8(%rbp),%rax
1c: 48 01 d0 add %rdx,%rax
1f: 5d pop %rbp
20: c3 retq
arr[9] 在生成的代码中是一次函数调用:callq 1d <test(std::array<int, 10ul> const&)+0x1d>。
如果我指定了优化级别,STL 函数将按预期内联:
$ g++ --std=c++17 -Og -c test.cpp && objdump -d -C test.o
test.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <test(std::array<int, 10ul> const&)>:
0: 8b 47 24 mov 0x24(%rdi),%eax
3: c3 retq
但是我的真实项目是一个大项目,我无法更改全局编译优化标志。所以我想为一些文件指定优化标志。
所以我在我的程序中添加#pragma GCC optimize ("string"...)
:
#pragma GCC optimize ("-Og")
#include <array>
// Returns the element at index 9 (the last one) of a 10-element std::array,
// read through std::array::operator[].
int test(const std::array<int, 10> &arr) {
return arr[9];
}
这个选项确实起了一些作用:
$ g++ --std=c++17 -c test.cpp && objdump -d -C test.o
test.o: file format elf64-x86-64
Disassembly of section .text:
0000000000000000 <test(std::array<int, 10ul> const&)>:
0: 48 83 ec 08 sub $0x8,%rsp
4: be 09 00 00 00 mov $0x9,%esi
9: e8 00 00 00 00 callq e <test(std::array<int, 10ul> const&)+0xe>
e: 8b 00 mov (%rax),%eax
10: 48 83 c4 08 add $0x8,%rsp
14: c3 retq
Disassembly of section .text._ZNKSt5arrayIiLm10EEixEm:
0000000000000000 <std::array<int, 10ul>::operator[](unsigned long) const>:
0: 48 83 ec 08 sub $0x8,%rsp
4: e8 00 00 00 00 callq 9 <std::array<int, 10ul>::operator[](unsigned long) const+0x9>
9: 48 83 c4 08 add $0x8,%rsp
d: c3 retq
Disassembly of section .text._ZNSt14__array_traitsIiLm10EE6_S_refERA10_Kim:
0000000000000000 <std::__array_traits<int, 10ul>::_S_ref(int const (&) [10], unsigned long)>:
0: 48 8d 04 b7 lea (%rdi,%rsi,4),%rax
4: c3 retq
std::array<int, 10ul>::operator[] 和 __array_traits 函数本身被优化了,但是我们可以看到,仍然存在一次函数调用:callq e <test(std::array<int, 10ul> const&)+0xe>。
所以我想知道为什么 #pragma GCC optimize ("-Og") 没有像我预期的那样生效。我想知道如何为指定文件强制内联 STL 函数?
注:GCC 版本:8.2
#pragma GCC optimize ("Og") 默认不会启用内联,因为在未开启优化时默认使用 -fno-inline。
使用 #pragma GCC optimize ("Og,inline") 可以启用内联。