perf:为调用图启用符号还原(demangling)
perf enable demangling of callgraph
如何为 perf 调用图启用 C++ 符号还原(demangling)?当我进入注解(annotate)模式时,符号似乎已被还原,但在主调用图中却没有。
示例代码(使用Google Benchmark):
#include <benchmark/benchmark.h>
#include <vector>
// Leaf workload of the example: runs 1000 modulo operations and hands each
// result to benchmark::DoNotOptimize so the work is not eliminated.
// Marked noinline so it stays a separate function in the profile.
static __attribute__ ((noinline)) int my_really_big_function() {
    for (size_t iter = 0; iter < 1000; ++iter) {
        benchmark::DoNotOptimize(iter % 5);
    }
    // The return value carries no information; the caller only consumes it.
    return 0;
}
// Middle frame of the example call chain: calls my_really_big_function()
// 1000 times and does one extra modulo per iteration on its own.
// Marked noinline so it stays a separate function in the profile.
static __attribute__ ((noinline)) void caller1() {
    for (size_t round = 0; round < 1000; ++round) {
        // Callee result is passed to DoNotOptimize so the call is kept.
        benchmark::DoNotOptimize(my_really_big_function());
        // Small amount of work attributed to caller1 itself.
        benchmark::DoNotOptimize(round % 5);
    }
}
// Benchmark body: invokes caller1() once per iteration for as long as
// the benchmark state reports that it should keep running.
static __attribute__ ((noinline)) void myfun(benchmark::State& state) {
    while (state.KeepRunning()) {
        caller1();
    }
}
// Register myfun as a benchmark with the Google Benchmark library.
BENCHMARK(myfun);
// Library-provided macro that supplies the program's main(); no main() is
// written by hand in this example.
BENCHMARK_MAIN();
构建命令:
clang++ main.cpp -o main -fno-omit-frame-pointer -O0 -lpthread -lbenchmark
perf 命令:
perf record -g ./main
perf report -g 'graph,0.5,caller'
我也试过启用 --demangle 选项,但这似乎不会影响输出。
调用图缺少 demangled 符号:
Samples: 3K of event 'cycles', Event count (approx.): 2946754102
Children Self Command Shared Object Symbol
+ 99.82% 0.00% main main [.] _ZL5myfunRN9benchmark5StateE
+ 99.82% 0.00% main main [.] _ZN9benchmark12_GLOBAL__N_111RunInThreadEPKNS_8internal9Benchmark8InstanceEmiPNS0_11ThreadStatsE
+ 99.82% 0.00% main main [.] _ZN9benchmark22RunSpecifiedBenchmarksEPNS_17BenchmarkReporterE
+ 99.82% 0.00% main main [.] main
+ 99.82% 0.00% main libc-2.21.so [.] __libc_start_main
+ 99.82% 0.00% main [unknown] [.] 0x7fbe258d4c544155
+ 99.75% 0.30% main main [.] _ZL7caller1v
+ 99.52% 99.46% main main [.] _ZL22my_really_big_functionv
带注释的反汇编显示 demangled 调用:
│
│ 0000000000404310 <caller1()>:
│ _ZL7caller1v():
│ push %rbp
│ mov %rsp,%rbp
│ sub $0x30,%rsp
│ movq $0x0,-0x18(%rbp)
│10: cmpq $0x3e8,-0x18(%rbp)
│ ↓ jae 6f
│ → callq my_really_big_function()
│ lea -0x1c(%rbp),%rcx
│ mov %eax,-0x1c(%rbp)
14.29 │ mov %rcx,-0x10(%rbp)
│ mov -0x10(%rbp),%rcx
│ lea -0x28(%rbp),%rcx
│ mov $0x5,%eax
│ mov %eax,%edx
│ mov -0x18(%rbp),%rax
│ xor %esi,%esi
│ mov %rdx,-0x30(%rbp)
│ mov %esi,%edx
│ mov -0x30(%rbp),%rdi
│ div %rdi
85.71 │ mov %rdx,-0x28(%rbp)
│ mov %rcx,-0x8(%rbp)
│ mov -0x8(%rbp),%rcx
│ mov -0x18(%rbp),%rax
│ add $0x1,%rax
│ mov %rax,-0x18(%rbp)
│ ↑ jmpq 10
│6f: add $0x30,%rsp
│ pop %rbp
│ ← retq
系统信息:
- Ubuntu 15.04 64 位
- 英特尔 i5-6600k
- perf 3.19.8-ckt6
- clang 3.6.0-2ubuntu1
我在 Ubuntu 15.10 上遇到了同样的问题,我在这里找到了解决方案:https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1396654
更新:也适用于 Ubuntu 18.10
步骤如下:
# Install the libiberty and binutils development packages before building perf
sudo apt-get install libiberty-dev binutils-dev
mkdir ~/install
cd ~/install
# If fetching the source package below fails, enable (uncomment) the
# deb-src entries in /etc/apt/sources.list, as suggested by @ctitze,
# or download the source by hand from packages.ubuntu.com,
# as @aleixrocks suggested in the comment below
apt-get source linux-tools-$(uname -r)
sudo apt-get build-dep linux-tools-$(uname -r)
# The source directory is named after the kernel version without its "-..." suffix
cd linux-$(uname -r | sed 's/-.*//')/tools/perf
make
# the freshly built "perf" executable should now be in this directory
./perf
应该还有办法创建一个新的 linux-tools-common 软件包,把它真正集成到系统中。现在,要用新编译的 perf 覆盖官方的 perf,只需设置 PATH:
export PATH=~/install/linux-`uname -r | sed 's/-.*//'`/tools/perf:$PATH
如果您不清楚要从 packages.ubuntu.com 下载什么(如第一个答案所述),也可以从 git 下载 Linux 内核源代码:
# Install the libiberty and binutils development packages before building perf
sudo apt-get install libiberty-dev binutils-dev
# Work in a dedicated directory under the home directory
mkdir ~/install
cd ~/install
# Shallow clone (--depth 1): fetch only the most recent revision of the kernel tree
git clone https://github.com/torvalds/linux --depth 1
# Build perf from the tools/perf directory of the kernel tree
cd linux/tools/perf
make
# now you should see the new "perf" executable here
./perf
并修改路径(如第一个答案):
export PATH=~/install/linux/tools/perf:$PATH
如何为 perf 调用图启用 C++ 符号还原(demangling)?当我进入注解(annotate)模式时,符号似乎已被还原,但在主调用图中却没有。
示例代码(使用Google Benchmark):
#include <benchmark/benchmark.h>
#include <vector>
// Leaf workload of the example: runs 1000 modulo operations and hands each
// result to benchmark::DoNotOptimize so the work is not eliminated.
// Marked noinline so it stays a separate function in the profile.
static __attribute__ ((noinline)) int my_really_big_function() {
    for (size_t iter = 0; iter < 1000; ++iter) {
        benchmark::DoNotOptimize(iter % 5);
    }
    // The return value carries no information; the caller only consumes it.
    return 0;
}
// Middle frame of the example call chain: calls my_really_big_function()
// 1000 times and does one extra modulo per iteration on its own.
// Marked noinline so it stays a separate function in the profile.
static __attribute__ ((noinline)) void caller1() {
    for (size_t round = 0; round < 1000; ++round) {
        // Callee result is passed to DoNotOptimize so the call is kept.
        benchmark::DoNotOptimize(my_really_big_function());
        // Small amount of work attributed to caller1 itself.
        benchmark::DoNotOptimize(round % 5);
    }
}
// Benchmark body: invokes caller1() once per iteration for as long as
// the benchmark state reports that it should keep running.
static __attribute__ ((noinline)) void myfun(benchmark::State& state) {
    while (state.KeepRunning()) {
        caller1();
    }
}
// Register myfun as a benchmark with the Google Benchmark library.
BENCHMARK(myfun);
// Library-provided macro that supplies the program's main(); no main() is
// written by hand in this example.
BENCHMARK_MAIN();
构建命令:
clang++ main.cpp -o main -fno-omit-frame-pointer -O0 -lpthread -lbenchmark
perf 命令:
perf record -g ./main
perf report -g 'graph,0.5,caller'
我也试过启用 --demangle 选项,但这似乎不会影响输出。
调用图缺少 demangled 符号:
Samples: 3K of event 'cycles', Event count (approx.): 2946754102
Children Self Command Shared Object Symbol
+ 99.82% 0.00% main main [.] _ZL5myfunRN9benchmark5StateE
+ 99.82% 0.00% main main [.] _ZN9benchmark12_GLOBAL__N_111RunInThreadEPKNS_8internal9Benchmark8InstanceEmiPNS0_11ThreadStatsE
+ 99.82% 0.00% main main [.] _ZN9benchmark22RunSpecifiedBenchmarksEPNS_17BenchmarkReporterE
+ 99.82% 0.00% main main [.] main
+ 99.82% 0.00% main libc-2.21.so [.] __libc_start_main
+ 99.82% 0.00% main [unknown] [.] 0x7fbe258d4c544155
+ 99.75% 0.30% main main [.] _ZL7caller1v
+ 99.52% 99.46% main main [.] _ZL22my_really_big_functionv
带注释的反汇编显示 demangled 调用:
│
│ 0000000000404310 <caller1()>:
│ _ZL7caller1v():
│ push %rbp
│ mov %rsp,%rbp
│ sub $0x30,%rsp
│ movq $0x0,-0x18(%rbp)
│10: cmpq $0x3e8,-0x18(%rbp)
│ ↓ jae 6f
│ → callq my_really_big_function()
│ lea -0x1c(%rbp),%rcx
│ mov %eax,-0x1c(%rbp)
14.29 │ mov %rcx,-0x10(%rbp)
│ mov -0x10(%rbp),%rcx
│ lea -0x28(%rbp),%rcx
│ mov $0x5,%eax
│ mov %eax,%edx
│ mov -0x18(%rbp),%rax
│ xor %esi,%esi
│ mov %rdx,-0x30(%rbp)
│ mov %esi,%edx
│ mov -0x30(%rbp),%rdi
│ div %rdi
85.71 │ mov %rdx,-0x28(%rbp)
│ mov %rcx,-0x8(%rbp)
│ mov -0x8(%rbp),%rcx
│ mov -0x18(%rbp),%rax
│ add $0x1,%rax
│ mov %rax,-0x18(%rbp)
│ ↑ jmpq 10
│6f: add $0x30,%rsp
│ pop %rbp
│ ← retq
系统信息:
- Ubuntu 15.04 64 位
- 英特尔 i5-6600k
- perf 3.19.8-ckt6
- clang 3.6.0-2ubuntu1
我在 Ubuntu 15.10 上遇到了同样的问题,我在这里找到了解决方案:https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1396654
更新:也适用于 Ubuntu 18.10
步骤如下:
# Install the libiberty and binutils development packages before building perf
sudo apt-get install libiberty-dev binutils-dev
mkdir ~/install
cd ~/install
# If fetching the source package below fails, enable (uncomment) the
# deb-src entries in /etc/apt/sources.list, as suggested by @ctitze,
# or download the source by hand from packages.ubuntu.com,
# as @aleixrocks suggested in the comment below
apt-get source linux-tools-$(uname -r)
sudo apt-get build-dep linux-tools-$(uname -r)
# The source directory is named after the kernel version without its "-..." suffix
cd linux-$(uname -r | sed 's/-.*//')/tools/perf
make
# the freshly built "perf" executable should now be in this directory
./perf
应该还有办法创建一个新的 linux-tools-common 软件包,把它真正集成到系统中。现在,要用新编译的 perf 覆盖官方的 perf,只需设置 PATH:
export PATH=~/install/linux-`uname -r | sed 's/-.*//'`/tools/perf:$PATH
如果您不清楚要从 packages.ubuntu.com 下载什么(如第一个答案所述),也可以从 git 下载 Linux 内核源代码:
# Install the libiberty and binutils development packages before building perf
sudo apt-get install libiberty-dev binutils-dev
# Work in a dedicated directory under the home directory
mkdir ~/install
cd ~/install
# Shallow clone (--depth 1): fetch only the most recent revision of the kernel tree
git clone https://github.com/torvalds/linux --depth 1
# Build perf from the tools/perf directory of the kernel tree
cd linux/tools/perf
make
# now you should see the new "perf" executable here
./perf
并修改路径(如第一个答案):
export PATH=~/install/linux/tools/perf:$PATH