golang、ebpf 和函数持续时间
golang, ebpf and functions duration
我正在玩 gobpf,但在计算跟踪用户的持续时间时遇到了问题-space 函数。
我使用 bpf_ktime_get_ns() 来读取时间,然后尝试计算增量,但得到了巨大的数字,尽管跟踪函数只休眠了 1 秒。
这里是经过测试的C程序,其中有一个函数叫做"ameba"。
#include <stdio.h>
#include <strings.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
char * ameba(char * s1, char * s2);
int main(void) {
time_t rawtime;
struct tm * timeinfo;
time(&rawtime);
timeinfo = localtime(&rawtime);
printf("enter: %s", asctime (timeinfo));
printf("%s\n", ameba("lessqqmorepewpew", "good luck, have fun"));
time(&rawtime);
timeinfo = localtime(&rawtime);
printf("return: %s", asctime(timeinfo));
}
char * ameba(char * s1, char * s2) {
char *s;
s = (char *) malloc(128);
sleep(1);
snprintf(s, 128, "phrase1: %s, phrase2: %s", s1, s2);
return s;
}
转到代码
package main
import (
"bytes"
"encoding/binary"
"fmt"
"os"
"os/signal"
"time"
bpf "github.com/iovisor/gobpf/bcc"
)
const source string = `
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
struct val_t {
u32 pid;
char comm[TASK_COMM_LEN];
u64 ts;
};
struct data_t {
u32 pid;
char comm[TASK_COMM_LEN];
u64 delta;
};
BPF_HASH(start, u32, struct val_t);
BPF_PERF_OUTPUT(ameba_events);
int do_entry(struct pt_regs *ctx) {
if (!PT_REGS_PARM1(ctx))
return 0;
struct val_t val = {};
u32 pid = bpf_get_current_pid_tgid();
if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
val.pid = bpf_get_current_pid_tgid();
val.ts = bpf_ktime_get_ns();
start.update(&pid, &val);
}
return 0;
}
int do_return(struct pt_regs *ctx) {
struct val_t *valp;
struct data_t data = {};
u64 delta;
u32 pid = bpf_get_current_pid_tgid();
u64 tsp = bpf_ktime_get_ns();
valp = start.lookup(&pid);
if (valp == 0)
return 0; // missed start
bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
data.pid = valp->pid;
data.delta = tsp - valp->ts;
ameba_events.perf_submit(ctx, &data, sizeof(data));
start.delete(&pid);
return 0;
}
`
type amebaEvent struct {
Pid uint32
Comm [16]byte
Delta uint64
}
func main() {
m := bpf.NewModule(source, []string{})
defer m.Close()
amebaUprobe, err := m.LoadUprobe("do_entry")
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to load do_entry: %s\n", err)
os.Exit(1)
}
amebaUretprobe, err := m.LoadUprobe("do_return")
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to load do_return: %s\n", err)
os.Exit(1)
}
err = m.AttachUprobe("/tmp/sandbox/ameba", "ameba", amebaUprobe, -1)
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to attach do_entry uprobe: %s\n", err)
os.Exit(1)
}
err = m.AttachUretprobe("/tmp/sandbox/ameba", "ameba", amebaUretprobe, -1)
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to attach do_return uretprobe: %s\n", err)
os.Exit(1)
}
table := bpf.NewTable(m.TableId("ameba_events"), m)
channel := make(chan []byte)
perfMap, err := bpf.InitPerfMap(table, channel)
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to init perf map: %s\n", err)
os.Exit(1)
}
sig := make(chan os.Signal, 1)
signal.Notify(sig, os.Interrupt, os.Kill)
fmt.Printf("%10s\t%s\t%s\t%s\n", "PID", "COMMAND", "DURATION", "RAW")
go func() {
var event amebaEvent
for {
data := <-channel
err := binary.Read(bytes.NewBuffer(data), binary.LittleEndian, &event)
if err != nil {
fmt.Printf("failed to decode received data: %s\n", err)
continue
}
// Convert C string (null-terminated) to Go string
comm := string(event.Comm[:bytes.IndexByte(event.Comm[:], 0)])
fmt.Printf("%10d\t%s\t%s\t%d\n", event.Pid, comm, time.Duration(event.Delta), event.Delta)
}
}()
perfMap.Start()
<-sig
perfMap.Stop()
}
程序输出示例:
PID 命令持续时间原始
15515 阿米巴 1193179h21m9.457496929s 4295445669457496929
15550 阿米巴 1193198h27m37.653709676s 4295514457653709676
哪里错了??
这看起来像是内核和用户端之间 paddings 不匹配造成的。 data_t
结构实际上在编译时被填充为等同于以下内容:
struct data_t {
u32 pid;
char padding[4];
char comm[TASK_COMM_LEN];
u64 delta;
};
如果您在 Go 端明确添加相同的填充,您的问题就会消失:
type amebaEvent struct {
Pid uint32
Pad [4]byte
Comm [16]byte
Delta uint64
}
产生:
PID COMMAND DURATION RAW
8258 a 1.000179625s 1000179625
8260 a 1.000158337s 1000158337
正如您在评论中提到的,另一种解决方案是打包 C 结构以删除填充,__attribute__((packed))
。
它与 bcc 上的这个问题密切相关——尽管不完全相同:https://github.com/iovisor/bcc/issues/2017。
我正在玩 gobpf,但在计算跟踪用户的持续时间时遇到了问题-space 函数。 我使用 bpf_ktime_get_ns() 来读取时间,然后尝试计算增量,但得到了巨大的数字,尽管跟踪函数只休眠了 1 秒。
这里是经过测试的C程序,其中有一个函数叫做"ameba"。
#include <stdio.h>
#include <strings.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
char * ameba(char * s1, char * s2);
int main(void) {
time_t rawtime;
struct tm * timeinfo;
time(&rawtime);
timeinfo = localtime(&rawtime);
printf("enter: %s", asctime (timeinfo));
printf("%s\n", ameba("lessqqmorepewpew", "good luck, have fun"));
time(&rawtime);
timeinfo = localtime(&rawtime);
printf("return: %s", asctime(timeinfo));
}
char * ameba(char * s1, char * s2) {
char *s;
s = (char *) malloc(128);
sleep(1);
snprintf(s, 128, "phrase1: %s, phrase2: %s", s1, s2);
return s;
}
转到代码
package main
import (
"bytes"
"encoding/binary"
"fmt"
"os"
"os/signal"
"time"
bpf "github.com/iovisor/gobpf/bcc"
)
const source string = `
#include <uapi/linux/ptrace.h>
#include <linux/sched.h>
struct val_t {
u32 pid;
char comm[TASK_COMM_LEN];
u64 ts;
};
struct data_t {
u32 pid;
char comm[TASK_COMM_LEN];
u64 delta;
};
BPF_HASH(start, u32, struct val_t);
BPF_PERF_OUTPUT(ameba_events);
int do_entry(struct pt_regs *ctx) {
if (!PT_REGS_PARM1(ctx))
return 0;
struct val_t val = {};
u32 pid = bpf_get_current_pid_tgid();
if (bpf_get_current_comm(&val.comm, sizeof(val.comm)) == 0) {
val.pid = bpf_get_current_pid_tgid();
val.ts = bpf_ktime_get_ns();
start.update(&pid, &val);
}
return 0;
}
int do_return(struct pt_regs *ctx) {
struct val_t *valp;
struct data_t data = {};
u64 delta;
u32 pid = bpf_get_current_pid_tgid();
u64 tsp = bpf_ktime_get_ns();
valp = start.lookup(&pid);
if (valp == 0)
return 0; // missed start
bpf_probe_read(&data.comm, sizeof(data.comm), valp->comm);
data.pid = valp->pid;
data.delta = tsp - valp->ts;
ameba_events.perf_submit(ctx, &data, sizeof(data));
start.delete(&pid);
return 0;
}
`
type amebaEvent struct {
Pid uint32
Comm [16]byte
Delta uint64
}
func main() {
m := bpf.NewModule(source, []string{})
defer m.Close()
amebaUprobe, err := m.LoadUprobe("do_entry")
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to load do_entry: %s\n", err)
os.Exit(1)
}
amebaUretprobe, err := m.LoadUprobe("do_return")
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to load do_return: %s\n", err)
os.Exit(1)
}
err = m.AttachUprobe("/tmp/sandbox/ameba", "ameba", amebaUprobe, -1)
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to attach do_entry uprobe: %s\n", err)
os.Exit(1)
}
err = m.AttachUretprobe("/tmp/sandbox/ameba", "ameba", amebaUretprobe, -1)
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to attach do_return uretprobe: %s\n", err)
os.Exit(1)
}
table := bpf.NewTable(m.TableId("ameba_events"), m)
channel := make(chan []byte)
perfMap, err := bpf.InitPerfMap(table, channel)
if err != nil {
fmt.Fprintf(os.Stderr, "Failed to init perf map: %s\n", err)
os.Exit(1)
}
sig := make(chan os.Signal, 1)
signal.Notify(sig, os.Interrupt, os.Kill)
fmt.Printf("%10s\t%s\t%s\t%s\n", "PID", "COMMAND", "DURATION", "RAW")
go func() {
var event amebaEvent
for {
data := <-channel
err := binary.Read(bytes.NewBuffer(data), binary.LittleEndian, &event)
if err != nil {
fmt.Printf("failed to decode received data: %s\n", err)
continue
}
// Convert C string (null-terminated) to Go string
comm := string(event.Comm[:bytes.IndexByte(event.Comm[:], 0)])
fmt.Printf("%10d\t%s\t%s\t%d\n", event.Pid, comm, time.Duration(event.Delta), event.Delta)
}
}()
perfMap.Start()
<-sig
perfMap.Stop()
}
程序输出示例:
PID 命令持续时间原始
15515 阿米巴 1193179h21m9.457496929s 4295445669457496929
15550 阿米巴 1193198h27m37.653709676s 4295514457653709676
哪里错了??
这看起来像是内核和用户端之间 paddings 不匹配造成的。 data_t
结构实际上在编译时被填充为等同于以下内容:
struct data_t {
u32 pid;
char padding[4];
char comm[TASK_COMM_LEN];
u64 delta;
};
如果您在 Go 端明确添加相同的填充,您的问题就会消失:
type amebaEvent struct {
Pid uint32
Pad [4]byte
Comm [16]byte
Delta uint64
}
产生:
PID COMMAND DURATION RAW
8258 a 1.000179625s 1000179625
8260 a 1.000158337s 1000158337
正如您在评论中提到的,另一种解决方案是打包 C 结构以删除填充,__attribute__((packed))
。
它与 bcc 上的这个问题密切相关——尽管不完全相同:https://github.com/iovisor/bcc/issues/2017。