From 7f403067288f05335f7d8b8283ae1dac6cb1ab31 Mon Sep 17 00:00:00 2001 From: Howard Chu Date: Sun, 25 Aug 2024 00:33:16 +0800 Subject: [PATCH] perf trace: Add trace__bpf_sys_enter_beauty_map() to prepare for fetching data in BPF MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set up beauty_map, load it to BPF, in such format: if argument No.3 is a struct of size 32 bytes (of syscall number 114) beauty_map[114][2] = 32; if argument No.3 is a string (of syscall number 114) beauty_map[114][2] = 1; if argument No.3 is a buffer, its size is indicated by argument No.4 (of syscall number 114) beauty_map[114][2] = -4; /* -1 ~ -6, we'll read this buffer size in BPF */ Committer notes: Moved syscall_arg_fmt__cache_btf_struct() from a ifdef HAVE_LIBBPF_SUPPORT to closer to where it is used, that is ifdef'ed on HAVE_BPF_SKEL and thus breaks the build when building with BUILD_BPF_SKEL=0, as detected using 'make -C tools/perf build-test'. Also add 'struct beauty_map_enter' to tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c as we're using it in this patch, otherwise we get this while trying to build at this point in the original patch series: builtin-trace.c: In function ‘trace__init_syscalls_bpf_prog_array_maps’: builtin-trace.c:3725:58: error: ‘struct ’ has no member named ‘beauty_map_enter’ 3725 | int beauty_map_fd = bpf_map__fd(trace->skel->maps.beauty_map_enter); | We also have to take into account syscall_arg_fmt.from_user when telling the kernel what to copy in the sys_enter generic collector, we don't want to collect bogus data in buffers that will only be available to us at sys_exit time, i.e. after the kernel has filled it, so leave this for when we have such a sys_exit based collector. Committer testing: Not wired up yet, so all continues to work, using the existing BPF collector and userspace beautifiers that are augmentation aware: root@number:~# rm -f 987654 ; touch 123456 ; perf trace -e rename* mv 123456 987654 0.000 ( 0.031 ms): mv/20888 renameat2(olddfd: CWD, oldname: "123456", newdfd: CWD, newname: "987654", flags: NOREPLACE) = 0 root@number:~# perf trace -e connect,sendto ping -c 1 www.google.com 0.000 ( 0.014 ms): ping/20892 connect(fd: 5, uservaddr: { .family: LOCAL, path: /run/systemd/resolve/io.systemd.Resolve }, addrlen: 42) = 0 0.040 ( 0.003 ms): ping/20892 sendto(fd: 5, buff: 0x560b4ff17980, len: 97, flags: DONTWAIT|NOSIGNAL) = 97 0.480 ( 0.017 ms): ping/20892 sendto(fd: 5, buff: 0x7ffd82d07150, len: 20, addr: { .family: NETLINK }, addr_len: 0xc) = 20 0.526 ( 0.014 ms): ping/20892 connect(fd: 5, uservaddr: { .family: INET6, port: 0, addr: 2800:3f0:4004:810::2004 }, addrlen: 28) = 0 0.542 ( 0.002 ms): ping/20892 connect(fd: 5, uservaddr: { .family: UNSPEC }, addrlen: 16) = 0 0.544 ( 0.004 ms): ping/20892 connect(fd: 5, uservaddr: { .family: INET, port: 0, addr: 142.251.135.100 }, addrlen: 16) = 0 0.559 ( 0.002 ms): ping/20892 connect(fd: 5, uservaddr: { .family: INET, port: 1025, addr: 142.251.135.100 }, addrlen: 16PING www.google.com (142.251.135.100) 56(84) bytes of data. ) = 0 0.589 ( 0.058 ms): ping/20892 sendto(fd: 3, buff: 0x560b4ff11ac0, len: 64, addr: { .family: INET, port: 0, addr: 142.251.135.100 }, addr_len: 0x10) = 64 45.250 ( 0.029 ms): ping/20892 connect(fd: 5, uservaddr: { .family: LOCAL, path: /run/systemd/resolve/io.systemd.Resolve }, addrlen: 42) = 0 45.344 ( 0.012 ms): ping/20892 sendto(fd: 5, buff: 0x560b4ff19340, len: 111, flags: DONTWAIT|NOSIGNAL) = 111 64 bytes from rio09s08-in-f4.1e100.net (142.251.135.100): icmp_seq=1 ttl=49 time=44.4 ms --- www.google.com ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 44.361/44.361/44.361/0.000 ms root@number:~# Signed-off-by: Howard Chu Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ian Rogers Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: https://lore.kernel.org/r/20240815013626.935097-4-howardchu95@gmail.com Link: https://lore.kernel.org/r/20240824163322.60796-3-howardchu95@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 110 ++++++++++++++++++ .../bpf_skel/augmented_raw_syscalls.bpf.c | 7 ++ 2 files changed, 117 insertions(+) diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 72f61e928e2f..c672702fc0cf 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -120,6 +120,7 @@ struct syscall_arg_fmt { bool show_zero; #ifdef HAVE_LIBBPF_SUPPORT const struct btf_type *type; + int type_id; /* used in btf_dump */ #endif }; @@ -3462,6 +3463,23 @@ out_enomem: } #ifdef HAVE_BPF_SKEL +static int syscall_arg_fmt__cache_btf_struct(struct syscall_arg_fmt *arg_fmt, struct btf *btf, char *type) +{ + int id; + + if (arg_fmt->type != NULL) + return -1; + + id = btf__find_by_name(btf, type); + if (id < 0) + return -1; + + arg_fmt->type = btf__type_by_id(btf, id); + arg_fmt->type_id = id; + + return 0; +} + static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) { struct bpf_program *pos, *prog = NULL; @@ -3537,6 +3555,87 @@ static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id) return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); } +static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigned int *beauty_array) +{ + struct tep_format_field *field; + struct syscall *sc = trace__syscall_info(trace, NULL, key); + const struct btf_type *bt; + char *struct_offset, *tmp, name[32]; + bool can_augment = false; + int i, cnt; + + if (sc == NULL) + return -1; + + trace__load_vmlinux_btf(trace); + if (trace->btf == NULL) + return -1; + + for (i = 0, field = sc->args; field; ++i, field = field->next) { + struct_offset = strstr(field->type, "struct "); + + // XXX We're only collecting pointer payloads _from_ user space + if (!sc->arg_fmt[i].from_user) + continue; + + if (field->flags & TEP_FIELD_IS_POINTER && struct_offset) { /* struct */ + struct_offset += 7; + + /* for 'struct foo *', we only want 'foo' */ + for (tmp = struct_offset, cnt = 0; *tmp != ' ' && *tmp != '\0'; ++tmp, ++cnt) { + } + + strncpy(name, struct_offset, cnt); + name[cnt] = '\0'; + + /* cache struct's btf_type and type_id */ + if (syscall_arg_fmt__cache_btf_struct(&sc->arg_fmt[i], trace->btf, name)) + continue; + + bt = sc->arg_fmt[i].type; + beauty_array[i] = bt->size; + can_augment = true; + } else if (field->flags & TEP_FIELD_IS_POINTER && /* string */ + strcmp(field->type, "const char *") == 0 && + (strstr(field->name, "name") || + strstr(field->name, "path") || + strstr(field->name, "file") || + strstr(field->name, "root") || + strstr(field->name, "key") || + strstr(field->name, "special") || + strstr(field->name, "type") || + strstr(field->name, "description"))) { + beauty_array[i] = 1; + can_augment = true; + } else if (field->flags & TEP_FIELD_IS_POINTER && /* buffer */ + strstr(field->type, "char *") && + (strstr(field->name, "buf") || + strstr(field->name, "val") || + strstr(field->name, "msg"))) { + int j; + struct tep_format_field *field_tmp; + + /* find the size of the buffer that appears in pairs with buf */ + for (j = 0, field_tmp = sc->args; field_tmp; ++j, field_tmp = field_tmp->next) { + if (!(field_tmp->flags & TEP_FIELD_IS_POINTER) && /* only integers */ + (strstr(field_tmp->name, "count") || + strstr(field_tmp->name, "siz") || /* size, bufsiz */ + (strstr(field_tmp->name, "len") && strcmp(field_tmp->name, "filename")))) { + /* filename's got 'len' in it, we don't want that */ + beauty_array[i] = -(j + 1); + can_augment = true; + break; + } + } + } + } + + if (can_augment) + return 0; + + return -1; +} + static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc) { struct tep_format_field *field, *candidate_field; @@ -3641,7 +3740,9 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) { int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter); int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit); + int beauty_map_fd = bpf_map__fd(trace->skel->maps.beauty_map_enter); int err = 0; + unsigned int beauty_array[6]; for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { int prog_fd, key = syscalltbl__id_at_idx(trace->sctbl, i); @@ -3660,6 +3761,15 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY); if (err) break; + + /* use beauty_map to tell BPF how many bytes to collect, set beauty_map's value here */ + memset(beauty_array, 0, sizeof(beauty_array)); + err = trace__bpf_sys_enter_beauty_map(trace, key, (unsigned int *)beauty_array); + if (err) + continue; + err = bpf_map_update_elem(beauty_map_fd, &key, beauty_array, BPF_ANY); + if (err) + break; } /* diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index 9c7d2f855294..4ebce6763743 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -124,6 +124,13 @@ struct augmented_args_tmp { __uint(max_entries, 1); } augmented_args_tmp SEC(".maps"); +struct beauty_map_enter { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); + __type(value, __u32[6]); + __uint(max_entries, 512); +} beauty_map_enter SEC(".maps"); + static inline struct augmented_args_payload *augmented_args_payload(void) { int key = 0;