From 4afdc00c378f34943fb3a3cc08db4babdacb5c5b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 2 Sep 2024 13:05:14 -0700 Subject: [PATCH] perf lock contention: Constify control data for BPF The control knobs set before loading BPF programs should be declared as 'const volatile' so that they can be optimized by the BPF core. Committer testing: root@x1:~# perf lock contention --use-bpf contended total wait max wait avg wait type caller 5 31.57 us 14.93 us 6.31 us mutex btrfs_delayed_update_inode+0x43 1 16.91 us 16.91 us 16.91 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 15.13 us 15.13 us 15.13 us spinlock btrfs_getattr+0xd1 1 6.65 us 6.65 us 6.65 us rwsem:R btrfs_tree_read_lock_nested+0x1b 1 4.34 us 4.34 us 4.34 us spinlock process_one_work+0x1a9 root@x1:~# root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf 0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8) = -1 EOPNOTSUPP (Operation not supported) 0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16 0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16 26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148) = 16 26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80) = 16 87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40) = 16 87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40) = 16 88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40) = 16 88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148) = 17 88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40) = 16 root@x1:~# Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Cc: Song Liu Link: 
https://lore.kernel.org/r/20240902200515.2103769-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf_lock_contention.c | 45 ++++++++++--------- .../perf/util/bpf_skel/lock_contention.bpf.c | 27 +++++------ 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index bc4e92c0c08b..41a1ad087895 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -46,14 +46,22 @@ int lock_contention_prepare(struct lock_contention *con) else bpf_map__set_max_entries(skel->maps.stacks, 1); - if (target__has_cpu(target)) + if (target__has_cpu(target)) { + skel->rodata->has_cpu = 1; ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); - if (target__has_task(target)) + } + if (target__has_task(target)) { + skel->rodata->has_task = 1; ntasks = perf_thread_map__nr(evlist->core.threads); - if (con->filters->nr_types) + } + if (con->filters->nr_types) { + skel->rodata->has_type = 1; ntypes = con->filters->nr_types; - if (con->filters->nr_cgrps) + } + if (con->filters->nr_cgrps) { + skel->rodata->has_cgroup = 1; ncgrps = con->filters->nr_cgrps; + } /* resolve lock name filters to addr */ if (con->filters->nr_syms) { @@ -82,6 +90,7 @@ int lock_contention_prepare(struct lock_contention *con) con->filters->addrs = addrs; } naddrs = con->filters->nr_addrs; + skel->rodata->has_addr = 1; } bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); @@ -90,6 +99,16 @@ int lock_contention_prepare(struct lock_contention *con) bpf_map__set_max_entries(skel->maps.addr_filter, naddrs); bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps); + skel->rodata->stack_skip = con->stack_skip; + skel->rodata->aggr_mode = con->aggr_mode; + skel->rodata->needs_callstack = con->save_callstack; + skel->rodata->lock_owner = con->owner; + + if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) { + if (cgroup_is_v2("perf_event")) + 
skel->rodata->use_cgroup_v2 = 1; + } + if (lock_contention_bpf__load(skel) < 0) { pr_err("Failed to load lock-contention BPF skeleton\n"); return -1; @@ -99,7 +118,6 @@ int lock_contention_prepare(struct lock_contention *con) u32 cpu; u8 val = 1; - skel->bss->has_cpu = 1; fd = bpf_map__fd(skel->maps.cpu_filter); for (i = 0; i < ncpus; i++) { @@ -112,7 +130,6 @@ int lock_contention_prepare(struct lock_contention *con) u32 pid; u8 val = 1; - skel->bss->has_task = 1; fd = bpf_map__fd(skel->maps.task_filter); for (i = 0; i < ntasks; i++) { @@ -125,7 +142,6 @@ int lock_contention_prepare(struct lock_contention *con) u32 pid = evlist->workload.pid; u8 val = 1; - skel->bss->has_task = 1; fd = bpf_map__fd(skel->maps.task_filter); bpf_map_update_elem(fd, &pid, &val, BPF_ANY); } @@ -133,7 +149,6 @@ int lock_contention_prepare(struct lock_contention *con) if (con->filters->nr_types) { u8 val = 1; - skel->bss->has_type = 1; fd = bpf_map__fd(skel->maps.type_filter); for (i = 0; i < con->filters->nr_types; i++) @@ -143,7 +158,6 @@ int lock_contention_prepare(struct lock_contention *con) if (con->filters->nr_addrs) { u8 val = 1; - skel->bss->has_addr = 1; fd = bpf_map__fd(skel->maps.addr_filter); for (i = 0; i < con->filters->nr_addrs; i++) @@ -153,25 +167,14 @@ int lock_contention_prepare(struct lock_contention *con) if (con->filters->nr_cgrps) { u8 val = 1; - skel->bss->has_cgroup = 1; fd = bpf_map__fd(skel->maps.cgroup_filter); for (i = 0; i < con->filters->nr_cgrps; i++) bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY); } - /* these don't work well if in the rodata section */ - skel->bss->stack_skip = con->stack_skip; - skel->bss->aggr_mode = con->aggr_mode; - skel->bss->needs_callstack = con->save_callstack; - skel->bss->lock_owner = con->owner; - - if (con->aggr_mode == LOCK_AGGR_CGROUP) { - if (cgroup_is_v2("perf_event")) - skel->bss->use_cgroup_v2 = 1; - + if (con->aggr_mode == LOCK_AGGR_CGROUP) read_all_cgroups(&con->cgroups); - } 
bpf_program__set_autoload(skel->progs.collect_lock_syms, false); diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index 52a876b42699..1069bda5d733 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -117,21 +117,22 @@ struct mm_struct___new { } __attribute__((preserve_access_index)); /* control flags */ -int enabled; -int has_cpu; -int has_task; -int has_type; -int has_addr; -int has_cgroup; -int needs_callstack; -int stack_skip; -int lock_owner; - -int use_cgroup_v2; -int perf_subsys_id = -1; +const volatile int has_cpu; +const volatile int has_task; +const volatile int has_type; +const volatile int has_addr; +const volatile int has_cgroup; +const volatile int needs_callstack; +const volatile int stack_skip; +const volatile int lock_owner; +const volatile int use_cgroup_v2; /* determine the key of lock stat */ -int aggr_mode; +const volatile int aggr_mode; + +int enabled; + +int perf_subsys_id = -1; __u64 end_ts;