1

perf lock contention: Constify control data for BPF

The control knobs set before loading BPF programs should be declared as
'const volatile' so that they can be optimized by the BPF core.

Committer testing:

  root@x1:~# perf lock contention --use-bpf
   contended   total wait     max wait     avg wait         type   caller

           5     31.57 us     14.93 us      6.31 us        mutex   btrfs_delayed_update_inode+0x43
           1     16.91 us     16.91 us     16.91 us      rwsem:R   btrfs_tree_read_lock_nested+0x1b
           1     15.13 us     15.13 us     15.13 us     spinlock   btrfs_getattr+0xd1
           1      6.65 us      6.65 us      6.65 us      rwsem:R   btrfs_tree_read_lock_nested+0x1b
           1      4.34 us      4.34 us      4.34 us     spinlock   process_one_work+0x1a9
  root@x1:~#
  root@x1:~# perf trace -e bpf --max-events 10 perf lock contention --use-bpf
       0.000 ( 0.013 ms): :2948281/2948281 bpf(cmd: 36, uattr: 0x7ffd5f12d730, size: 8)          = -1 EOPNOTSUPP (Operation not supported)
       0.024 ( 0.120 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d460, size: 148) = 16
       0.158 ( 0.034 ms): :2948281/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d520, size: 148) = 16
      26.653 ( 0.154 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d3d0, size: 148)     = 16
      26.825 ( 0.014 ms): perf/2948281 bpf(uattr: 0x7ffd5f12d580, size: 80)                      = 16
      87.924 ( 0.038 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d400, size: 40)       = 16
      87.988 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d470, size: 40)       = 16
      88.019 ( 0.006 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d250, size: 40)       = 16
      88.029 ( 0.172 ms): perf/2948281 bpf(cmd: PROG_LOAD, uattr: 0x7ffd5f12d320, size: 148)     = 17
      88.217 ( 0.005 ms): perf/2948281 bpf(cmd: BTF_LOAD, uattr: 0x7ffd5f12d4d0, size: 40)       = 16
  root@x1:~#

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240902200515.2103769-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Namhyung Kim 2024-09-02 13:05:14 -07:00 committed by Arnaldo Carvalho de Melo
parent 066fd84087
commit 4afdc00c37
2 changed files with 38 additions and 34 deletions

View File

@ -46,14 +46,22 @@ int lock_contention_prepare(struct lock_contention *con)
else else
bpf_map__set_max_entries(skel->maps.stacks, 1); bpf_map__set_max_entries(skel->maps.stacks, 1);
if (target__has_cpu(target)) if (target__has_cpu(target)) {
skel->rodata->has_cpu = 1;
ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus); ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
if (target__has_task(target)) }
if (target__has_task(target)) {
skel->rodata->has_task = 1;
ntasks = perf_thread_map__nr(evlist->core.threads); ntasks = perf_thread_map__nr(evlist->core.threads);
if (con->filters->nr_types) }
if (con->filters->nr_types) {
skel->rodata->has_type = 1;
ntypes = con->filters->nr_types; ntypes = con->filters->nr_types;
if (con->filters->nr_cgrps) }
if (con->filters->nr_cgrps) {
skel->rodata->has_cgroup = 1;
ncgrps = con->filters->nr_cgrps; ncgrps = con->filters->nr_cgrps;
}
/* resolve lock name filters to addr */ /* resolve lock name filters to addr */
if (con->filters->nr_syms) { if (con->filters->nr_syms) {
@ -82,6 +90,7 @@ int lock_contention_prepare(struct lock_contention *con)
con->filters->addrs = addrs; con->filters->addrs = addrs;
} }
naddrs = con->filters->nr_addrs; naddrs = con->filters->nr_addrs;
skel->rodata->has_addr = 1;
} }
bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus);
@ -90,6 +99,16 @@ int lock_contention_prepare(struct lock_contention *con)
bpf_map__set_max_entries(skel->maps.addr_filter, naddrs); bpf_map__set_max_entries(skel->maps.addr_filter, naddrs);
bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps); bpf_map__set_max_entries(skel->maps.cgroup_filter, ncgrps);
skel->rodata->stack_skip = con->stack_skip;
skel->rodata->aggr_mode = con->aggr_mode;
skel->rodata->needs_callstack = con->save_callstack;
skel->rodata->lock_owner = con->owner;
if (con->aggr_mode == LOCK_AGGR_CGROUP || con->filters->nr_cgrps) {
if (cgroup_is_v2("perf_event"))
skel->rodata->use_cgroup_v2 = 1;
}
if (lock_contention_bpf__load(skel) < 0) { if (lock_contention_bpf__load(skel) < 0) {
pr_err("Failed to load lock-contention BPF skeleton\n"); pr_err("Failed to load lock-contention BPF skeleton\n");
return -1; return -1;
@ -99,7 +118,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 cpu; u32 cpu;
u8 val = 1; u8 val = 1;
skel->bss->has_cpu = 1;
fd = bpf_map__fd(skel->maps.cpu_filter); fd = bpf_map__fd(skel->maps.cpu_filter);
for (i = 0; i < ncpus; i++) { for (i = 0; i < ncpus; i++) {
@ -112,7 +130,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 pid; u32 pid;
u8 val = 1; u8 val = 1;
skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter); fd = bpf_map__fd(skel->maps.task_filter);
for (i = 0; i < ntasks; i++) { for (i = 0; i < ntasks; i++) {
@ -125,7 +142,6 @@ int lock_contention_prepare(struct lock_contention *con)
u32 pid = evlist->workload.pid; u32 pid = evlist->workload.pid;
u8 val = 1; u8 val = 1;
skel->bss->has_task = 1;
fd = bpf_map__fd(skel->maps.task_filter); fd = bpf_map__fd(skel->maps.task_filter);
bpf_map_update_elem(fd, &pid, &val, BPF_ANY); bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
} }
@ -133,7 +149,6 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_types) { if (con->filters->nr_types) {
u8 val = 1; u8 val = 1;
skel->bss->has_type = 1;
fd = bpf_map__fd(skel->maps.type_filter); fd = bpf_map__fd(skel->maps.type_filter);
for (i = 0; i < con->filters->nr_types; i++) for (i = 0; i < con->filters->nr_types; i++)
@ -143,7 +158,6 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_addrs) { if (con->filters->nr_addrs) {
u8 val = 1; u8 val = 1;
skel->bss->has_addr = 1;
fd = bpf_map__fd(skel->maps.addr_filter); fd = bpf_map__fd(skel->maps.addr_filter);
for (i = 0; i < con->filters->nr_addrs; i++) for (i = 0; i < con->filters->nr_addrs; i++)
@ -153,25 +167,14 @@ int lock_contention_prepare(struct lock_contention *con)
if (con->filters->nr_cgrps) { if (con->filters->nr_cgrps) {
u8 val = 1; u8 val = 1;
skel->bss->has_cgroup = 1;
fd = bpf_map__fd(skel->maps.cgroup_filter); fd = bpf_map__fd(skel->maps.cgroup_filter);
for (i = 0; i < con->filters->nr_cgrps; i++) for (i = 0; i < con->filters->nr_cgrps; i++)
bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY); bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY);
} }
/* these don't work well if in the rodata section */ if (con->aggr_mode == LOCK_AGGR_CGROUP)
skel->bss->stack_skip = con->stack_skip;
skel->bss->aggr_mode = con->aggr_mode;
skel->bss->needs_callstack = con->save_callstack;
skel->bss->lock_owner = con->owner;
if (con->aggr_mode == LOCK_AGGR_CGROUP) {
if (cgroup_is_v2("perf_event"))
skel->bss->use_cgroup_v2 = 1;
read_all_cgroups(&con->cgroups); read_all_cgroups(&con->cgroups);
}
bpf_program__set_autoload(skel->progs.collect_lock_syms, false); bpf_program__set_autoload(skel->progs.collect_lock_syms, false);

View File

@ -117,21 +117,22 @@ struct mm_struct___new {
} __attribute__((preserve_access_index)); } __attribute__((preserve_access_index));
/* control flags */ /* control flags */
int enabled; const volatile int has_cpu;
int has_cpu; const volatile int has_task;
int has_task; const volatile int has_type;
int has_type; const volatile int has_addr;
int has_addr; const volatile int has_cgroup;
int has_cgroup; const volatile int needs_callstack;
int needs_callstack; const volatile int stack_skip;
int stack_skip; const volatile int lock_owner;
int lock_owner; const volatile int use_cgroup_v2;
int use_cgroup_v2;
int perf_subsys_id = -1;
/* determine the key of lock stat */ /* determine the key of lock stat */
int aggr_mode; const volatile int aggr_mode;
int enabled;
int perf_subsys_id = -1;
__u64 end_ts; __u64 end_ts;