selftests/bpf: add multi-uprobe benchmarks
Add multi-uprobe and multi-uretprobe benchmarks to bench tool. Multi- and classic uprobes/uretprobes have different low-level triggering code paths, so it's sometimes important to be able to benchmark both flavors of uprobes/uretprobes. Sample examples from my dev machine below. Single-threaded performance almost doesn't differ, but with more parallel CPUs triggering the same uprobe/uretprobe the difference grows. This might be due to [0], but given the code is slightly different, there could be other sources of slowdown. Note, all these numbers will change due to ongoing work to improve uprobe/uretprobe scalability (e.g., [1]), but having a benchmark like this is useful for measurements and debugging nevertheless. #!/bin/bash set -eufo pipefail for p in 1 8 16 32; do for i in uprobe-nop uretprobe-nop uprobe-multi-nop uretprobe-multi-nop; do summary=$(sudo ./bench -w1 -d3 -p$p -a trig-$i | tail -n1) total=$(echo "$summary" | cut -d'(' -f1 | cut -d' ' -f3-) percpu=$(echo "$summary" | cut -d'(' -f2 | cut -d')' -f1 | cut -d'/' -f1) printf "%-21s (%2d cpus): %s (%s/s/cpu)\n" $i $p "$total" "$percpu" done echo done uprobe-nop ( 1 cpus): 1.020 ± 0.005M/s ( 1.020M/s/cpu) uretprobe-nop ( 1 cpus): 0.515 ± 0.009M/s ( 0.515M/s/cpu) uprobe-multi-nop ( 1 cpus): 1.036 ± 0.004M/s ( 1.036M/s/cpu) uretprobe-multi-nop ( 1 cpus): 0.512 ± 0.005M/s ( 0.512M/s/cpu) uprobe-nop ( 8 cpus): 3.481 ± 0.030M/s ( 0.435M/s/cpu) uretprobe-nop ( 8 cpus): 2.222 ± 0.008M/s ( 0.278M/s/cpu) uprobe-multi-nop ( 8 cpus): 3.769 ± 0.094M/s ( 0.471M/s/cpu) uretprobe-multi-nop ( 8 cpus): 2.482 ± 0.007M/s ( 0.310M/s/cpu) uprobe-nop (16 cpus): 2.968 ± 0.011M/s ( 0.185M/s/cpu) uretprobe-nop (16 cpus): 1.870 ± 0.002M/s ( 0.117M/s/cpu) uprobe-multi-nop (16 cpus): 3.541 ± 0.037M/s ( 0.221M/s/cpu) uretprobe-multi-nop (16 cpus): 2.123 ± 0.026M/s ( 0.133M/s/cpu) uprobe-nop (32 cpus): 2.524 ± 0.026M/s ( 0.079M/s/cpu) uretprobe-nop (32 cpus): 1.572 ± 0.003M/s ( 0.049M/s/cpu) uprobe-multi-nop (32 cpus): 2.717 ± 0.003M/s ( 
0.085M/s/cpu) uretprobe-multi-nop (32 cpus): 1.687 ± 0.007M/s ( 0.053M/s/cpu) [0] https://lore.kernel.org/linux-trace-kernel/20240805202803.1813090-1-andrii@kernel.org/ [1] https://lore.kernel.org/linux-trace-kernel/20240731214256.3588718-1-andrii@kernel.org/ Signed-off-by: Andrii Nakryiko <andrii@kernel.org> Acked-by: Jiri Olsa <jolsa@kernel.org> Link: https://lore.kernel.org/r/20240806042935.3867862-1-andrii@kernel.org Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
parent
4e9e07603e
commit
f727b13dbe
@ -520,6 +520,12 @@ extern const struct bench bench_trig_uprobe_push;
|
||||
extern const struct bench bench_trig_uretprobe_push;
|
||||
extern const struct bench bench_trig_uprobe_ret;
|
||||
extern const struct bench bench_trig_uretprobe_ret;
|
||||
extern const struct bench bench_trig_uprobe_multi_nop;
|
||||
extern const struct bench bench_trig_uretprobe_multi_nop;
|
||||
extern const struct bench bench_trig_uprobe_multi_push;
|
||||
extern const struct bench bench_trig_uretprobe_multi_push;
|
||||
extern const struct bench bench_trig_uprobe_multi_ret;
|
||||
extern const struct bench bench_trig_uretprobe_multi_ret;
|
||||
|
||||
extern const struct bench bench_rb_libbpf;
|
||||
extern const struct bench bench_rb_custom;
|
||||
@ -574,6 +580,12 @@ static const struct bench *benchs[] = {
|
||||
&bench_trig_uretprobe_push,
|
||||
&bench_trig_uprobe_ret,
|
||||
&bench_trig_uretprobe_ret,
|
||||
&bench_trig_uprobe_multi_nop,
|
||||
&bench_trig_uretprobe_multi_nop,
|
||||
&bench_trig_uprobe_multi_push,
|
||||
&bench_trig_uretprobe_multi_push,
|
||||
&bench_trig_uprobe_multi_ret,
|
||||
&bench_trig_uretprobe_multi_ret,
|
||||
/* ringbuf/perfbuf benchmarks */
|
||||
&bench_rb_libbpf,
|
||||
&bench_rb_custom,
|
||||
|
@ -332,7 +332,7 @@ static void *uprobe_producer_ret(void *input)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void usetup(bool use_retprobe, void *target_addr)
|
||||
static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
|
||||
{
|
||||
size_t uprobe_offset;
|
||||
struct bpf_link *link;
|
||||
@ -346,7 +346,10 @@ static void usetup(bool use_retprobe, void *target_addr)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
|
||||
if (use_multi)
|
||||
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe_multi, true);
|
||||
else
|
||||
bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
|
||||
|
||||
err = trigger_bench__load(ctx.skel);
|
||||
if (err) {
|
||||
@ -355,16 +358,28 @@ static void usetup(bool use_retprobe, void *target_addr)
|
||||
}
|
||||
|
||||
uprobe_offset = get_uprobe_offset(target_addr);
|
||||
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
|
||||
use_retprobe,
|
||||
-1 /* all PIDs */,
|
||||
"/proc/self/exe",
|
||||
uprobe_offset);
|
||||
if (use_multi) {
|
||||
LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
|
||||
.retprobe = use_retprobe,
|
||||
.cnt = 1,
|
||||
.offsets = &uprobe_offset,
|
||||
);
|
||||
link = bpf_program__attach_uprobe_multi(
|
||||
ctx.skel->progs.bench_trigger_uprobe_multi,
|
||||
-1 /* all PIDs */, "/proc/self/exe", NULL, &opts);
|
||||
ctx.skel->links.bench_trigger_uprobe_multi = link;
|
||||
} else {
|
||||
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
|
||||
use_retprobe,
|
||||
-1 /* all PIDs */,
|
||||
"/proc/self/exe",
|
||||
uprobe_offset);
|
||||
ctx.skel->links.bench_trigger_uprobe = link;
|
||||
}
|
||||
if (!link) {
|
||||
fprintf(stderr, "failed to attach uprobe!\n");
|
||||
fprintf(stderr, "failed to attach %s!\n", use_multi ? "multi-uprobe" : "uprobe");
|
||||
exit(1);
|
||||
}
|
||||
ctx.skel->links.bench_trigger_uprobe = link;
|
||||
}
|
||||
|
||||
static void usermode_count_setup(void)
|
||||
@ -374,32 +389,62 @@ static void usermode_count_setup(void)
|
||||
|
||||
static void uprobe_nop_setup(void)
|
||||
{
|
||||
usetup(false, &uprobe_target_nop);
|
||||
usetup(false, false /* !use_multi */, &uprobe_target_nop);
|
||||
}
|
||||
|
||||
static void uretprobe_nop_setup(void)
|
||||
{
|
||||
usetup(true, &uprobe_target_nop);
|
||||
usetup(true, false /* !use_multi */, &uprobe_target_nop);
|
||||
}
|
||||
|
||||
static void uprobe_push_setup(void)
|
||||
{
|
||||
usetup(false, &uprobe_target_push);
|
||||
usetup(false, false /* !use_multi */, &uprobe_target_push);
|
||||
}
|
||||
|
||||
static void uretprobe_push_setup(void)
|
||||
{
|
||||
usetup(true, &uprobe_target_push);
|
||||
usetup(true, false /* !use_multi */, &uprobe_target_push);
|
||||
}
|
||||
|
||||
static void uprobe_ret_setup(void)
|
||||
{
|
||||
usetup(false, &uprobe_target_ret);
|
||||
usetup(false, false /* !use_multi */, &uprobe_target_ret);
|
||||
}
|
||||
|
||||
static void uretprobe_ret_setup(void)
|
||||
{
|
||||
usetup(true, &uprobe_target_ret);
|
||||
usetup(true, false /* !use_multi */, &uprobe_target_ret);
|
||||
}
|
||||
|
||||
static void uprobe_multi_nop_setup(void)
|
||||
{
|
||||
usetup(false, true /* use_multi */, &uprobe_target_nop);
|
||||
}
|
||||
|
||||
static void uretprobe_multi_nop_setup(void)
|
||||
{
|
||||
usetup(true, true /* use_multi */, &uprobe_target_nop);
|
||||
}
|
||||
|
||||
static void uprobe_multi_push_setup(void)
|
||||
{
|
||||
usetup(false, true /* use_multi */, &uprobe_target_push);
|
||||
}
|
||||
|
||||
static void uretprobe_multi_push_setup(void)
|
||||
{
|
||||
usetup(true, true /* use_multi */, &uprobe_target_push);
|
||||
}
|
||||
|
||||
static void uprobe_multi_ret_setup(void)
|
||||
{
|
||||
usetup(false, true /* use_multi */, &uprobe_target_ret);
|
||||
}
|
||||
|
||||
static void uretprobe_multi_ret_setup(void)
|
||||
{
|
||||
usetup(true, true /* use_multi */, &uprobe_target_ret);
|
||||
}
|
||||
|
||||
const struct bench bench_trig_syscall_count = {
|
||||
@ -454,3 +499,9 @@ BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
|
||||
BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
|
||||
BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
|
||||
BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
|
||||
BENCH_TRIG_USERMODE(uprobe_multi_nop, nop, "uprobe-multi-nop");
|
||||
BENCH_TRIG_USERMODE(uprobe_multi_push, push, "uprobe-multi-push");
|
||||
BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
|
||||
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
|
||||
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
|
||||
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
|
||||
|
@ -32,6 +32,13 @@ int bench_trigger_uprobe(void *ctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("?uprobe.multi")
|
||||
int bench_trigger_uprobe_multi(void *ctx)
|
||||
{
|
||||
inc_counter();
|
||||
return 0;
|
||||
}
|
||||
|
||||
const volatile int batch_iters = 0;
|
||||
|
||||
SEC("?raw_tp")
|
||||
|
Loading…
Reference in New Issue
Block a user