1

perf callchain: Allow symbols to be optional when resolving a callchain

In uses like 'perf inject' it is not necessary to gather the symbol for
each call chain location; only the map for the sample IP is wanted, so
that build IDs and the like can be injected. Make gathering the symbol in
the callchain_cursor optional.

For a 'perf inject -B' command this lowers the peak RSS from 54.1MB to
29.6MB by avoiding loading symbols.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Anne Macedo <retpolanne@posteo.net>
Cc: Casey Chen <cachen@purestorage.com>
Cc: Colin Ian King <colin.i.king@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sun Haiyong <sunhaiyong@loongson.cn>
Link: https://lore.kernel.org/r/20240909203740.143492-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
Ian Rogers 2024-09-09 13:37:40 -07:00 committed by Arnaldo Carvalho de Melo
parent 64eed019f3
commit 02b2705017
5 changed files with 85 additions and 52 deletions

View File

@ -942,7 +942,7 @@ int perf_event__inject_buildid(const struct perf_tool *tool, union perf_event *e
}
sample__for_each_callchain_node(thread, evsel, sample, PERF_MAX_STACK_DEPTH,
mark_dso_hit_callback, &args);
/*symbols=*/false, mark_dso_hit_callback, &args);
thread__put(thread);
repipe:

View File

@ -1800,7 +1800,7 @@ s64 callchain_avg_cycles(struct callchain_node *cnode)
int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
struct perf_sample *sample, int max_stack,
callchain_iter_fn cb, void *data)
bool symbols, callchain_iter_fn cb, void *data)
{
struct callchain_cursor *cursor = get_tls_callchain_cursor();
int ret;
@ -1809,9 +1809,9 @@ int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
return -ENOMEM;
/* Fill in the callchain. */
ret = thread__resolve_callchain(thread, cursor, evsel, sample,
/*parent=*/NULL, /*root_al=*/NULL,
max_stack);
ret = __thread__resolve_callchain(thread, cursor, evsel, sample,
/*parent=*/NULL, /*root_al=*/NULL,
max_stack, symbols);
if (ret)
return ret;

View File

@ -315,6 +315,6 @@ typedef int (*callchain_iter_fn)(struct callchain_cursor_node *node, void *data)
int sample__for_each_callchain_node(struct thread *thread, struct evsel *evsel,
struct perf_sample *sample, int max_stack,
callchain_iter_fn cb, void *data);
bool symbols, callchain_iter_fn cb, void *data);
#endif /* __PERF_CALLCHAIN_H */

View File

@ -2060,7 +2060,8 @@ static int add_callchain_ip(struct thread *thread,
bool branch,
struct branch_flags *flags,
struct iterations *iter,
u64 branch_from)
u64 branch_from,
bool symbols)
{
struct map_symbol ms = {};
struct addr_location al;
@ -2099,7 +2100,8 @@ static int add_callchain_ip(struct thread *thread,
}
goto out;
}
thread__find_symbol(thread, *cpumode, ip, &al);
if (symbols)
thread__find_symbol(thread, *cpumode, ip, &al);
}
if (al.sym != NULL) {
@ -2228,7 +2230,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
struct symbol **parent,
struct addr_location *root_al,
u64 branch_from,
bool callee, int end)
bool callee, int end,
bool symbols)
{
struct ip_callchain *chain = sample->callchain;
u8 cpumode = PERF_RECORD_MISC_USER;
@ -2238,7 +2241,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
for (i = 0; i < end + 1; i++) {
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, chain->ips[i],
false, NULL, NULL, branch_from);
false, NULL, NULL, branch_from,
symbols);
if (err)
return err;
}
@ -2248,7 +2252,8 @@ static int lbr_callchain_add_kernel_ip(struct thread *thread,
for (i = end; i >= 0; i--) {
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, chain->ips[i],
false, NULL, NULL, branch_from);
false, NULL, NULL, branch_from,
symbols);
if (err)
return err;
}
@ -2291,7 +2296,8 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
struct symbol **parent,
struct addr_location *root_al,
u64 *branch_from,
bool callee)
bool callee,
bool symbols)
{
struct branch_stack *lbr_stack = sample->branch_stack;
struct branch_entry *entries = perf_sample__branch_entries(sample);
@ -2324,7 +2330,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
true, flags, NULL,
*branch_from);
*branch_from, symbols);
if (err)
return err;
@ -2349,7 +2355,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
true, flags, NULL,
*branch_from);
*branch_from, symbols);
if (err)
return err;
save_lbr_cursor_node(thread, cursor, i);
@ -2364,7 +2370,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
true, flags, NULL,
*branch_from);
*branch_from, symbols);
if (err)
return err;
save_lbr_cursor_node(thread, cursor, i);
@ -2378,7 +2384,7 @@ static int lbr_callchain_add_lbr_ip(struct thread *thread,
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
true, flags, NULL,
*branch_from);
*branch_from, symbols);
if (err)
return err;
}
@ -2545,7 +2551,8 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
struct symbol **parent,
struct addr_location *root_al,
int max_stack,
unsigned int max_lbr)
unsigned int max_lbr,
bool symbols)
{
bool callee = (callchain_param.order == ORDER_CALLEE);
struct ip_callchain *chain = sample->callchain;
@ -2587,12 +2594,12 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
/* Add kernel ip */
err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
parent, root_al, branch_from,
true, i);
true, i, symbols);
if (err)
goto error;
err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
root_al, &branch_from, true);
root_al, &branch_from, true, symbols);
if (err)
goto error;
@ -2609,14 +2616,14 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
goto error;
}
err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
root_al, &branch_from, false);
root_al, &branch_from, false, symbols);
if (err)
goto error;
/* Add kernel ip */
err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
parent, root_al, branch_from,
false, i);
false, i, symbols);
if (err)
goto error;
}
@ -2630,7 +2637,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
struct callchain_cursor *cursor,
struct symbol **parent,
struct addr_location *root_al,
u8 *cpumode, int ent)
u8 *cpumode, int ent, bool symbols)
{
int err = 0;
@ -2640,7 +2647,7 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
if (ip >= PERF_CONTEXT_MAX) {
err = add_callchain_ip(thread, cursor, parent,
root_al, cpumode, ip,
false, NULL, NULL, 0);
false, NULL, NULL, 0, symbols);
break;
}
}
@ -2662,7 +2669,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al,
int max_stack)
int max_stack,
bool symbols)
{
struct branch_stack *branch = sample->branch_stack;
struct branch_entry *entries = perf_sample__branch_entries(sample);
@ -2682,7 +2690,8 @@ static int thread__resolve_callchain_sample(struct thread *thread,
err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
root_al, max_stack,
!env ? 0 : env->max_branches);
!env ? 0 : env->max_branches,
symbols);
if (err)
return (err < 0) ? err : 0;
}
@ -2747,13 +2756,14 @@ static int thread__resolve_callchain_sample(struct thread *thread,
root_al,
NULL, be[i].to,
true, &be[i].flags,
NULL, be[i].from);
NULL, be[i].from, symbols);
if (!err)
if (!err) {
err = add_callchain_ip(thread, cursor, parent, root_al,
NULL, be[i].from,
true, &be[i].flags,
&iter[i], 0);
&iter[i], 0, symbols);
}
if (err == -EINVAL)
break;
if (err)
@ -2769,7 +2779,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
check_calls:
if (chain && callchain_param.order != ORDER_CALLEE) {
err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
&cpumode, chain->nr - first_call);
&cpumode, chain->nr - first_call, symbols);
if (err)
return (err < 0) ? err : 0;
}
@ -2791,7 +2801,7 @@ check_calls:
++nr_entries;
else if (callchain_param.order != ORDER_CALLEE) {
err = find_prev_cpumode(chain, thread, cursor, parent,
root_al, &cpumode, j);
root_al, &cpumode, j, symbols);
if (err)
return (err < 0) ? err : 0;
continue;
@ -2818,8 +2828,8 @@ check_calls:
if (leaf_frame_caller && leaf_frame_caller != ip) {
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, leaf_frame_caller,
false, NULL, NULL, 0);
root_al, &cpumode, leaf_frame_caller,
false, NULL, NULL, 0, symbols);
if (err)
return (err < 0) ? err : 0;
}
@ -2827,7 +2837,7 @@ check_calls:
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
false, NULL, NULL, 0);
false, NULL, NULL, 0, symbols);
if (err)
return (err < 0) ? err : 0;
@ -2907,7 +2917,7 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
struct perf_sample *sample,
int max_stack)
int max_stack, bool symbols)
{
/* Can we do dwarf post unwind? */
if (!((evsel->core.attr.sample_type & PERF_SAMPLE_REGS_USER) &&
@ -2919,17 +2929,21 @@ static int thread__resolve_callchain_unwind(struct thread *thread,
(!sample->user_stack.size))
return 0;
if (!symbols)
pr_debug("Not resolving symbols with an unwinder isn't currently supported\n");
return unwind__get_entries(unwind_entry, cursor,
thread, sample, max_stack, false);
}
int thread__resolve_callchain(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al,
int max_stack)
int __thread__resolve_callchain(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al,
int max_stack,
bool symbols)
{
int ret = 0;
@ -2942,22 +2956,22 @@ int thread__resolve_callchain(struct thread *thread,
ret = thread__resolve_callchain_sample(thread, cursor,
evsel, sample,
parent, root_al,
max_stack);
max_stack, symbols);
if (ret)
return ret;
ret = thread__resolve_callchain_unwind(thread, cursor,
evsel, sample,
max_stack);
max_stack, symbols);
} else {
ret = thread__resolve_callchain_unwind(thread, cursor,
evsel, sample,
max_stack);
max_stack, symbols);
if (ret)
return ret;
ret = thread__resolve_callchain_sample(thread, cursor,
evsel, sample,
parent, root_al,
max_stack);
max_stack, symbols);
}
return ret;

View File

@ -178,13 +178,32 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample,
struct callchain_cursor;
int thread__resolve_callchain(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al,
int max_stack);
int __thread__resolve_callchain(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al,
int max_stack,
bool symbols);
/*
 * Convenience wrapper around __thread__resolve_callchain() that forwards
 * every argument unchanged and always passes symbols=true. Returns whatever
 * __thread__resolve_callchain() returns.
 */
static inline int thread__resolve_callchain(struct thread *thread,
					    struct callchain_cursor *cursor,
					    struct evsel *evsel,
					    struct perf_sample *sample,
					    struct symbol **parent,
					    struct addr_location *root_al,
					    int max_stack)
{
	return __thread__resolve_callchain(thread, cursor, evsel, sample,
					   parent, root_al, max_stack,
					   /*symbols=*/true);
}
/*
* Default guest kernel is defined by parameter --guestkallsyms