1

function_graph: Add "task variables" per task for fgraph_ops

Add a "task variables" array on the task's shadow ret_stack that holds
one long for each possible registered fgraph_ops. That's a total of 16
entries, taking up 8 * 16 = 128 bytes (out of a 4k page).

This allows fgraph_ops to implement features on a per-task basis by
giving each of them a way to maintain state for each task.

Co-developed with Masami Hiramatsu:
Link: https://lore.kernel.org/linux-trace-kernel/171509104383.162236.12239656156685718550.stgit@devnote2
Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.308806126@goodmis.org

Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Florent Revest <revest@chromium.org>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: bpf <bpf@vger.kernel.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alan Maguire <alan.maguire@oracle.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Guo Ren <guoren@kernel.org>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
This commit is contained in:
Steven Rostedt (VMware) 2024-06-03 15:07:19 -04:00 committed by Steven Rostedt (Google)
parent 6d4786592a
commit 4497412a1f
2 changed files with 74 additions and 1 deletions

View File

@ -1089,6 +1089,7 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int skip);
unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
unsigned long ret, unsigned long *retp); unsigned long ret, unsigned long *retp);
unsigned long *fgraph_get_task_var(struct fgraph_ops *gops);
/* /*
* Sometimes we don't want to trace a function with the function * Sometimes we don't want to trace a function with the function

View File

@ -54,6 +54,10 @@
* on the return of the function being traced, this is what will be on the * on the return of the function being traced, this is what will be on the
* task's shadow ret_stack: (the stack grows upward) * task's shadow ret_stack: (the stack grows upward)
* *
* ret_stack[SHADOW_STACK_OFFSET]
* | SHADOW_STACK_TASK_VARS(ret_stack)[15] |
* ...
* | SHADOW_STACK_TASK_VARS(ret_stack)[0] |
* ret_stack[SHADOW_STACK_MAX_OFFSET] * ret_stack[SHADOW_STACK_MAX_OFFSET]
* ... * ...
* | | <- task->curr_ret_stack * | | <- task->curr_ret_stack
@ -116,11 +120,19 @@ enum {
#define SHADOW_STACK_SIZE (PAGE_SIZE) #define SHADOW_STACK_SIZE (PAGE_SIZE)
#define SHADOW_STACK_OFFSET (SHADOW_STACK_SIZE / sizeof(long)) #define SHADOW_STACK_OFFSET (SHADOW_STACK_SIZE / sizeof(long))
/* Leave on a buffer at the end */ /* Leave on a buffer at the end */
#define SHADOW_STACK_MAX_OFFSET (SHADOW_STACK_OFFSET - (FGRAPH_FRAME_OFFSET + 1)) #define SHADOW_STACK_MAX_OFFSET \
(SHADOW_STACK_OFFSET - (FGRAPH_FRAME_OFFSET + 1 + FGRAPH_ARRAY_SIZE))
/* RET_STACK(): Return the frame from a given @offset from task @t */ /* RET_STACK(): Return the frame from a given @offset from task @t */
#define RET_STACK(t, offset) ((struct ftrace_ret_stack *)(&(t)->ret_stack[offset])) #define RET_STACK(t, offset) ((struct ftrace_ret_stack *)(&(t)->ret_stack[offset]))
/*
* Each fgraph_ops has a reserved unsigned long at the end (top) of the
* ret_stack to store task specific state.
*
* SHADOW_STACK_TASK_VARS(): Return a pointer to the first of those
* variables for the given @ret_stack, i.e. the address of the last
* FGRAPH_ARRAY_SIZE longs of the shadow stack
* (ret_stack[SHADOW_STACK_OFFSET - FGRAPH_ARRAY_SIZE]).
*/
#define SHADOW_STACK_TASK_VARS(ret_stack) \
((unsigned long *)(&(ret_stack)[SHADOW_STACK_OFFSET - FGRAPH_ARRAY_SIZE]))
DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph); DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph);
int ftrace_graph_active; int ftrace_graph_active;
@ -211,6 +223,44 @@ static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops)
{ {
} }
/* Store @val into slot @idx of the task variable array on @t's ret_stack. */
static void ret_stack_set_task_var(struct task_struct *t, int idx, long val)
{
	SHADOW_STACK_TASK_VARS(t->ret_stack)[idx] = val;
}
/* Return the address of slot @idx in the task variable array on @t's ret_stack. */
static unsigned long *
ret_stack_get_task_var(struct task_struct *t, int idx)
{
	return SHADOW_STACK_TASK_VARS(t->ret_stack) + idx;
}
/* Zero all FGRAPH_ARRAY_SIZE task variables stored at the top of @ret_stack. */
static void ret_stack_init_task_vars(unsigned long *ret_stack)
{
	memset(SHADOW_STACK_TASK_VARS(ret_stack), 0,
	       FGRAPH_ARRAY_SIZE * sizeof(unsigned long));
}
/**
* fgraph_get_task_var - retrieve a task specific state variable
* @gops: The fgraph_ops that owns the task specific variable
*
* Every registered fgraph_ops has a task state variable
* reserved on the task's ret_stack. This function returns the
* address to that variable for the current task, using @gops->idx
* to select the slot.
*
* NOTE(review): current->ret_stack is not checked for NULL here;
* presumably callers only run where the shadow stack has been
* allocated - confirm against the call sites.
*
* Return: the address of the fgraph_ops @gops task specific
* unsigned long variable.
*/
unsigned long *fgraph_get_task_var(struct fgraph_ops *gops)
{
return ret_stack_get_task_var(current, gops->idx);
}
/* /*
* @offset: The offset into @t->ret_stack to find the ret_stack entry * @offset: The offset into @t->ret_stack to find the ret_stack entry
* @frame_offset: Where to place the offset into @t->ret_stack of that entry * @frame_offset: Where to place the offset into @t->ret_stack of that entry
@ -766,6 +816,7 @@ static int alloc_retstack_tasklist(unsigned long **ret_stack_list)
if (t->ret_stack == NULL) { if (t->ret_stack == NULL) {
atomic_set(&t->trace_overrun, 0); atomic_set(&t->trace_overrun, 0);
ret_stack_init_task_vars(ret_stack_list[start]);
t->curr_ret_stack = 0; t->curr_ret_stack = 0;
t->curr_ret_depth = -1; t->curr_ret_depth = -1;
/* Make sure the tasks see the 0 first: */ /* Make sure the tasks see the 0 first: */
@ -826,6 +877,7 @@ static void
graph_init_task(struct task_struct *t, unsigned long *ret_stack) graph_init_task(struct task_struct *t, unsigned long *ret_stack)
{ {
atomic_set(&t->trace_overrun, 0); atomic_set(&t->trace_overrun, 0);
ret_stack_init_task_vars(ret_stack);
t->ftrace_timestamp = 0; t->ftrace_timestamp = 0;
t->curr_ret_stack = 0; t->curr_ret_stack = 0;
t->curr_ret_depth = -1; t->curr_ret_depth = -1;
@ -959,6 +1011,24 @@ static int start_graph_tracing(void)
return ret; return ret;
} }
/*
 * Reset the task variable in slot @idx to zero for every task that
 * already has a shadow ret_stack: first the idle task of each online
 * CPU, then every thread of every process (walked under tasklist_lock).
 */
static void init_task_vars(int idx)
{
	struct task_struct *proc, *thread;
	int cpu;

	for_each_online_cpu(cpu) {
		struct task_struct *idle = idle_task(cpu);

		if (idle->ret_stack)
			ret_stack_set_task_var(idle, idx, 0);
	}

	read_lock(&tasklist_lock);
	for_each_process_thread(proc, thread) {
		if (!thread->ret_stack)
			continue;
		ret_stack_set_task_var(thread, idx, 0);
	}
	read_unlock(&tasklist_lock);
}
int register_ftrace_graph(struct fgraph_ops *gops) int register_ftrace_graph(struct fgraph_ops *gops)
{ {
int command = 0; int command = 0;
@ -997,6 +1067,8 @@ int register_ftrace_graph(struct fgraph_ops *gops)
ftrace_graph_return = return_run; ftrace_graph_return = return_run;
ftrace_graph_entry = entry_run; ftrace_graph_entry = entry_run;
command = FTRACE_START_FUNC_RET; command = FTRACE_START_FUNC_RET;
} else {
init_task_vars(gops->idx);
} }
/* Always save the function, and reset at unregistering */ /* Always save the function, and reset at unregistering */