1
linux/kernel/trace/trace_fprobe.c
Mikel Rychliski 73f3508047 tracing/probes: Fix MAX_TRACE_ARGS limit handling
When creating a trace_probe we would set nr_args prior to truncating the
arguments to MAX_TRACE_ARGS. However, we would only initialize arguments
up to the limit.

This caused invalid memory access when attempting to set up probes with
more than 128 fetchargs.

  BUG: kernel NULL pointer dereference, address: 0000000000000020
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 0 P4D 0
  Oops: Oops: 0000 [#1] PREEMPT SMP PTI
  CPU: 0 UID: 0 PID: 1769 Comm: cat Not tainted 6.11.0-rc7+ #8
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-1.fc39 04/01/2014
  RIP: 0010:__set_print_fmt+0x134/0x330

Resolve the issue by applying the MAX_TRACE_ARGS limit earlier. Return
an error when there are too many arguments instead of silently
truncating.

Link: https://lore.kernel.org/all/20240930202656.292869-1-mikel@mikelr.com/

Fixes: 035ba76014 ("tracing/probes: cleanup: Set trace_probe::nr_args at trace_probe_init")
Signed-off-by: Mikel Rychliski <mikel@mikelr.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
2024-10-23 17:24:44 +09:00

1346 lines
33 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Fprobe-based tracing events
* Copyright (C) 2022 Google LLC.
*/
#define pr_fmt(fmt) "trace_fprobe: " fmt
#include <asm/ptrace.h>
#include <linux/fprobe.h>
#include <linux/module.h>
#include <linux/rculist.h>
#include <linux/security.h>
#include <linux/tracepoint.h>
#include <linux/uaccess.h>
#include "trace_dynevent.h"
#include "trace_probe.h"
#include "trace_probe_kernel.h"
#include "trace_probe_tmpl.h"
#define FPROBE_EVENT_SYSTEM "fprobes"
#define TRACEPOINT_EVENT_SYSTEM "tracepoints"
#define RETHOOK_MAXACTIVE_MAX 4096
#define TRACEPOINT_STUB ERR_PTR(-ENOENT)
static int trace_fprobe_create(const char *raw_command);
static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev);
static int trace_fprobe_release(struct dyn_event *ev);
static bool trace_fprobe_is_busy(struct dyn_event *ev);
static bool trace_fprobe_match(const char *system, const char *event,
int argc, const char **argv, struct dyn_event *ev);
static struct dyn_event_operations trace_fprobe_ops = {
.create = trace_fprobe_create,
.show = trace_fprobe_show,
.is_busy = trace_fprobe_is_busy,
.free = trace_fprobe_release,
.match = trace_fprobe_match,
};
/*
* Fprobe event core functions
*/
struct trace_fprobe {
struct dyn_event devent;
struct fprobe fp;
const char *symbol;
struct tracepoint *tpoint;
struct module *mod;
struct trace_probe tp;
};
static bool is_trace_fprobe(struct dyn_event *ev)
{
return ev->ops == &trace_fprobe_ops;
}
static struct trace_fprobe *to_trace_fprobe(struct dyn_event *ev)
{
return container_of(ev, struct trace_fprobe, devent);
}
/**
* for_each_trace_fprobe - iterate over the trace_fprobe list
* @pos: the struct trace_fprobe * for each entry
* @dpos: the struct dyn_event * to use as a loop cursor
*/
#define for_each_trace_fprobe(pos, dpos) \
for_each_dyn_event(dpos) \
if (is_trace_fprobe(dpos) && (pos = to_trace_fprobe(dpos)))
static bool trace_fprobe_is_return(struct trace_fprobe *tf)
{
return tf->fp.exit_handler != NULL;
}
static bool trace_fprobe_is_tracepoint(struct trace_fprobe *tf)
{
return tf->tpoint != NULL;
}
static const char *trace_fprobe_symbol(struct trace_fprobe *tf)
{
return tf->symbol ? tf->symbol : "unknown";
}
static bool trace_fprobe_is_busy(struct dyn_event *ev)
{
struct trace_fprobe *tf = to_trace_fprobe(ev);
return trace_probe_is_enabled(&tf->tp);
}
static bool trace_fprobe_match_command_head(struct trace_fprobe *tf,
int argc, const char **argv)
{
char buf[MAX_ARGSTR_LEN + 1];
if (!argc)
return true;
snprintf(buf, sizeof(buf), "%s", trace_fprobe_symbol(tf));
if (strcmp(buf, argv[0]))
return false;
argc--; argv++;
return trace_probe_match_command_args(&tf->tp, argc, argv);
}
static bool trace_fprobe_match(const char *system, const char *event,
int argc, const char **argv, struct dyn_event *ev)
{
struct trace_fprobe *tf = to_trace_fprobe(ev);
if (event[0] != '\0' && strcmp(trace_probe_name(&tf->tp), event))
return false;
if (system && strcmp(trace_probe_group_name(&tf->tp), system))
return false;
return trace_fprobe_match_command_head(tf, argc, argv);
}
static bool trace_fprobe_is_registered(struct trace_fprobe *tf)
{
return fprobe_is_registered(&tf->fp);
}
/*
* Note that we don't verify the fetch_insn code, since it does not come
* from user space.
*/
static int
process_fetch_insn(struct fetch_insn *code, void *rec, void *edata,
void *dest, void *base)
{
struct pt_regs *regs = rec;
unsigned long val;
int ret;
retry:
/* 1st stage: get value from context */
switch (code->op) {
case FETCH_OP_STACK:
val = regs_get_kernel_stack_nth(regs, code->param);
break;
case FETCH_OP_STACKP:
val = kernel_stack_pointer(regs);
break;
case FETCH_OP_RETVAL:
val = regs_return_value(regs);
break;
#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
case FETCH_OP_ARG:
val = regs_get_kernel_argument(regs, code->param);
break;
case FETCH_OP_EDATA:
val = *(unsigned long *)((unsigned long)edata + code->offset);
break;
#endif
case FETCH_NOP_SYMBOL: /* Ignore a place holder */
code++;
goto retry;
default:
ret = process_common_fetch_insn(code, &val);
if (ret < 0)
return ret;
}
code++;
return process_fetch_insn_bottom(code, val, dest, base);
}
NOKPROBE_SYMBOL(process_fetch_insn)
/* function entry handler */
static nokprobe_inline void
__fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
struct pt_regs *regs,
struct trace_event_file *trace_file)
{
struct fentry_trace_entry_head *entry;
struct trace_event_call *call = trace_probe_event_call(&tf->tp);
struct trace_event_buffer fbuffer;
int dsize;
if (WARN_ON_ONCE(call != trace_file->event_call))
return;
if (trace_trigger_soft_disabled(trace_file))
return;
dsize = __get_data_size(&tf->tp, regs, NULL);
entry = trace_event_buffer_reserve(&fbuffer, trace_file,
sizeof(*entry) + tf->tp.size + dsize);
if (!entry)
return;
fbuffer.regs = regs;
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
entry->ip = entry_ip;
store_trace_args(&entry[1], &tf->tp, regs, NULL, sizeof(*entry), dsize);
trace_event_buffer_commit(&fbuffer);
}
static void
fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
struct pt_regs *regs)
{
struct event_file_link *link;
trace_probe_for_each_link_rcu(link, &tf->tp)
__fentry_trace_func(tf, entry_ip, regs, link->file);
}
NOKPROBE_SYMBOL(fentry_trace_func);
/* function exit handler */
static int trace_fprobe_entry_handler(struct fprobe *fp, unsigned long entry_ip,
unsigned long ret_ip, struct pt_regs *regs,
void *entry_data)
{
struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
if (tf->tp.entry_arg)
store_trace_entry_data(entry_data, &tf->tp, regs);
return 0;
}
NOKPROBE_SYMBOL(trace_fprobe_entry_handler)
static nokprobe_inline void
__fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
unsigned long ret_ip, struct pt_regs *regs,
void *entry_data, struct trace_event_file *trace_file)
{
struct fexit_trace_entry_head *entry;
struct trace_event_buffer fbuffer;
struct trace_event_call *call = trace_probe_event_call(&tf->tp);
int dsize;
if (WARN_ON_ONCE(call != trace_file->event_call))
return;
if (trace_trigger_soft_disabled(trace_file))
return;
dsize = __get_data_size(&tf->tp, regs, entry_data);
entry = trace_event_buffer_reserve(&fbuffer, trace_file,
sizeof(*entry) + tf->tp.size + dsize);
if (!entry)
return;
fbuffer.regs = regs;
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
entry->func = entry_ip;
entry->ret_ip = ret_ip;
store_trace_args(&entry[1], &tf->tp, regs, entry_data, sizeof(*entry), dsize);
trace_event_buffer_commit(&fbuffer);
}
static void
fexit_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
unsigned long ret_ip, struct pt_regs *regs, void *entry_data)
{
struct event_file_link *link;
trace_probe_for_each_link_rcu(link, &tf->tp)
__fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data, link->file);
}
NOKPROBE_SYMBOL(fexit_trace_func);
#ifdef CONFIG_PERF_EVENTS
static int fentry_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
struct pt_regs *regs)
{
struct trace_event_call *call = trace_probe_event_call(&tf->tp);
struct fentry_trace_entry_head *entry;
struct hlist_head *head;
int size, __size, dsize;
int rctx;
head = this_cpu_ptr(call->perf_events);
if (hlist_empty(head))
return 0;
dsize = __get_data_size(&tf->tp, regs, NULL);
__size = sizeof(*entry) + tf->tp.size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
entry = perf_trace_buf_alloc(size, NULL, &rctx);
if (!entry)
return 0;
entry->ip = entry_ip;
memset(&entry[1], 0, dsize);
store_trace_args(&entry[1], &tf->tp, regs, NULL, sizeof(*entry), dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
return 0;
}
NOKPROBE_SYMBOL(fentry_perf_func);
static void
fexit_perf_func(struct trace_fprobe *tf, unsigned long entry_ip,
unsigned long ret_ip, struct pt_regs *regs,
void *entry_data)
{
struct trace_event_call *call = trace_probe_event_call(&tf->tp);
struct fexit_trace_entry_head *entry;
struct hlist_head *head;
int size, __size, dsize;
int rctx;
head = this_cpu_ptr(call->perf_events);
if (hlist_empty(head))
return;
dsize = __get_data_size(&tf->tp, regs, entry_data);
__size = sizeof(*entry) + tf->tp.size + dsize;
size = ALIGN(__size + sizeof(u32), sizeof(u64));
size -= sizeof(u32);
entry = perf_trace_buf_alloc(size, NULL, &rctx);
if (!entry)
return;
entry->func = entry_ip;
entry->ret_ip = ret_ip;
store_trace_args(&entry[1], &tf->tp, regs, entry_data, sizeof(*entry), dsize);
perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
head, NULL);
}
NOKPROBE_SYMBOL(fexit_perf_func);
#endif /* CONFIG_PERF_EVENTS */
static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
unsigned long ret_ip, struct pt_regs *regs,
void *entry_data)
{
struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
int ret = 0;
if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
fentry_trace_func(tf, entry_ip, regs);
#ifdef CONFIG_PERF_EVENTS
if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
ret = fentry_perf_func(tf, entry_ip, regs);
#endif
return ret;
}
NOKPROBE_SYMBOL(fentry_dispatcher);
static void fexit_dispatcher(struct fprobe *fp, unsigned long entry_ip,
unsigned long ret_ip, struct pt_regs *regs,
void *entry_data)
{
struct trace_fprobe *tf = container_of(fp, struct trace_fprobe, fp);
if (trace_probe_test_flag(&tf->tp, TP_FLAG_TRACE))
fexit_trace_func(tf, entry_ip, ret_ip, regs, entry_data);
#ifdef CONFIG_PERF_EVENTS
if (trace_probe_test_flag(&tf->tp, TP_FLAG_PROFILE))
fexit_perf_func(tf, entry_ip, ret_ip, regs, entry_data);
#endif
}
NOKPROBE_SYMBOL(fexit_dispatcher);
static void free_trace_fprobe(struct trace_fprobe *tf)
{
if (tf) {
trace_probe_cleanup(&tf->tp);
kfree(tf->symbol);
kfree(tf);
}
}
/*
* Allocate new trace_probe and initialize it (including fprobe).
*/
static struct trace_fprobe *alloc_trace_fprobe(const char *group,
const char *event,
const char *symbol,
struct tracepoint *tpoint,
struct module *mod,
int maxactive,
int nargs, bool is_return)
{
struct trace_fprobe *tf;
int ret = -ENOMEM;
tf = kzalloc(struct_size(tf, tp.args, nargs), GFP_KERNEL);
if (!tf)
return ERR_PTR(ret);
tf->symbol = kstrdup(symbol, GFP_KERNEL);
if (!tf->symbol)
goto error;
if (is_return)
tf->fp.exit_handler = fexit_dispatcher;
else
tf->fp.entry_handler = fentry_dispatcher;
tf->tpoint = tpoint;
tf->mod = mod;
tf->fp.nr_maxactive = maxactive;
ret = trace_probe_init(&tf->tp, event, group, false, nargs);
if (ret < 0)
goto error;
dyn_event_init(&tf->devent, &trace_fprobe_ops);
return tf;
error:
free_trace_fprobe(tf);
return ERR_PTR(ret);
}
static struct trace_fprobe *find_trace_fprobe(const char *event,
const char *group)
{
struct dyn_event *pos;
struct trace_fprobe *tf;
for_each_trace_fprobe(tf, pos)
if (strcmp(trace_probe_name(&tf->tp), event) == 0 &&
strcmp(trace_probe_group_name(&tf->tp), group) == 0)
return tf;
return NULL;
}
static inline int __enable_trace_fprobe(struct trace_fprobe *tf)
{
if (trace_fprobe_is_registered(tf))
enable_fprobe(&tf->fp);
return 0;
}
static void __disable_trace_fprobe(struct trace_probe *tp)
{
struct trace_fprobe *tf;
list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
if (!trace_fprobe_is_registered(tf))
continue;
disable_fprobe(&tf->fp);
}
}
/*
* Enable trace_probe
* if the file is NULL, enable "perf" handler, or enable "trace" handler.
*/
static int enable_trace_fprobe(struct trace_event_call *call,
struct trace_event_file *file)
{
struct trace_probe *tp;
struct trace_fprobe *tf;
bool enabled;
int ret = 0;
tp = trace_probe_primary_from_call(call);
if (WARN_ON_ONCE(!tp))
return -ENODEV;
enabled = trace_probe_is_enabled(tp);
/* This also changes "enabled" state */
if (file) {
ret = trace_probe_add_file(tp, file);
if (ret)
return ret;
} else
trace_probe_set_flag(tp, TP_FLAG_PROFILE);
if (!enabled) {
list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
/* TODO: check the fprobe is gone */
__enable_trace_fprobe(tf);
}
}
return 0;
}
/*
* Disable trace_probe
* if the file is NULL, disable "perf" handler, or disable "trace" handler.
*/
static int disable_trace_fprobe(struct trace_event_call *call,
struct trace_event_file *file)
{
struct trace_probe *tp;
tp = trace_probe_primary_from_call(call);
if (WARN_ON_ONCE(!tp))
return -ENODEV;
if (file) {
if (!trace_probe_get_file_link(tp, file))
return -ENOENT;
if (!trace_probe_has_single_file(tp))
goto out;
trace_probe_clear_flag(tp, TP_FLAG_TRACE);
} else
trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
if (!trace_probe_is_enabled(tp))
__disable_trace_fprobe(tp);
out:
if (file)
/*
* Synchronization is done in below function. For perf event,
* file == NULL and perf_trace_event_unreg() calls
* tracepoint_synchronize_unregister() to ensure synchronize
* event. We don't need to care about it.
*/
trace_probe_remove_file(tp, file);
return 0;
}
/* Event entry printers */
static enum print_line_t
print_fentry_event(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
struct fentry_trace_entry_head *field;
struct trace_seq *s = &iter->seq;
struct trace_probe *tp;
field = (struct fentry_trace_entry_head *)iter->ent;
tp = trace_probe_primary_from_call(
container_of(event, struct trace_event_call, event));
if (WARN_ON_ONCE(!tp))
goto out;
trace_seq_printf(s, "%s: (", trace_probe_name(tp));
if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
goto out;
trace_seq_putc(s, ')');
if (trace_probe_print_args(s, tp->args, tp->nr_args,
(u8 *)&field[1], field) < 0)
goto out;
trace_seq_putc(s, '\n');
out:
return trace_handle_return(s);
}
static enum print_line_t
print_fexit_event(struct trace_iterator *iter, int flags,
struct trace_event *event)
{
struct fexit_trace_entry_head *field;
struct trace_seq *s = &iter->seq;
struct trace_probe *tp;
field = (struct fexit_trace_entry_head *)iter->ent;
tp = trace_probe_primary_from_call(
container_of(event, struct trace_event_call, event));
if (WARN_ON_ONCE(!tp))
goto out;
trace_seq_printf(s, "%s: (", trace_probe_name(tp));
if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
goto out;
trace_seq_puts(s, " <- ");
if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
goto out;
trace_seq_putc(s, ')');
if (trace_probe_print_args(s, tp->args, tp->nr_args,
(u8 *)&field[1], field) < 0)
goto out;
trace_seq_putc(s, '\n');
out:
return trace_handle_return(s);
}
static int fentry_event_define_fields(struct trace_event_call *event_call)
{
int ret;
struct fentry_trace_entry_head field;
struct trace_probe *tp;
tp = trace_probe_primary_from_call(event_call);
if (WARN_ON_ONCE(!tp))
return -ENOENT;
DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
}
static int fexit_event_define_fields(struct trace_event_call *event_call)
{
int ret;
struct fexit_trace_entry_head field;
struct trace_probe *tp;
tp = trace_probe_primary_from_call(event_call);
if (WARN_ON_ONCE(!tp))
return -ENOENT;
DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
}
static struct trace_event_functions fentry_funcs = {
.trace = print_fentry_event
};
static struct trace_event_functions fexit_funcs = {
.trace = print_fexit_event
};
static struct trace_event_fields fentry_fields_array[] = {
{ .type = TRACE_FUNCTION_TYPE,
.define_fields = fentry_event_define_fields },
{}
};
static struct trace_event_fields fexit_fields_array[] = {
{ .type = TRACE_FUNCTION_TYPE,
.define_fields = fexit_event_define_fields },
{}
};
static int fprobe_register(struct trace_event_call *event,
enum trace_reg type, void *data);
static inline void init_trace_event_call(struct trace_fprobe *tf)
{
struct trace_event_call *call = trace_probe_event_call(&tf->tp);
if (trace_fprobe_is_return(tf)) {
call->event.funcs = &fexit_funcs;
call->class->fields_array = fexit_fields_array;
} else {
call->event.funcs = &fentry_funcs;
call->class->fields_array = fentry_fields_array;
}
call->flags = TRACE_EVENT_FL_FPROBE;
call->class->reg = fprobe_register;
}
static int register_fprobe_event(struct trace_fprobe *tf)
{
init_trace_event_call(tf);
return trace_probe_register_event_call(&tf->tp);
}
static int unregister_fprobe_event(struct trace_fprobe *tf)
{
return trace_probe_unregister_event_call(&tf->tp);
}
static int __regsiter_tracepoint_fprobe(struct trace_fprobe *tf)
{
struct tracepoint *tpoint = tf->tpoint;
unsigned long ip = (unsigned long)tpoint->probestub;
int ret;
/*
* Here, we do 2 steps to enable fprobe on a tracepoint.
* At first, put __probestub_##TP function on the tracepoint
* and put a fprobe on the stub function.
*/
ret = tracepoint_probe_register_prio_may_exist(tpoint,
tpoint->probestub, NULL, 0);
if (ret < 0)
return ret;
return register_fprobe_ips(&tf->fp, &ip, 1);
}
/* Internal register function - just handle fprobe and flags */
static int __register_trace_fprobe(struct trace_fprobe *tf)
{
int i, ret;
/* Should we need new LOCKDOWN flag for fprobe? */
ret = security_locked_down(LOCKDOWN_KPROBES);
if (ret)
return ret;
if (trace_fprobe_is_registered(tf))
return -EINVAL;
for (i = 0; i < tf->tp.nr_args; i++) {
ret = traceprobe_update_arg(&tf->tp.args[i]);
if (ret)
return ret;
}
/* Set/clear disabled flag according to tp->flag */
if (trace_probe_is_enabled(&tf->tp))
tf->fp.flags &= ~FPROBE_FL_DISABLED;
else
tf->fp.flags |= FPROBE_FL_DISABLED;
if (trace_fprobe_is_tracepoint(tf)) {
/* This tracepoint is not loaded yet */
if (tf->tpoint == TRACEPOINT_STUB)
return 0;
return __regsiter_tracepoint_fprobe(tf);
}
/* TODO: handle filter, nofilter or symbol list */
return register_fprobe(&tf->fp, tf->symbol, NULL);
}
/* Internal unregister function - just handle fprobe and flags */
static void __unregister_trace_fprobe(struct trace_fprobe *tf)
{
if (trace_fprobe_is_registered(tf)) {
unregister_fprobe(&tf->fp);
memset(&tf->fp, 0, sizeof(tf->fp));
if (trace_fprobe_is_tracepoint(tf)) {
tracepoint_probe_unregister(tf->tpoint,
tf->tpoint->probestub, NULL);
tf->tpoint = NULL;
tf->mod = NULL;
}
}
}
/* TODO: make this trace_*probe common function */
/* Unregister a trace_probe and probe_event */
static int unregister_trace_fprobe(struct trace_fprobe *tf)
{
/* If other probes are on the event, just unregister fprobe */
if (trace_probe_has_sibling(&tf->tp))
goto unreg;
/* Enabled event can not be unregistered */
if (trace_probe_is_enabled(&tf->tp))
return -EBUSY;
/* If there's a reference to the dynamic event */
if (trace_event_dyn_busy(trace_probe_event_call(&tf->tp)))
return -EBUSY;
/* Will fail if probe is being used by ftrace or perf */
if (unregister_fprobe_event(tf))
return -EBUSY;
unreg:
__unregister_trace_fprobe(tf);
dyn_event_remove(&tf->devent);
trace_probe_unlink(&tf->tp);
return 0;
}
static bool trace_fprobe_has_same_fprobe(struct trace_fprobe *orig,
struct trace_fprobe *comp)
{
struct trace_probe_event *tpe = orig->tp.event;
int i;
list_for_each_entry(orig, &tpe->probes, tp.list) {
if (strcmp(trace_fprobe_symbol(orig),
trace_fprobe_symbol(comp)))
continue;
/*
* trace_probe_compare_arg_type() ensured that nr_args and
* each argument name and type are same. Let's compare comm.
*/
for (i = 0; i < orig->tp.nr_args; i++) {
if (strcmp(orig->tp.args[i].comm,
comp->tp.args[i].comm))
break;
}
if (i == orig->tp.nr_args)
return true;
}
return false;
}
static int append_trace_fprobe(struct trace_fprobe *tf, struct trace_fprobe *to)
{
int ret;
if (trace_fprobe_is_return(tf) != trace_fprobe_is_return(to) ||
trace_fprobe_is_tracepoint(tf) != trace_fprobe_is_tracepoint(to)) {
trace_probe_log_set_index(0);
trace_probe_log_err(0, DIFF_PROBE_TYPE);
return -EEXIST;
}
ret = trace_probe_compare_arg_type(&tf->tp, &to->tp);
if (ret) {
/* Note that argument starts index = 2 */
trace_probe_log_set_index(ret + 1);
trace_probe_log_err(0, DIFF_ARG_TYPE);
return -EEXIST;
}
if (trace_fprobe_has_same_fprobe(to, tf)) {
trace_probe_log_set_index(0);
trace_probe_log_err(0, SAME_PROBE);
return -EEXIST;
}
/* Append to existing event */
ret = trace_probe_append(&tf->tp, &to->tp);
if (ret)
return ret;
ret = __register_trace_fprobe(tf);
if (ret)
trace_probe_unlink(&tf->tp);
else
dyn_event_add(&tf->devent, trace_probe_event_call(&tf->tp));
return ret;
}
/* Register a trace_probe and probe_event */
static int register_trace_fprobe(struct trace_fprobe *tf)
{
struct trace_fprobe *old_tf;
int ret;
mutex_lock(&event_mutex);
old_tf = find_trace_fprobe(trace_probe_name(&tf->tp),
trace_probe_group_name(&tf->tp));
if (old_tf) {
ret = append_trace_fprobe(tf, old_tf);
goto end;
}
/* Register new event */
ret = register_fprobe_event(tf);
if (ret) {
if (ret == -EEXIST) {
trace_probe_log_set_index(0);
trace_probe_log_err(0, EVENT_EXIST);
} else
pr_warn("Failed to register probe event(%d)\n", ret);
goto end;
}
/* Register fprobe */
ret = __register_trace_fprobe(tf);
if (ret < 0)
unregister_fprobe_event(tf);
else
dyn_event_add(&tf->devent, trace_probe_event_call(&tf->tp));
end:
mutex_unlock(&event_mutex);
return ret;
}
struct __find_tracepoint_cb_data {
const char *tp_name;
struct tracepoint *tpoint;
struct module *mod;
};
static void __find_tracepoint_module_cb(struct tracepoint *tp, struct module *mod, void *priv)
{
struct __find_tracepoint_cb_data *data = priv;
if (!data->tpoint && !strcmp(data->tp_name, tp->name)) {
data->tpoint = tp;
if (!data->mod) {
data->mod = mod;
if (!try_module_get(data->mod)) {
data->tpoint = NULL;
data->mod = NULL;
}
}
}
}
static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
{
struct __find_tracepoint_cb_data *data = priv;
if (!data->tpoint && !strcmp(data->tp_name, tp->name))
data->tpoint = tp;
}
/*
* Find a tracepoint from kernel and module. If the tracepoint is in a module,
* this increments the module refcount to prevent unloading until the
* trace_fprobe is registered to the list. After registering the trace_fprobe
* on the trace_fprobe list, the module refcount is decremented because
* tracepoint_probe_module_cb will handle it.
*/
static struct tracepoint *find_tracepoint(const char *tp_name,
struct module **tp_mod)
{
struct __find_tracepoint_cb_data data = {
.tp_name = tp_name,
.mod = NULL,
};
for_each_kernel_tracepoint(__find_tracepoint_cb, &data);
if (!data.tpoint && IS_ENABLED(CONFIG_MODULES)) {
for_each_module_tracepoint(__find_tracepoint_module_cb, &data);
*tp_mod = data.mod;
}
return data.tpoint;
}
#ifdef CONFIG_MODULES
static void reenable_trace_fprobe(struct trace_fprobe *tf)
{
struct trace_probe *tp = &tf->tp;
list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) {
__enable_trace_fprobe(tf);
}
}
static struct tracepoint *find_tracepoint_in_module(struct module *mod,
const char *tp_name)
{
struct __find_tracepoint_cb_data data = {
.tp_name = tp_name,
.mod = mod,
};
for_each_tracepoint_in_module(mod, __find_tracepoint_module_cb, &data);
return data.tpoint;
}
static int __tracepoint_probe_module_cb(struct notifier_block *self,
unsigned long val, void *data)
{
struct tp_module *tp_mod = data;
struct tracepoint *tpoint;
struct trace_fprobe *tf;
struct dyn_event *pos;
if (val != MODULE_STATE_GOING && val != MODULE_STATE_COMING)
return NOTIFY_DONE;
mutex_lock(&event_mutex);
for_each_trace_fprobe(tf, pos) {
if (val == MODULE_STATE_COMING && tf->tpoint == TRACEPOINT_STUB) {
tpoint = find_tracepoint_in_module(tp_mod->mod, tf->symbol);
if (tpoint) {
tf->tpoint = tpoint;
tf->mod = tp_mod->mod;
if (!WARN_ON_ONCE(__regsiter_tracepoint_fprobe(tf)) &&
trace_probe_is_enabled(&tf->tp))
reenable_trace_fprobe(tf);
}
} else if (val == MODULE_STATE_GOING && tp_mod->mod == tf->mod) {
tracepoint_probe_unregister(tf->tpoint,
tf->tpoint->probestub, NULL);
tf->tpoint = NULL;
tf->mod = NULL;
}
}
mutex_unlock(&event_mutex);
return NOTIFY_DONE;
}
static struct notifier_block tracepoint_module_nb = {
.notifier_call = __tracepoint_probe_module_cb,
};
#endif /* CONFIG_MODULES */
static int parse_symbol_and_return(int argc, const char *argv[],
char **symbol, bool *is_return,
bool is_tracepoint)
{
char *tmp = strchr(argv[1], '%');
int i;
if (tmp) {
int len = tmp - argv[1];
if (!is_tracepoint && !strcmp(tmp, "%return")) {
*is_return = true;
} else {
trace_probe_log_err(len, BAD_ADDR_SUFFIX);
return -EINVAL;
}
*symbol = kmemdup_nul(argv[1], len, GFP_KERNEL);
} else
*symbol = kstrdup(argv[1], GFP_KERNEL);
if (!*symbol)
return -ENOMEM;
if (*is_return)
return 0;
/* If there is $retval, this should be a return fprobe. */
for (i = 2; i < argc; i++) {
tmp = strstr(argv[i], "$retval");
if (tmp && !isalnum(tmp[7]) && tmp[7] != '_') {
if (is_tracepoint) {
trace_probe_log_set_index(i);
trace_probe_log_err(tmp - argv[i], RETVAL_ON_PROBE);
return -EINVAL;
}
*is_return = true;
break;
}
}
return 0;
}
static int __trace_fprobe_create(int argc, const char *argv[])
{
/*
* Argument syntax:
* - Add fentry probe:
* f[:[GRP/][EVENT]] [MOD:]KSYM [FETCHARGS]
* - Add fexit probe:
* f[N][:[GRP/][EVENT]] [MOD:]KSYM%return [FETCHARGS]
* - Add tracepoint probe:
* t[:[GRP/][EVENT]] TRACEPOINT [FETCHARGS]
*
* Fetch args:
* $retval : fetch return value
* $stack : fetch stack address
* $stackN : fetch Nth entry of stack (N:0-)
* $argN : fetch Nth argument (N:1-)
* $comm : fetch current task comm
* @ADDR : fetch memory at ADDR (ADDR should be in kernel)
* @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
* Dereferencing memory fetch:
* +|-offs(ARG) : fetch memory at ARG +|- offs address.
* Alias name of args:
* NAME=FETCHARG : set NAME as alias of FETCHARG.
* Type of args:
* FETCHARG:TYPE : use TYPE instead of unsigned long.
*/
struct trace_fprobe *tf = NULL;
int i, len, new_argc = 0, ret = 0;
bool is_return = false;
char *symbol = NULL;
const char *event = NULL, *group = FPROBE_EVENT_SYSTEM;
const char **new_argv = NULL;
int maxactive = 0;
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
char sbuf[KSYM_NAME_LEN];
char abuf[MAX_BTF_ARGS_LEN];
char *dbuf = NULL;
bool is_tracepoint = false;
struct module *tp_mod = NULL;
struct tracepoint *tpoint = NULL;
struct traceprobe_parse_context ctx = {
.flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
};
if ((argv[0][0] != 'f' && argv[0][0] != 't') || argc < 2)
return -ECANCELED;
if (argv[0][0] == 't') {
is_tracepoint = true;
group = TRACEPOINT_EVENT_SYSTEM;
}
trace_probe_log_init("trace_fprobe", argc, argv);
event = strchr(&argv[0][1], ':');
if (event)
event++;
if (isdigit(argv[0][1])) {
if (event)
len = event - &argv[0][1] - 1;
else
len = strlen(&argv[0][1]);
if (len > MAX_EVENT_NAME_LEN - 1) {
trace_probe_log_err(1, BAD_MAXACT);
goto parse_error;
}
memcpy(buf, &argv[0][1], len);
buf[len] = '\0';
ret = kstrtouint(buf, 0, &maxactive);
if (ret || !maxactive) {
trace_probe_log_err(1, BAD_MAXACT);
goto parse_error;
}
/* fprobe rethook instances are iterated over via a list. The
* maximum should stay reasonable.
*/
if (maxactive > RETHOOK_MAXACTIVE_MAX) {
trace_probe_log_err(1, MAXACT_TOO_BIG);
goto parse_error;
}
}
trace_probe_log_set_index(1);
/* a symbol(or tracepoint) must be specified */
ret = parse_symbol_and_return(argc, argv, &symbol, &is_return, is_tracepoint);
if (ret < 0)
goto parse_error;
if (!is_return && maxactive) {
trace_probe_log_set_index(0);
trace_probe_log_err(1, BAD_MAXACT_TYPE);
goto parse_error;
}
trace_probe_log_set_index(0);
if (event) {
ret = traceprobe_parse_event_name(&event, &group, gbuf,
event - argv[0]);
if (ret)
goto parse_error;
}
if (!event) {
/* Make a new event name */
if (is_tracepoint)
snprintf(buf, MAX_EVENT_NAME_LEN, "%s%s",
isdigit(*symbol) ? "_" : "", symbol);
else
snprintf(buf, MAX_EVENT_NAME_LEN, "%s__%s", symbol,
is_return ? "exit" : "entry");
sanitize_event_name(buf);
event = buf;
}
if (is_return)
ctx.flags |= TPARG_FL_RETURN;
else
ctx.flags |= TPARG_FL_FENTRY;
if (is_tracepoint) {
ctx.flags |= TPARG_FL_TPOINT;
tpoint = find_tracepoint(symbol, &tp_mod);
if (tpoint) {
ctx.funcname = kallsyms_lookup(
(unsigned long)tpoint->probestub,
NULL, NULL, NULL, sbuf);
} else if (IS_ENABLED(CONFIG_MODULES)) {
/* This *may* be loaded afterwards */
tpoint = TRACEPOINT_STUB;
ctx.funcname = symbol;
} else {
trace_probe_log_set_index(1);
trace_probe_log_err(0, NO_TRACEPOINT);
goto parse_error;
}
} else
ctx.funcname = symbol;
argc -= 2; argv += 2;
new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc,
abuf, MAX_BTF_ARGS_LEN, &ctx);
if (IS_ERR(new_argv)) {
ret = PTR_ERR(new_argv);
new_argv = NULL;
goto out;
}
if (new_argv) {
argc = new_argc;
argv = new_argv;
}
if (argc > MAX_TRACE_ARGS) {
ret = -E2BIG;
goto out;
}
ret = traceprobe_expand_dentry_args(argc, argv, &dbuf);
if (ret)
goto out;
/* setup a probe */
tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod,
maxactive, argc, is_return);
if (IS_ERR(tf)) {
ret = PTR_ERR(tf);
/* This must return -ENOMEM, else there is a bug */
WARN_ON_ONCE(ret != -ENOMEM);
goto out; /* We know tf is not allocated */
}
/* parse arguments */
for (i = 0; i < argc; i++) {
trace_probe_log_set_index(i + 2);
ctx.offset = 0;
ret = traceprobe_parse_probe_arg(&tf->tp, i, argv[i], &ctx);
if (ret)
goto error; /* This can be -ENOMEM */
}
if (is_return && tf->tp.entry_arg) {
tf->fp.entry_handler = trace_fprobe_entry_handler;
tf->fp.entry_data_size = traceprobe_get_entry_data_size(&tf->tp);
}
ret = traceprobe_set_print_fmt(&tf->tp,
is_return ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL);
if (ret < 0)
goto error;
ret = register_trace_fprobe(tf);
if (ret) {
trace_probe_log_set_index(1);
if (ret == -EILSEQ)
trace_probe_log_err(0, BAD_INSN_BNDRY);
else if (ret == -ENOENT)
trace_probe_log_err(0, BAD_PROBE_ADDR);
else if (ret != -ENOMEM && ret != -EEXIST)
trace_probe_log_err(0, FAIL_REG_PROBE);
goto error;
}
out:
if (tp_mod)
module_put(tp_mod);
traceprobe_finish_parse(&ctx);
trace_probe_log_clear();
kfree(new_argv);
kfree(symbol);
kfree(dbuf);
return ret;
parse_error:
ret = -EINVAL;
error:
free_trace_fprobe(tf);
goto out;
}
static int trace_fprobe_create(const char *raw_command)
{
return trace_probe_create(raw_command, __trace_fprobe_create);
}
static int trace_fprobe_release(struct dyn_event *ev)
{
struct trace_fprobe *tf = to_trace_fprobe(ev);
int ret = unregister_trace_fprobe(tf);
if (!ret)
free_trace_fprobe(tf);
return ret;
}
static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev)
{
struct trace_fprobe *tf = to_trace_fprobe(ev);
int i;
if (trace_fprobe_is_tracepoint(tf))
seq_putc(m, 't');
else
seq_putc(m, 'f');
if (trace_fprobe_is_return(tf) && tf->fp.nr_maxactive)
seq_printf(m, "%d", tf->fp.nr_maxactive);
seq_printf(m, ":%s/%s", trace_probe_group_name(&tf->tp),
trace_probe_name(&tf->tp));
seq_printf(m, " %s%s", trace_fprobe_symbol(tf),
trace_fprobe_is_return(tf) ? "%return" : "");
for (i = 0; i < tf->tp.nr_args; i++)
seq_printf(m, " %s=%s", tf->tp.args[i].name, tf->tp.args[i].comm);
seq_putc(m, '\n');
return 0;
}
/*
* called by perf_trace_init() or __ftrace_set_clr_event() under event_mutex.
*/
static int fprobe_register(struct trace_event_call *event,
enum trace_reg type, void *data)
{
struct trace_event_file *file = data;
switch (type) {
case TRACE_REG_REGISTER:
return enable_trace_fprobe(event, file);
case TRACE_REG_UNREGISTER:
return disable_trace_fprobe(event, file);
#ifdef CONFIG_PERF_EVENTS
case TRACE_REG_PERF_REGISTER:
return enable_trace_fprobe(event, NULL);
case TRACE_REG_PERF_UNREGISTER:
return disable_trace_fprobe(event, NULL);
case TRACE_REG_PERF_OPEN:
case TRACE_REG_PERF_CLOSE:
case TRACE_REG_PERF_ADD:
case TRACE_REG_PERF_DEL:
return 0;
#endif
}
return 0;
}
/*
* Register dynevent at core_initcall. This allows kernel to setup fprobe
* events in postcore_initcall without tracefs.
*/
static __init int init_fprobe_trace_early(void)
{
int ret;
ret = dyn_event_register(&trace_fprobe_ops);
if (ret)
return ret;
#ifdef CONFIG_MODULES
ret = register_tracepoint_module_notifier(&tracepoint_module_nb);
if (ret)
return ret;
#endif
return 0;
}
core_initcall(init_fprobe_trace_early);