e7186af7fb
For backward compatibility, older tooling expects to see the kernel_stack event with a "caller" field that is a fixed size array of 8 addresses. The code now supports more than 8 with an added "size" field that states the real number of entries. But the "caller" field still just looks like a fixed size to user space. Since the tracing macros that create the user space format files also create the structures that those files represent, the kernel_stack event structure had its "caller" field as a fixed size of 8, but in reality, when it is allocated on the ring buffer, it can hold more if the stack trace is bigger than 8 functions. The copying of these entries was simply done with a memcpy(): size = nr_entries * sizeof(unsigned long); memcpy(entry->caller, fstack->calls, size); The FORTIFY_SOURCE logic noticed at runtime that when nr_entries was larger than 8, the memcpy() was writing more than what the structure stated it can hold and it complained about it. This is because the FORTIFY_SOURCE code is unaware that the amount allocated is actually enough to hold the size. It does not expect that a fixed size field will hold more than the fixed size. This was originally solved by hiding the caller assignment with some pointer arithmetic. ptr = ring_buffer_data(); entry = ptr; ptr += offsetof(typeof(*entry), caller); memcpy(ptr, fstack->calls, size); But it is considered bad form to hide from kernel hardening. Instead, make it work nicely with FORTIFY_SOURCE by adding a new __stack_array() macro that is specific for this one special use case. The macro will take 4 arguments: type, item, len, field (whereas the __array() macro takes just the first three). This macro will act just like the __array() macro when creating the code to deal with the format file that is exposed to user space. 
But for the kernel, it will turn the caller field into: type item[] __counted_by(field); or for this instance: unsigned long caller[] __counted_by(size); Now the kernel code can expose the assignment of the caller to the FORTIFY_SOURCE and everyone is happy! Link: https://lore.kernel.org/linux-trace-kernel/20230712105235.5fc441aa@gandalf.local.home/ Link: https://lore.kernel.org/linux-trace-kernel/20230713092605.2ddb9788@rorschach.local.home Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Sven Schnelle <svens@linux.ibm.com> Suggested-by: Kees Cook <keescook@chromium.org> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org> Reviewed-by: Kees Cook <keescook@chromium.org>
430 lines
10 KiB
C
430 lines
10 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* This file defines the trace event structures that go into the ring
|
|
* buffer directly. They are created via macros so that changes for them
|
|
* appear in the format file. Using macros will automate this process.
|
|
*
|
|
* The macro used to create a ftrace data structure is:
|
|
*
|
|
* FTRACE_ENTRY( name, struct_name, id, structure, print )
|
|
*
|
|
* @name: the name used as the event name, as well as the name of
|
|
* the directory that holds the format file.
|
|
*
|
|
* @struct_name: the name of the structure that is created.
|
|
*
|
|
* @id: The event identifier that is used to detect what event
|
|
* this is from the ring buffer.
|
|
*
|
|
* @structure: the structure layout
|
|
*
|
|
* - __field( type, item )
|
|
* This is equivalent to declaring
|
|
* type item;
|
|
* in the structure.
|
|
* - __array( type, item, size )
|
|
* This is equivalent to declaring
|
|
* type item[size];
|
|
* in the structure.
|
|
*
|
|
* * for structures within structures, the format of the internal
|
|
* structure is laid out. This allows the internal structure
|
|
* to be deciphered for the format file. Although these macros
|
|
* may become out of sync with the internal structure, they
|
|
* will create a compile error if it happens. Since the
|
|
* internal structures are just tracing helpers, this is not
|
|
* an issue.
|
|
*
|
|
* When an internal structure is used, it should use:
|
|
*
|
|
* __field_struct( type, item )
|
|
*
|
|
* instead of __field. This will prevent it from being shown in
|
|
* the output file. The fields in the structure should use:
|
|
*
|
|
* __field_desc( type, container, item )
|
|
* __array_desc( type, container, item, len )
|
|
*
|
|
* type, item and len are the same as __field and __array, but
|
|
* container is added. This is the name of the item in
|
|
* __field_struct that this is describing.
|
|
*
|
|
*
|
|
* @print: the print format shown to users in the format file.
|
|
*/
|
|
|
|
/*
|
|
* Function trace entry - function address and parent function address:
|
|
*/
|
|
FTRACE_ENTRY_REG(function, ftrace_entry,
|
|
|
|
TRACE_FN,
|
|
|
|
F_STRUCT(
|
|
__field_fn( unsigned long, ip )
|
|
__field_fn( unsigned long, parent_ip )
|
|
),
|
|
|
|
F_printk(" %ps <-- %ps",
|
|
(void *)__entry->ip, (void *)__entry->parent_ip),
|
|
|
|
perf_ftrace_event_register
|
|
);
|
|
|
|
/* Function call entry */
|
|
FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,
|
|
|
|
TRACE_GRAPH_ENT,
|
|
|
|
F_STRUCT(
|
|
__field_struct( struct ftrace_graph_ent, graph_ent )
|
|
__field_packed( unsigned long, graph_ent, func )
|
|
__field_packed( int, graph_ent, depth )
|
|
),
|
|
|
|
F_printk("--> %ps (%d)", (void *)__entry->func, __entry->depth)
|
|
);
|
|
|
|
/* Function return entry */
|
|
#ifdef CONFIG_FUNCTION_GRAPH_RETVAL
|
|
|
|
FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
|
|
|
|
TRACE_GRAPH_RET,
|
|
|
|
F_STRUCT(
|
|
__field_struct( struct ftrace_graph_ret, ret )
|
|
__field_packed( unsigned long, ret, func )
|
|
__field_packed( unsigned long, ret, retval )
|
|
__field_packed( int, ret, depth )
|
|
__field_packed( unsigned int, ret, overrun )
|
|
__field_packed( unsigned long long, ret, calltime)
|
|
__field_packed( unsigned long long, ret, rettime )
|
|
),
|
|
|
|
F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d retval: %lx",
|
|
(void *)__entry->func, __entry->depth,
|
|
__entry->calltime, __entry->rettime,
|
|
__entry->depth, __entry->retval)
|
|
);
|
|
|
|
#else
|
|
|
|
FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,
|
|
|
|
TRACE_GRAPH_RET,
|
|
|
|
F_STRUCT(
|
|
__field_struct( struct ftrace_graph_ret, ret )
|
|
__field_packed( unsigned long, ret, func )
|
|
__field_packed( int, ret, depth )
|
|
__field_packed( unsigned int, ret, overrun )
|
|
__field_packed( unsigned long long, ret, calltime)
|
|
__field_packed( unsigned long long, ret, rettime )
|
|
),
|
|
|
|
F_printk("<-- %ps (%d) (start: %llx end: %llx) over: %d",
|
|
(void *)__entry->func, __entry->depth,
|
|
__entry->calltime, __entry->rettime,
|
|
__entry->depth)
|
|
);
|
|
|
|
#endif
|
|
|
|
/*
 * Context switch trace entry - which task (and prio) we switched from/to.
 *
 * The same field list serves both the context_switch and the wakeup
 * events: only one structure is created, but two format outputs are
 * needed, so the list is kept in a macro and expanded for each of them.
 */
#define FTRACE_CTX_FIELDS				\
	__field(unsigned int, prev_pid)			\
	__field(unsigned int, next_pid)			\
	__field(unsigned int, next_cpu)			\
	__field(unsigned char, prev_prio)		\
	__field(unsigned char, prev_state)		\
	__field(unsigned char, next_prio)		\
	__field(unsigned char, next_state)
|
|
|
|
FTRACE_ENTRY(context_switch, ctx_switch_entry,
|
|
|
|
TRACE_CTX,
|
|
|
|
F_STRUCT(
|
|
FTRACE_CTX_FIELDS
|
|
),
|
|
|
|
F_printk("%u:%u:%u ==> %u:%u:%u [%03u]",
|
|
__entry->prev_pid, __entry->prev_prio, __entry->prev_state,
|
|
__entry->next_pid, __entry->next_prio, __entry->next_state,
|
|
__entry->next_cpu)
|
|
);
|
|
|
|
/*
|
|
* FTRACE_ENTRY_DUP only creates the format file, it will not
|
|
* create another structure.
|
|
*/
|
|
FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
|
|
|
|
TRACE_WAKE,
|
|
|
|
F_STRUCT(
|
|
FTRACE_CTX_FIELDS
|
|
),
|
|
|
|
F_printk("%u:%u:%u ==+ %u:%u:%u [%03u]",
|
|
__entry->prev_pid, __entry->prev_prio, __entry->prev_state,
|
|
__entry->next_pid, __entry->next_prio, __entry->next_state,
|
|
__entry->next_cpu)
|
|
);
|
|
|
|
/*
|
|
* Stack-trace entry:
|
|
*/
|
|
|
|
/*
 * Number of "caller" slots that the kernel_stack and user_stack event
 * layouts declare, matching the eight entries their print formats list.
 */
#define FTRACE_STACK_ENTRIES 8
|
|
|
|
FTRACE_ENTRY(kernel_stack, stack_entry,
|
|
|
|
TRACE_STACK,
|
|
|
|
F_STRUCT(
|
|
__field( int, size )
|
|
__stack_array( unsigned long, caller, FTRACE_STACK_ENTRIES, size)
|
|
),
|
|
|
|
F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n"
|
|
"\t=> %ps\n\t=> %ps\n\t=> %ps\n"
|
|
"\t=> %ps\n\t=> %ps\n",
|
|
(void *)__entry->caller[0], (void *)__entry->caller[1],
|
|
(void *)__entry->caller[2], (void *)__entry->caller[3],
|
|
(void *)__entry->caller[4], (void *)__entry->caller[5],
|
|
(void *)__entry->caller[6], (void *)__entry->caller[7])
|
|
);
|
|
|
|
FTRACE_ENTRY(user_stack, userstack_entry,
|
|
|
|
TRACE_USER_STACK,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned int, tgid )
|
|
__array( unsigned long, caller, FTRACE_STACK_ENTRIES )
|
|
),
|
|
|
|
F_printk("\t=> %ps\n\t=> %ps\n\t=> %ps\n"
|
|
"\t=> %ps\n\t=> %ps\n\t=> %ps\n"
|
|
"\t=> %ps\n\t=> %ps\n",
|
|
(void *)__entry->caller[0], (void *)__entry->caller[1],
|
|
(void *)__entry->caller[2], (void *)__entry->caller[3],
|
|
(void *)__entry->caller[4], (void *)__entry->caller[5],
|
|
(void *)__entry->caller[6], (void *)__entry->caller[7])
|
|
);
|
|
|
|
/*
|
|
* trace_printk entry:
|
|
*/
|
|
FTRACE_ENTRY(bprint, bprint_entry,
|
|
|
|
TRACE_BPRINT,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned long, ip )
|
|
__field( const char *, fmt )
|
|
__dynamic_array( u32, buf )
|
|
),
|
|
|
|
F_printk("%ps: %s",
|
|
(void *)__entry->ip, __entry->fmt)
|
|
);
|
|
|
|
FTRACE_ENTRY_REG(print, print_entry,
|
|
|
|
TRACE_PRINT,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned long, ip )
|
|
__dynamic_array( char, buf )
|
|
),
|
|
|
|
F_printk("%ps: %s",
|
|
(void *)__entry->ip, __entry->buf),
|
|
|
|
ftrace_event_register
|
|
);
|
|
|
|
FTRACE_ENTRY(raw_data, raw_data_entry,
|
|
|
|
TRACE_RAW_DATA,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned int, id )
|
|
__dynamic_array( char, buf )
|
|
),
|
|
|
|
F_printk("id:%04x %08x",
|
|
__entry->id, (int)__entry->buf[0])
|
|
);
|
|
|
|
FTRACE_ENTRY(bputs, bputs_entry,
|
|
|
|
TRACE_BPUTS,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned long, ip )
|
|
__field( const char *, str )
|
|
),
|
|
|
|
F_printk("%ps: %s",
|
|
(void *)__entry->ip, __entry->str)
|
|
);
|
|
|
|
FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
|
|
|
|
TRACE_MMIO_RW,
|
|
|
|
F_STRUCT(
|
|
__field_struct( struct mmiotrace_rw, rw )
|
|
__field_desc( resource_size_t, rw, phys )
|
|
__field_desc( unsigned long, rw, value )
|
|
__field_desc( unsigned long, rw, pc )
|
|
__field_desc( int, rw, map_id )
|
|
__field_desc( unsigned char, rw, opcode )
|
|
__field_desc( unsigned char, rw, width )
|
|
),
|
|
|
|
F_printk("%lx %lx %lx %d %x %x",
|
|
(unsigned long)__entry->phys, __entry->value, __entry->pc,
|
|
__entry->map_id, __entry->opcode, __entry->width)
|
|
);
|
|
|
|
FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
|
|
|
|
TRACE_MMIO_MAP,
|
|
|
|
F_STRUCT(
|
|
__field_struct( struct mmiotrace_map, map )
|
|
__field_desc( resource_size_t, map, phys )
|
|
__field_desc( unsigned long, map, virt )
|
|
__field_desc( unsigned long, map, len )
|
|
__field_desc( int, map, map_id )
|
|
__field_desc( unsigned char, map, opcode )
|
|
),
|
|
|
|
F_printk("%lx %lx %lx %d %x",
|
|
(unsigned long)__entry->phys, __entry->virt, __entry->len,
|
|
__entry->map_id, __entry->opcode)
|
|
);
|
|
|
|
|
|
/*
 * Size used for the "func" array of the branch event, which is declared
 * with one extra element (TRACE_FUNC_SIZE+1) - presumably room for the
 * terminating NUL; confirm against the code that fills it.
 */
#define TRACE_FUNC_SIZE 30
|
|
/*
 * Size used for the "file" array of the branch event, which is declared
 * with one extra element (TRACE_FILE_SIZE+1) - presumably room for the
 * terminating NUL; confirm against the code that fills it.
 */
#define TRACE_FILE_SIZE 20
|
|
|
|
FTRACE_ENTRY(branch, trace_branch,
|
|
|
|
TRACE_BRANCH,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned int, line )
|
|
__array( char, func, TRACE_FUNC_SIZE+1 )
|
|
__array( char, file, TRACE_FILE_SIZE+1 )
|
|
__field( char, correct )
|
|
__field( char, constant )
|
|
),
|
|
|
|
F_printk("%u:%s:%s (%u)%s",
|
|
__entry->line,
|
|
__entry->func, __entry->file, __entry->correct,
|
|
__entry->constant ? " CONSTANT" : "")
|
|
);
|
|
|
|
|
|
FTRACE_ENTRY(hwlat, hwlat_entry,
|
|
|
|
TRACE_HWLAT,
|
|
|
|
F_STRUCT(
|
|
__field( u64, duration )
|
|
__field( u64, outer_duration )
|
|
__field( u64, nmi_total_ts )
|
|
__field_struct( struct timespec64, timestamp )
|
|
__field_desc( s64, timestamp, tv_sec )
|
|
__field_desc( long, timestamp, tv_nsec )
|
|
__field( unsigned int, nmi_count )
|
|
__field( unsigned int, seqnum )
|
|
__field( unsigned int, count )
|
|
),
|
|
|
|
F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llu\tcount:%d\tnmi-ts:%llu\tnmi-count:%u\n",
|
|
__entry->seqnum,
|
|
__entry->tv_sec,
|
|
__entry->tv_nsec,
|
|
__entry->duration,
|
|
__entry->outer_duration,
|
|
__entry->count,
|
|
__entry->nmi_total_ts,
|
|
__entry->nmi_count)
|
|
);
|
|
|
|
/*
 * Rebuild the timestamp delta of a func_repeats entry from its two
 * halves: top_delta_ts supplies the bits above bit 31 and
 * bottom_delta_ts the low 32 bits.
 *
 * @entry: pointer to a structure with top_delta_ts and bottom_delta_ts
 *         members; it is evaluated twice, so it must be free of side
 *         effects.
 *
 * The definition ends without a line continuation so that it cannot
 * absorb whatever follows it in the file.
 */
#define FUNC_REPEATS_GET_DELTA_TS(entry)				\
	(((u64)(entry)->top_delta_ts << 32) | (entry)->bottom_delta_ts)
|
|
|
|
FTRACE_ENTRY(func_repeats, func_repeats_entry,
|
|
|
|
TRACE_FUNC_REPEATS,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned long, ip )
|
|
__field( unsigned long, parent_ip )
|
|
__field( u16 , count )
|
|
__field( u16 , top_delta_ts )
|
|
__field( u32 , bottom_delta_ts )
|
|
),
|
|
|
|
F_printk(" %ps <-%ps\t(repeats:%u delta: -%llu)",
|
|
(void *)__entry->ip,
|
|
(void *)__entry->parent_ip,
|
|
__entry->count,
|
|
FUNC_REPEATS_GET_DELTA_TS(__entry))
|
|
);
|
|
|
|
FTRACE_ENTRY(osnoise, osnoise_entry,
|
|
|
|
TRACE_OSNOISE,
|
|
|
|
F_STRUCT(
|
|
__field( u64, noise )
|
|
__field( u64, runtime )
|
|
__field( u64, max_sample )
|
|
__field( unsigned int, hw_count )
|
|
__field( unsigned int, nmi_count )
|
|
__field( unsigned int, irq_count )
|
|
__field( unsigned int, softirq_count )
|
|
__field( unsigned int, thread_count )
|
|
),
|
|
|
|
F_printk("noise:%llu\tmax_sample:%llu\thw:%u\tnmi:%u\tirq:%u\tsoftirq:%u\tthread:%u\n",
|
|
__entry->noise,
|
|
__entry->max_sample,
|
|
__entry->hw_count,
|
|
__entry->nmi_count,
|
|
__entry->irq_count,
|
|
__entry->softirq_count,
|
|
__entry->thread_count)
|
|
);
|
|
|
|
FTRACE_ENTRY(timerlat, timerlat_entry,
|
|
|
|
TRACE_TIMERLAT,
|
|
|
|
F_STRUCT(
|
|
__field( unsigned int, seqnum )
|
|
__field( int, context )
|
|
__field( u64, timer_latency )
|
|
),
|
|
|
|
F_printk("seq:%u\tcontext:%d\ttimer_latency:%llu\n",
|
|
__entry->seqnum,
|
|
__entry->context,
|
|
__entry->timer_latency)
|
|
);
|