1
linux/arch/x86/kernel/dumpstack_64.c
Steven Rostedt 7ee991fbc6 ftrace: print real return in dumpstack for function graph
Impact: better dumpstack output

I noticed in my crash dumps and even in the stack tracer that a
lot of functions listed in the stack trace are simply
return_to_handler, which is the ftrace graph tracer's way of
inserting its own call into the return of a function.

But we lose track of where the function was actually called from.

This patch adds hooks to the dumpstack mechanism that detect
this and find the real function to print. Both are printed to
let the user know that a hook is still in place.

This does give a funny side effect in the stack tracer output:

        Depth   Size      Location    (80 entries)
        -----   ----      --------
  0)     4144      48   save_stack_trace+0x2f/0x4d
  1)     4096     128   ftrace_call+0x5/0x2b
  2)     3968      16   mempool_alloc_slab+0x16/0x18
  3)     3952     384   return_to_handler+0x0/0x73
  4)     3568    -240   stack_trace_call+0x11d/0x209
  5)     3808     144   return_to_handler+0x0/0x73
  6)     3664    -128   mempool_alloc+0x4d/0xfe
  7)     3792     128   return_to_handler+0x0/0x73
  8)     3664     -32   scsi_sg_alloc+0x48/0x4a [scsi_mod]

As you can see, the sizes of the real functions are now negative.
This is due to them not being found inside the stack.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2008-12-03 08:56:25 +01:00

299 lines
7.1 KiB
C

/*
* Copyright (C) 1991, 1992 Linus Torvalds
* Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
*/
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/ptrace.h>
#include <linux/kexec.h>
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
#include <asm/stacktrace.h>
#include "dumpstack.h"
/*
 * Check whether 'stack' lies inside one of the exception stacks of 'cpu'.
 *
 * @cpu:   CPU whose orig_ist table is consulted
 * @stack: address to classify
 * @usedp: bitmask of stacks already reported; the bit belonging to a newly
 *         matched stack is set here
 * @idp:   receives a label for the matched stack (e.g. "#DB", "NMI")
 *
 * Returns the end address of the matching stack (or, for a debug stack
 * larger than EXCEPTION_STKSZ, the end of the matching EXCEPTION_STKSZ
 * sized piece of it). Returns NULL, leaving *idp untouched, if 'stack'
 * is in no exception stack or if the matching stack is already marked
 * in *usedp.
 */
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
					 unsigned *usedp, char **idp)
{
	/* Labels; a stack's index here is also its bit number in *usedp. */
	static char ids[][8] = {
		[DEBUG_STACK - 1]	= "#DB",
		[NMI_STACK - 1]		= "NMI",
		[DOUBLEFAULT_STACK - 1]	= "#DF",
		[STACKFAULT_STACK - 1]	= "#SS",
		[MCE_STACK - 1]		= "#MC",
#if DEBUG_STKSZ > EXCEPTION_STKSZ
		[N_EXCEPTION_STACKS ...
		 N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
#endif
	};
	unsigned idx;

	/* Try every exception stack in turn. */
	for (idx = 0; idx < N_EXCEPTION_STACKS; idx++) {
		unsigned long top = per_cpu(orig_ist, cpu).ist[idx];

		/* At or past the end of this stack: it cannot be this one. */
		if (stack >= top)
			continue;

		/* Inside the topmost EXCEPTION_STKSZ bytes: found it. */
		if (stack >= top - EXCEPTION_STKSZ) {
			const unsigned seen = 1U << idx;

			/*
			 * Each exception stack may be walked only once.
			 * Seeing it a second time means something is wrong,
			 * so give up and report no match.
			 */
			if (*usedp & seen)
				return NULL;
			*usedp |= seen;
			*idp = ids[idx];
			return (unsigned long *)top;
		}

#if DEBUG_STKSZ > EXCEPTION_STKSZ
		/*
		 * A debug stack bigger than the ordinary exception stacks:
		 * 'stack' may still sit in its lower portion.
		 */
		if (idx == DEBUG_STACK - 1 && stack >= top - DEBUG_STKSZ) {
			unsigned slot = N_EXCEPTION_STACKS - 1;

			/*
			 * The large debug stack is treated as a run of
			 * EXCEPTION_STKSZ sized pieces. Step down one piece
			 * at a time, numbering the label of each piece
			 * passed, until the piece containing 'stack' is
			 * reached.
			 */
			for (;;) {
				slot++;
				top -= EXCEPTION_STKSZ;
				ids[slot][4] = '1' + (slot - N_EXCEPTION_STACKS);
				if (stack >= top - EXCEPTION_STKSZ)
					break;
			}

			if (*usedp & (1U << slot))
				return NULL;
			*usedp |= 1U << slot;
			*idp = ids[slot];
			return (unsigned long *)top;
		}
#endif
	}

	return NULL;
}
/*
 * x86-64 can have up to three kernel stacks:
 * process stack
 * interrupt stack
 * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
 *
 * dump_trace() walks these stacks beginning at 'stack' and reports what
 * it finds through the callbacks in 'ops'.
 *
 * @task:  task to trace; NULL means current
 * @regs:  not referenced by this function
 * @stack: address to start from; when NULL, the saved thread.sp of 'task'
 *         is used, or the address of a local variable if 'task' is current
 * @bp:    frame pointer to start from; when 0 and CONFIG_FRAME_POINTER is
 *         set, it is looked up here
 * @ops:   callbacks; ops->stack() is called with a label when an exception
 *         or IRQ stack is entered and again when it is left
 * @data:  opaque cookie handed through to the callbacks
 *
 * The whole walk is bracketed by get_cpu()/put_cpu().
 */
void dump_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long *stack, unsigned long bp,
		const struct stacktrace_ops *ops, void *data)
{
	const unsigned cpu = get_cpu();
	unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
	/*
	 * Deliberately at function scope: when the caller supplies no start
	 * address, 'stack' points at this variable for the entire walk below,
	 * so it has to stay alive until the function returns.
	 */
	unsigned long dummy;
	unsigned used = 0;
	struct thread_info *tinfo;
	int graph = 0;

	if (!task)
		task = current;

	if (!stack) {
		/* 'task' is never NULL here; it was defaulted just above. */
		if (task != current)
			stack = (unsigned long *)task->thread.sp;
		else
			stack = &dummy;
	}

#ifdef CONFIG_FRAME_POINTER
	if (!bp) {
		if (task == current) {
			/* Grab bp right from our regs */
			get_bp(bp);
		} else {
			/* bp is the last reg pushed by switch_to */
			bp = *(unsigned long *) task->thread.sp;
		}
	}
#endif

	/*
	 * Print function call entries in all stacks, starting at the
	 * current stack address. If the stacks consist of nested
	 * exceptions, follow the link stored in each stack to the next
	 * one until the process stack is reached.
	 */
	tinfo = task_thread_info(task);
	for (;;) {
		char *id;
		unsigned long *estack_end;

		estack_end = in_exception_stack(cpu, (unsigned long)stack,
						&used, &id);

		if (estack_end) {
			/* A negative return asks us to stop switching stacks. */
			if (ops->stack(data, id) < 0)
				break;

			bp = print_context_stack(tinfo, stack, bp, ops,
						 data, estack_end, &graph);
			ops->stack(data, "<EOE>");
			/*
			 * We link to the next stack via the
			 * second-to-last pointer (index -2 to end) in the
			 * exception stack:
			 */
			stack = (unsigned long *) estack_end[-2];
			continue;
		}
		if (irqstack_end) {
			unsigned long *irqstack;

			irqstack = irqstack_end -
				(IRQSTACKSIZE - 64) / sizeof(*irqstack);

			if (stack >= irqstack && stack < irqstack_end) {
				if (ops->stack(data, "IRQ") < 0)
					break;
				bp = print_context_stack(tinfo, stack, bp,
					ops, data, irqstack_end, &graph);
				/*
				 * We link to the next stack (which would be
				 * the process stack normally) via the last
				 * pointer (index -1 to end) in the IRQ stack:
				 */
				stack = (unsigned long *) (irqstack_end[-1]);
				/* The IRQ stack is visited at most once. */
				irqstack_end = NULL;
				ops->stack(data, "EOI");
				continue;
			}
		}
		break;
	}

	/*
	 * This handles the process stack:
	 */
	bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph);
	put_cpu();
}
EXPORT_SYMBOL(dump_trace);
/*
 * Print up to kstack_depth_to_print raw stack words starting at 'sp',
 * STACKSLOTS_PER_LINE words per line with 'log_lvl' prefixed to each
 * continuation line, then hand over to show_trace_log_lvl() for the
 * call trace.
 *
 * When 'sp' is NULL the dump starts at the saved thread.sp of 'task', or,
 * if 'task' is NULL too, at the address of this function's own 'sp'
 * argument. So "show_stack(NULL, NULL);" is a debugging aid that prints
 * the back trace for this cpu.
 */
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
		   unsigned long *sp, unsigned long bp, char *log_lvl)
{
	const int cpu = smp_processor_id();
	unsigned long *irq_top =
		(unsigned long *) (cpu_pda(cpu)->irqstackptr);
	unsigned long *irq_base =
		(unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
	unsigned long *word;
	int n;

	if (sp == NULL)
		sp = task ? (unsigned long *)task->thread.sp
			  : (unsigned long *)&sp;

	word = sp;
	for (n = 0; n < kstack_depth_to_print; n++) {
		const int on_irq_stack = word >= irq_base && word <= irq_top;

		if (!on_irq_stack) {
			/* Off the IRQ stack, stop at a THREAD_SIZE boundary. */
			if (!((long) word & (THREAD_SIZE-1)))
				break;
		} else if (word == irq_top) {
			/*
			 * Reached the end of the IRQ stack: continue at the
			 * address stored in its last slot.
			 */
			word = (unsigned long *) (irq_top[-1]);
			printk(" <EOI> ");
		}

		/* Start a new output line every STACKSLOTS_PER_LINE words. */
		if (n != 0 && (n % STACKSLOTS_PER_LINE) == 0)
			printk("\n%s", log_lvl);

		printk(" %016lx", *word);
		word++;
		touch_nmi_watchdog();
	}
	printk("\n");

	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
}
/*
 * Print the CPU number, the register contents of 'regs' and a line
 * identifying the task recorded as current for this CPU. If 'regs' was
 * not captured in user mode, also print the stack and up to code_bytes
 * bytes of code around regs->ip, with the byte at regs->ip shown in
 * angle brackets.
 */
void show_registers(struct pt_regs *regs)
{
	const int cpu = smp_processor_id();
	struct task_struct *cur = cpu_pda(cpu)->pcurrent;
	unsigned long sp = regs->sp;
	unsigned int prologue_len;
	unsigned int dump_len;
	unsigned char byte;
	u8 *pc;
	int i;

	printk("CPU %d ", cpu);
	__show_regs(regs, 1);
	printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
	       cur->comm, cur->pid, task_thread_info(cur), cur);

	/* Nothing more to show for a user-mode register set. */
	if (user_mode(regs)) {
		printk("\n");
		return;
	}

	/*
	 * When in-kernel, we also print out the stack and code at the
	 * time of the fault..
	 */
	prologue_len = code_bytes * 43 / 64;
	dump_len = code_bytes;

	printk(KERN_EMERG "Stack:\n");
	show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
			   regs->bp, KERN_EMERG);

	printk(KERN_EMERG "Code: ");

	/* Prefer to begin the dump prologue_len bytes before the ip. */
	pc = (u8 *)regs->ip - prologue_len;
	if (pc < (u8 *)PAGE_OFFSET || probe_kernel_address(pc, byte)) {
		/* try starting at IP */
		pc = (u8 *)regs->ip;
		dump_len = dump_len - prologue_len + 1;
	}

	for (i = 0; i < dump_len; i++, pc++) {
		if (pc < (u8 *)PAGE_OFFSET ||
		    probe_kernel_address(pc, byte)) {
			printk(" Bad RIP value.");
			break;
		}
		if (pc != (u8 *)regs->ip)
			printk("%02x ", byte);
		else
			printk("<%02x> ", byte);
	}

	printk("\n");
}
/*
 * Read the 16-bit value stored at address 'ip' and report whether it
 * equals 0x0b0f.
 *
 * Returns 1 on a match, and 0 on a mismatch or if the read fails.
 *
 * NOTE(review): 0x0b0f is presumably the in-memory encoding of the UD2
 * instruction (bytes 0f 0b), as the original local name 'ud2' suggests;
 * confirm against the BUG() implementation.
 */
int is_valid_bugaddr(unsigned long ip)
{
	unsigned short insn;

	if (__copy_from_user(&insn, (const void __user *) ip, sizeof(insn)) != 0)
		return 0;

	return insn == 0x0b0f;
}