06d65bda75
It's just wasteful for stacktrace users like perf to walk through every entry on the stack when they only accept reliable ones, i.e. those that the frame pointer validates. Since perf requires purely reliable stacktraces, it needs a stack walker based on frame pointers only to optimize the stacktrace processing. This might solve some near-lockup scenarios that can be triggered by call-graph tracing timer events. Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Paul Mackerras <paulus@samba.org> LKML-Reference: <1261024834-5336-2-git-send-regression-fweisbec@gmail.com> [ v2: fix for modular builds and small detail tidyup ] Signed-off-by: Ingo Molnar <mingo@elte.hu>
384 lines
8.6 KiB
C
384 lines
8.6 KiB
C
/*
|
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
|
* Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
|
|
*/
|
|
#include <linux/kallsyms.h>
|
|
#include <linux/kprobes.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/utsname.h>
|
|
#include <linux/hardirq.h>
|
|
#include <linux/kdebug.h>
|
|
#include <linux/module.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/ftrace.h>
|
|
#include <linux/kexec.h>
|
|
#include <linux/bug.h>
|
|
#include <linux/nmi.h>
|
|
#include <linux/sysfs.h>
|
|
|
|
#include <asm/stacktrace.h>
|
|
|
|
#include "dumpstack.h"
|
|
|
|
int panic_on_unrecovered_nmi;
|
|
int panic_on_io_nmi;
|
|
unsigned int code_bytes = 64;
|
|
int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
|
|
static int die_counter;
|
|
|
|
/*
 * Print one trace line for @address: the raw pointer inside "[<...>]"
 * followed by its %pS form.  When @reliable is zero the symbol is
 * prefixed with "? ".
 */
void printk_address(unsigned long address, int reliable)
{
	void *ptr = (void *)address;
	const char *marker = "? ";

	if (reliable)
		marker = "";

	printk(" [<%p>] %s%pS\n", ptr, marker, ptr);
}
|
|
|
|
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
 * When @addr equals return_to_handler, report the return address stored
 * in the task's ret_stack instead, as a reliable entry.
 *
 * @graph counts how many such entries this walk has already reported;
 * it selects the ret_stack slot (curr_ret_stack - *graph) and is
 * incremented on every hit.  Nothing is reported when the task has no
 * ret_stack or when fewer than *graph entries are available.
 */
static void
print_ftrace_graph_addr(unsigned long addr, void *data,
			const struct stacktrace_ops *ops,
			struct thread_info *tinfo, int *graph)
{
	struct task_struct *task = tinfo->task;
	int slot;

	if (addr != (unsigned long)return_to_handler)
		return;
	if (!task->ret_stack)
		return;

	slot = task->curr_ret_stack;
	if (slot < *graph)
		return;
	slot -= *graph;

	ops->address(data, task->ret_stack[slot].ret, 1);
	(*graph)++;
}
#else
/* Without CONFIG_FUNCTION_GRAPH_TRACER there is nothing to translate. */
static inline void
print_ftrace_graph_addr(unsigned long addr, void *data,
			const struct stacktrace_ops *ops,
			struct thread_info *tinfo, int *graph)
{ }
#endif
|
|
|
|
/*
|
|
* x86-64 can have up to three kernel stacks:
|
|
* process stack
|
|
* interrupt stack
|
|
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
|
|
*/
|
|
|
|
static inline int valid_stack_ptr(struct thread_info *tinfo,
|
|
void *p, unsigned int size, void *end)
|
|
{
|
|
void *t = tinfo;
|
|
if (end) {
|
|
if (p < end && p >= (end-THREAD_SIZE))
|
|
return 1;
|
|
else
|
|
return 0;
|
|
}
|
|
return p > t && p < t + THREAD_SIZE - size;
|
|
}
|
|
|
|
unsigned long
|
|
print_context_stack(struct thread_info *tinfo,
|
|
unsigned long *stack, unsigned long bp,
|
|
const struct stacktrace_ops *ops, void *data,
|
|
unsigned long *end, int *graph)
|
|
{
|
|
struct stack_frame *frame = (struct stack_frame *)bp;
|
|
|
|
while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
|
|
unsigned long addr;
|
|
|
|
addr = *stack;
|
|
if (__kernel_text_address(addr)) {
|
|
if ((unsigned long) stack == bp + sizeof(long)) {
|
|
ops->address(data, addr, 1);
|
|
frame = frame->next_frame;
|
|
bp = (unsigned long) frame;
|
|
} else {
|
|
ops->address(data, addr, 0);
|
|
}
|
|
print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
|
|
}
|
|
stack++;
|
|
}
|
|
return bp;
|
|
}
|
|
EXPORT_SYMBOL_GPL(print_context_stack);
|
|
|
|
unsigned long
|
|
print_context_stack_bp(struct thread_info *tinfo,
|
|
unsigned long *stack, unsigned long bp,
|
|
const struct stacktrace_ops *ops, void *data,
|
|
unsigned long *end, int *graph)
|
|
{
|
|
struct stack_frame *frame = (struct stack_frame *)bp;
|
|
unsigned long *ret_addr = &frame->return_address;
|
|
|
|
while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) {
|
|
unsigned long addr = *ret_addr;
|
|
|
|
if (__kernel_text_address(addr)) {
|
|
ops->address(data, addr, 1);
|
|
frame = frame->next_frame;
|
|
ret_addr = &frame->return_address;
|
|
print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
|
|
}
|
|
}
|
|
return (unsigned long)frame;
|
|
}
|
|
EXPORT_SYMBOL_GPL(print_context_stack_bp);
|
|
|
|
|
|
/*
 * stacktrace_ops "warning_symbol" callback: print the log-level prefix
 * passed in @data, then @msg with @symbol resolved by print_symbol(),
 * then a newline.
 *
 * The prefix is printed through "%s" rather than used as the format
 * string itself, so that a '%' in it can never be interpreted as a
 * conversion.
 */
static void
print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
	printk("%s", (char *)data);
	print_symbol(msg, symbol);
	printk("\n");
}
|
|
|
|
/*
 * stacktrace_ops "warning" callback: print @msg on its own line,
 * prefixed with the log-level string carried in @data.
 */
static void print_trace_warning(void *data, char *msg)
{
	const char *log_lvl = data;

	printk("%s%s\n", log_lvl, msg);
}
|
|
|
|
/*
 * stacktrace_ops "stack" callback: print the stack's @name in angle
 * brackets, prefixed with the log-level string carried in @data.
 * Always returns 0.
 */
static int print_trace_stack(void *data, char *name)
{
	const char *log_lvl = data;

	printk("%s <%s> ", log_lvl, name);

	return 0;
}
|
|
|
|
/*
 * Print one address/symbol entry per line.
 *
 * stacktrace_ops "address" callback: @data carries the log-level
 * prefix.  It is printed through "%s" rather than used as the format
 * string itself, so that a '%' in it can never be interpreted as a
 * conversion.  The NMI watchdog is touched on every entry.
 */
static void print_trace_address(void *data, unsigned long addr, int reliable)
{
	touch_nmi_watchdog();
	printk("%s", (char *)data);
	printk_address(addr, reliable);
}
|
|
|
|
static const struct stacktrace_ops print_trace_ops = {
|
|
.warning = print_trace_warning,
|
|
.warning_symbol = print_trace_warning_symbol,
|
|
.stack = print_trace_stack,
|
|
.address = print_trace_address,
|
|
.walk_stack = print_context_stack,
|
|
};
|
|
|
|
void
|
|
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
|
unsigned long *stack, unsigned long bp, char *log_lvl)
|
|
{
|
|
printk("%sCall Trace:\n", log_lvl);
|
|
dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
|
|
}
|
|
|
|
/* Same as show_trace_log_lvl(), with an empty log-level prefix. */
void show_trace(struct task_struct *task, struct pt_regs *regs,
		unsigned long *stack, unsigned long bp)
{
	char *no_prefix = "";

	show_trace_log_lvl(task, regs, stack, bp, no_prefix);
}
|
|
|
|
void show_stack(struct task_struct *task, unsigned long *sp)
|
|
{
|
|
show_stack_log_lvl(task, NULL, sp, 0, "");
|
|
}
|
|
|
|
/*
|
|
* The architecture-independent dump_stack generator
|
|
*/
|
|
void dump_stack(void)
|
|
{
|
|
unsigned long bp = 0;
|
|
unsigned long stack;
|
|
|
|
#ifdef CONFIG_FRAME_POINTER
|
|
if (!bp)
|
|
get_bp(bp);
|
|
#endif
|
|
|
|
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
|
|
current->pid, current->comm, print_tainted(),
|
|
init_utsname()->release,
|
|
(int)strcspn(init_utsname()->version, " "),
|
|
init_utsname()->version);
|
|
show_trace(NULL, NULL, &stack, bp);
|
|
}
|
|
EXPORT_SYMBOL(dump_stack);
|
|
|
|
/* Taken in oops_begin() and released in oops_end(). */
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;

/* CPU recorded by the latest oops_begin(); reset to -1 by oops_end(). */
static int die_owner = -1;

/* oops_begin() calls not yet matched by oops_end(); the lock drops at 0. */
static unsigned int die_nest_count;
|
|
|
|
unsigned __kprobes long oops_begin(void)
|
|
{
|
|
int cpu;
|
|
unsigned long flags;
|
|
|
|
/* notify the hw-branch tracer so it may disable tracing and
|
|
add the last trace to the trace buffer -
|
|
the earlier this happens, the more useful the trace. */
|
|
trace_hw_branch_oops();
|
|
|
|
oops_enter();
|
|
|
|
/* racy, but better than risking deadlock. */
|
|
raw_local_irq_save(flags);
|
|
cpu = smp_processor_id();
|
|
if (!arch_spin_trylock(&die_lock)) {
|
|
if (cpu == die_owner)
|
|
/* nested oops. should stop eventually */;
|
|
else
|
|
arch_spin_lock(&die_lock);
|
|
}
|
|
die_nest_count++;
|
|
die_owner = cpu;
|
|
console_verbose();
|
|
bust_spinlocks(1);
|
|
return flags;
|
|
}
|
|
|
|
/*
 * Leave oops handling started by oops_begin().
 *
 * @flags: interrupt flags returned by oops_begin()
 * @regs:  registers of the faulting context; when non-NULL and
 *         kexec_should_crash() agrees, crash_kexec() is invoked first
 * @signr: signal used to kill the current task, or 0 to simply return
 *
 * With a non-zero @signr this function does not return: it panics when
 * in interrupt context or when panic_on_oops is set, and otherwise
 * calls do_exit().
 */
void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
{
	if (regs && kexec_should_crash(current))
		crash_kexec(regs);

	bust_spinlocks(0);
	die_owner = -1;
	add_taint(TAINT_DIE);

	/* Nest count reaches zero, release the lock. */
	if (--die_nest_count == 0)
		arch_spin_unlock(&die_lock);

	raw_local_irq_restore(flags);
	oops_exit();

	if (signr) {
		if (in_interrupt())
			panic("Fatal exception in interrupt");
		if (panic_on_oops)
			panic("Fatal exception");
		do_exit(signr);
	}
}
|
|
|
|
/*
 * Print the body of an oops report: a header line with @str, the low
 * 16 bits of @err and the oops counter, the build flavour tags, the
 * registers, and a final summary of the instruction and stack pointers.
 *
 * Returns 1 when a DIE_OOPS notifier answered NOTIFY_STOP (in which
 * case the registers and the summary are not printed), 0 otherwise.
 */
int __kprobes __die(const char *str, struct pt_regs *regs, long err)
{
	printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
#ifdef CONFIG_PREEMPT
	printk("PREEMPT ");
#endif
#ifdef CONFIG_SMP
	printk("SMP ");
#endif
#ifdef CONFIG_DEBUG_PAGEALLOC
	printk("DEBUG_PAGEALLOC");
#endif
	printk("\n");
	sysfs_printk_last_file();

	if (notify_die(DIE_OOPS, str, regs, err,
		       current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
		return 1;

	show_registers(regs);

#ifdef CONFIG_X86_32
	{
		unsigned short ss;
		unsigned long sp;

		if (!user_mode_vm(regs)) {
			/* Kernel mode: take sp/ss from the live context. */
			sp = kernel_stack_pointer(regs);
			savesegment(ss, ss);
		} else {
			sp = regs->sp;
			ss = regs->ss & 0xffff;
		}
		printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
		print_symbol("%s", regs->ip);
		printk(" SS:ESP %04x:%08lx\n", ss, sp);
	}
#else
	/* Executive summary in case the oops scrolled away */
	printk(KERN_ALERT "RIP ");
	printk_address(regs->ip, 1);
	printk(" RSP <%016lx>\n", regs->sp);
#endif

	return 0;
}
|
|
|
|
/*
|
|
* This is gone through when something in the kernel has done something bad
|
|
* and is about to be terminated:
|
|
*/
|
|
void die(const char *str, struct pt_regs *regs, long err)
|
|
{
|
|
unsigned long flags = oops_begin();
|
|
int sig = SIGSEGV;
|
|
|
|
if (!user_mode_vm(regs))
|
|
report_bug(regs->ip, regs);
|
|
|
|
if (__die(str, regs, err))
|
|
sig = 0;
|
|
oops_end(flags, regs, sig);
|
|
}
|
|
|
|
void notrace __kprobes
|
|
die_nmi(char *str, struct pt_regs *regs, int do_panic)
|
|
{
|
|
unsigned long flags;
|
|
|
|
if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
|
|
return;
|
|
|
|
/*
|
|
* We are in trouble anyway, lets at least try
|
|
* to get a message out.
|
|
*/
|
|
flags = oops_begin();
|
|
printk(KERN_EMERG "%s", str);
|
|
printk(" on CPU%d, ip %08lx, registers:\n",
|
|
smp_processor_id(), regs->ip);
|
|
show_registers(regs);
|
|
oops_end(flags, regs, 0);
|
|
if (do_panic || panic_on_oops)
|
|
panic("Non maskable interrupt");
|
|
nmi_exit();
|
|
local_irq_enable();
|
|
do_exit(SIGBUS);
|
|
}
|
|
|
|
static int __init oops_setup(char *s)
|
|
{
|
|
if (!s)
|
|
return -EINVAL;
|
|
if (!strcmp(s, "panic"))
|
|
panic_on_oops = 1;
|
|
return 0;
|
|
}
|
|
early_param("oops", oops_setup);
|
|
|
|
static int __init kstack_setup(char *s)
|
|
{
|
|
if (!s)
|
|
return -EINVAL;
|
|
kstack_depth_to_print = simple_strtoul(s, NULL, 0);
|
|
return 0;
|
|
}
|
|
early_param("kstack", kstack_setup);
|
|
|
|
static int __init code_bytes_setup(char *s)
|
|
{
|
|
code_bytes = simple_strtoul(s, NULL, 0);
|
|
if (code_bytes > 8192)
|
|
code_bytes = 8192;
|
|
|
|
return 1;
|
|
}
|
|
__setup("code_bytes=", code_bytes_setup);
|