linux/kernel/trace/trace_event_perf.c

164 lines
3.3 KiB
C
Raw Normal View History

/*
* trace event based perf event profiling/tracing
*
* Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
perf: Take a hot regs snapshot for trace events We are taking a wrong regs snapshot when a trace event triggers. Either we use get_irq_regs(), which gives us the interrupted registers if we are in an interrupt, or we use task_pt_regs() which gives us the state before we entered the kernel, assuming we are lucky enough to be no kernel thread, in which case task_pt_regs() returns the initial set of regs when the kernel thread was started. What we want is different. We need a hot snapshot of the regs, so that we can get the instruction pointer to record in the sample, the frame pointer for the callchain, and some other things. Let's use the new perf_fetch_caller_regs() for that. Comparison with perf record -e lock: -R -a -f -g Before: perf [kernel] [k] __do_softirq | --- __do_softirq | |--55.16%-- __open | --44.84%-- __write_nocancel After: perf [kernel] [k] perf_tp_event | --- perf_tp_event | |--41.07%-- lock_acquire | | | |--39.36%-- _raw_spin_lock | | | | | |--7.81%-- hrtimer_interrupt | | | smp_apic_timer_interrupt | | | apic_timer_interrupt The old case was producing unreliable callchains. Now having right frame and instruction pointers, we have the trace we want. Also syscalls and kprobe events already have the right regs, let's use them instead of wasting a retrieval. v2: Follow the rename perf_save_regs() -> perf_fetch_caller_regs() Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Masami Hiramatsu <mhiramat@redhat.com> Cc: Jason Baron <jbaron@redhat.com> Cc: Archs <linux-arch@vger.kernel.org>
2010-03-03 06:16:16 +00:00
* Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
*/
#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"
EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
static char *perf_trace_buf[4];
/*
* Force it to be aligned to unsigned long to avoid misaligned accesses
* suprises
*/
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
perf_trace_t;
/* Count the events in use (per event id, not per instance) */
static int total_ref_count;
static int perf_trace_event_enable(struct ftrace_event_call *event, void *data)
{
int ret = -ENOMEM;
if (event->perf_refcount++ > 0) {
event->perf_data = NULL;
return 0;
}
if (!total_ref_count) {
char *buf;
int i;
for (i = 0; i < 4; i++) {
buf = (char *)alloc_percpu(perf_trace_t);
if (!buf)
goto fail_buf;
rcu_assign_pointer(perf_trace_buf[i], buf);
}
}
ret = event->perf_event_enable(event);
if (!ret) {
event->perf_data = data;
total_ref_count++;
return 0;
}
fail_buf:
if (!total_ref_count) {
int i;
for (i = 0; i < 4; i++) {
free_percpu(perf_trace_buf[i]);
perf_trace_buf[i] = NULL;
}
}
event->perf_refcount--;
return ret;
}
int perf_trace_enable(int event_id, void *data)
{
struct ftrace_event_call *event;
int ret = -EINVAL;
mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
if (event->id == event_id && event->perf_event_enable &&
try_module_get(event->mod)) {
ret = perf_trace_event_enable(event, data);
break;
}
}
mutex_unlock(&event_mutex);
return ret;
}
static void perf_trace_event_disable(struct ftrace_event_call *event)
{
if (--event->perf_refcount > 0)
return;
event->perf_event_disable(event);
if (!--total_ref_count) {
char *buf[4];
int i;
for (i = 0; i < 4; i++) {
buf[i] = perf_trace_buf[i];
rcu_assign_pointer(perf_trace_buf[i], NULL);
}
/*
* Ensure every events in profiling have finished before
* releasing the buffers
*/
synchronize_sched();
for (i = 0; i < 4; i++)
free_percpu(buf[i]);
}
}
void perf_trace_disable(int event_id)
{
struct ftrace_event_call *event;
mutex_lock(&event_mutex);
list_for_each_entry(event, &ftrace_events, list) {
if (event->id == event_id) {
perf_trace_event_disable(event);
module_put(event->mod);
break;
}
}
mutex_unlock(&event_mutex);
}
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
struct pt_regs *regs, int *rctxp)
{
struct trace_entry *entry;
char *trace_buf, *raw_data;
int pc;
BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
pc = preempt_count();
*rctxp = perf_swevent_get_recursion_context();
if (*rctxp < 0)
goto err_recursion;
trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]);
if (!trace_buf)
goto err;
raw_data = per_cpu_ptr(trace_buf, smp_processor_id());
/* zero the dead bytes from align to not leak stack to user */
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
entry = (struct trace_entry *)raw_data;
tracing_generic_entry_update(entry, regs->flags, pc);
entry->type = type;
return raw_data;
err:
perf_swevent_put_recursion_context(*rctxp);
err_recursion:
return NULL;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);