linux/arch/arm/kernel/perf_event_xscale.c
Peter Zijlstra 89d6c0b5bd perf, arch: Add generic NODE cache events
Add a NODE level to the generic cache events which is used to measure
local vs remote memory accesses. Like all other cache events, an
ACCESS is HIT+MISS, if there is no way to distinguish between reads
and writes do reads only etc..

The below needs filling out for !x86 (which I filled out with
unsupported events).

I'm fairly sure ARM can leave it like that since it doesn't strike me as
an architecture that even has NUMA support. SH might have something since
it does appear to have some NUMA bits.

Sparc64, PowerPC and MIPS certainly want a good look there since they
clearly are NUMA capable.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: David Miller <davem@davemloft.net>
Cc: Anton Blanchard <anton@samba.org>
Cc: David Daney <ddaney@caviumnetworks.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1303508226.4865.8.camel@laptop
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2011-07-01 11:06:38 +02:00

821 lines
20 KiB
C

/*
* ARMv5 [xscale] Performance counter handling code.
*
* Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
*
* Based on the previous xscale OProfile code.
*
* There are two variants of the xscale PMU that we support:
* - xscale1pmu: 2 event counters and a cycle counter
* - xscale2pmu: 4 event counters and a cycle counter
* The two variants share event definitions, but have different
* PMU structures.
*/
#ifdef CONFIG_CPU_XSCALE
enum xscale_perf_types {
XSCALE_PERFCTR_ICACHE_MISS = 0x00,
XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01,
XSCALE_PERFCTR_DATA_STALL = 0x02,
XSCALE_PERFCTR_ITLB_MISS = 0x03,
XSCALE_PERFCTR_DTLB_MISS = 0x04,
XSCALE_PERFCTR_BRANCH = 0x05,
XSCALE_PERFCTR_BRANCH_MISS = 0x06,
XSCALE_PERFCTR_INSTRUCTION = 0x07,
XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08,
XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09,
XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A,
XSCALE_PERFCTR_DCACHE_MISS = 0x0B,
XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C,
XSCALE_PERFCTR_PC_CHANGED = 0x0D,
XSCALE_PERFCTR_BCU_REQUEST = 0x10,
XSCALE_PERFCTR_BCU_FULL = 0x11,
XSCALE_PERFCTR_BCU_DRAIN = 0x12,
XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14,
XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15,
XSCALE_PERFCTR_RMW = 0x16,
/* XSCALE_PERFCTR_CCNT is not hardware defined */
XSCALE_PERFCTR_CCNT = 0xFE,
XSCALE_PERFCTR_UNUSED = 0xFF,
};
enum xscale_counters {
XSCALE_CYCLE_COUNTER = 1,
XSCALE_COUNTER0,
XSCALE_COUNTER1,
XSCALE_COUNTER2,
XSCALE_COUNTER3,
};
static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
[PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
[PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
[PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
};
static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(NODE)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
};
#define XSCALE_PMU_ENABLE 0x001
#define XSCALE_PMN_RESET 0x002
#define XSCALE_CCNT_RESET 0x004
#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET)
#define XSCALE_PMU_CNT64 0x008
#define XSCALE1_OVERFLOWED_MASK 0x700
#define XSCALE1_CCOUNT_OVERFLOW 0x400
#define XSCALE1_COUNT0_OVERFLOW 0x100
#define XSCALE1_COUNT1_OVERFLOW 0x200
#define XSCALE1_CCOUNT_INT_EN 0x040
#define XSCALE1_COUNT0_INT_EN 0x010
#define XSCALE1_COUNT1_INT_EN 0x020
#define XSCALE1_COUNT0_EVT_SHFT 12
#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT)
#define XSCALE1_COUNT1_EVT_SHFT 20
#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT)
static inline u32
xscale1pmu_read_pmnc(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
return val;
}
static inline void
xscale1pmu_write_pmnc(u32 val)
{
/* upper 4bits and 7, 11 are write-as-0 */
val &= 0xffff77f;
asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
}
static inline int
xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
enum xscale_counters counter)
{
int ret = 0;
switch (counter) {
case XSCALE_CYCLE_COUNTER:
ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
break;
case XSCALE_COUNTER0:
ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
break;
case XSCALE_COUNTER1:
ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}
return ret;
}
static irqreturn_t
xscale1pmu_handle_irq(int irq_num, void *dev)
{
unsigned long pmnc;
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
int idx;
/*
* NOTE: there's an A stepping erratum that states if an overflow
* bit already exists and another occurs, the previous
* Overflow bit gets cleared. There's no workaround.
* Fixed in B stepping or later.
*/
pmnc = xscale1pmu_read_pmnc();
/*
* Write the value back to clear the overflow flags. Overflow
* flags remain in pmnc for use below. We also disable the PMU
* while we process the interrupt.
*/
xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
return IRQ_NONE;
regs = get_irq_regs();
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
if (!test_bit(idx, cpuc->active_mask))
continue;
if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx, 1);
data.period = event->hw.last_period;
if (!armpmu_event_set_period(event, hwc, idx))
continue;
if (perf_event_overflow(event, &data, regs))
armpmu->disable(hwc, idx);
}
irq_work_run();
/*
* Re-enable the PMU.
*/
pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(pmnc);
return IRQ_HANDLED;
}
static void
xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long val, mask, evt, flags;
switch (idx) {
case XSCALE_CYCLE_COUNTER:
mask = 0;
evt = XSCALE1_CCOUNT_INT_EN;
break;
case XSCALE_COUNTER0:
mask = XSCALE1_COUNT0_EVT_MASK;
evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
XSCALE1_COUNT0_INT_EN;
break;
case XSCALE_COUNTER1:
mask = XSCALE1_COUNT1_EVT_MASK;
evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
XSCALE1_COUNT1_INT_EN;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
raw_spin_lock_irqsave(&pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~mask;
val |= evt;
xscale1pmu_write_pmnc(val);
raw_spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long val, mask, evt, flags;
switch (idx) {
case XSCALE_CYCLE_COUNTER:
mask = XSCALE1_CCOUNT_INT_EN;
evt = 0;
break;
case XSCALE_COUNTER0:
mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
break;
case XSCALE_COUNTER1:
mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
raw_spin_lock_irqsave(&pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~mask;
val |= evt;
xscale1pmu_write_pmnc(val);
raw_spin_unlock_irqrestore(&pmu_lock, flags);
}
static int
xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
struct hw_perf_event *event)
{
if (XSCALE_PERFCTR_CCNT == event->config_base) {
if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
return -EAGAIN;
return XSCALE_CYCLE_COUNTER;
} else {
if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
return XSCALE_COUNTER1;
if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
return XSCALE_COUNTER0;
return -EAGAIN;
}
}
static void
xscale1pmu_start(void)
{
unsigned long flags, val;
raw_spin_lock_irqsave(&pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val |= XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(val);
raw_spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
xscale1pmu_stop(void)
{
unsigned long flags, val;
raw_spin_lock_irqsave(&pmu_lock, flags);
val = xscale1pmu_read_pmnc();
val &= ~XSCALE_PMU_ENABLE;
xscale1pmu_write_pmnc(val);
raw_spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline u32
xscale1pmu_read_counter(int counter)
{
u32 val = 0;
switch (counter) {
case XSCALE_CYCLE_COUNTER:
asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
break;
case XSCALE_COUNTER0:
asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
break;
case XSCALE_COUNTER1:
asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
break;
}
return val;
}
static inline void
xscale1pmu_write_counter(int counter, u32 val)
{
switch (counter) {
case XSCALE_CYCLE_COUNTER:
asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
break;
case XSCALE_COUNTER0:
asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
break;
case XSCALE_COUNTER1:
asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
break;
}
}
static const struct arm_pmu xscale1pmu = {
.id = ARM_PERF_PMU_ID_XSCALE1,
.name = "xscale1",
.handle_irq = xscale1pmu_handle_irq,
.enable = xscale1pmu_enable_event,
.disable = xscale1pmu_disable_event,
.read_counter = xscale1pmu_read_counter,
.write_counter = xscale1pmu_write_counter,
.get_event_idx = xscale1pmu_get_event_idx,
.start = xscale1pmu_start,
.stop = xscale1pmu_stop,
.cache_map = &xscale_perf_cache_map,
.event_map = &xscale_perf_map,
.raw_event_mask = 0xFF,
.num_events = 3,
.max_period = (1LLU << 32) - 1,
};
static const struct arm_pmu *__init xscale1pmu_init(void)
{
return &xscale1pmu;
}
#define XSCALE2_OVERFLOWED_MASK 0x01f
#define XSCALE2_CCOUNT_OVERFLOW 0x001
#define XSCALE2_COUNT0_OVERFLOW 0x002
#define XSCALE2_COUNT1_OVERFLOW 0x004
#define XSCALE2_COUNT2_OVERFLOW 0x008
#define XSCALE2_COUNT3_OVERFLOW 0x010
#define XSCALE2_CCOUNT_INT_EN 0x001
#define XSCALE2_COUNT0_INT_EN 0x002
#define XSCALE2_COUNT1_INT_EN 0x004
#define XSCALE2_COUNT2_INT_EN 0x008
#define XSCALE2_COUNT3_INT_EN 0x010
#define XSCALE2_COUNT0_EVT_SHFT 0
#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT)
#define XSCALE2_COUNT1_EVT_SHFT 8
#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT)
#define XSCALE2_COUNT2_EVT_SHFT 16
#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT)
#define XSCALE2_COUNT3_EVT_SHFT 24
#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT)
static inline u32
xscale2pmu_read_pmnc(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
/* bits 1-2 and 4-23 are read-unpredictable */
return val & 0xff000009;
}
static inline void
xscale2pmu_write_pmnc(u32 val)
{
/* bits 4-23 are write-as-0, 24-31 are write ignored */
val &= 0xf;
asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
}
static inline u32
xscale2pmu_read_overflow_flags(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
return val;
}
static inline void
xscale2pmu_write_overflow_flags(u32 val)
{
asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
}
static inline u32
xscale2pmu_read_event_select(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
return val;
}
static inline void
xscale2pmu_write_event_select(u32 val)
{
asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
}
static inline u32
xscale2pmu_read_int_enable(void)
{
u32 val;
asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
return val;
}
static void
xscale2pmu_write_int_enable(u32 val)
{
asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
}
static inline int
xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
enum xscale_counters counter)
{
int ret = 0;
switch (counter) {
case XSCALE_CYCLE_COUNTER:
ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
break;
case XSCALE_COUNTER0:
ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
break;
case XSCALE_COUNTER1:
ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
break;
case XSCALE_COUNTER2:
ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
break;
case XSCALE_COUNTER3:
ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
}
return ret;
}
static irqreturn_t
xscale2pmu_handle_irq(int irq_num, void *dev)
{
unsigned long pmnc, of_flags;
struct perf_sample_data data;
struct cpu_hw_events *cpuc;
struct pt_regs *regs;
int idx;
/* Disable the PMU. */
pmnc = xscale2pmu_read_pmnc();
xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
/* Check the overflow flag register. */
of_flags = xscale2pmu_read_overflow_flags();
if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
return IRQ_NONE;
/* Clear the overflow bits. */
xscale2pmu_write_overflow_flags(of_flags);
regs = get_irq_regs();
perf_sample_data_init(&data, 0);
cpuc = &__get_cpu_var(cpu_hw_events);
for (idx = 0; idx <= armpmu->num_events; ++idx) {
struct perf_event *event = cpuc->events[idx];
struct hw_perf_event *hwc;
if (!test_bit(idx, cpuc->active_mask))
continue;
if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
continue;
hwc = &event->hw;
armpmu_event_update(event, hwc, idx, 1);
data.period = event->hw.last_period;
if (!armpmu_event_set_period(event, hwc, idx))
continue;
if (perf_event_overflow(event, &data, regs))
armpmu->disable(hwc, idx);
}
irq_work_run();
/*
* Re-enable the PMU.
*/
pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(pmnc);
return IRQ_HANDLED;
}
static void
xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long flags, ien, evtsel;
ien = xscale2pmu_read_int_enable();
evtsel = xscale2pmu_read_event_select();
switch (idx) {
case XSCALE_CYCLE_COUNTER:
ien |= XSCALE2_CCOUNT_INT_EN;
break;
case XSCALE_COUNTER0:
ien |= XSCALE2_COUNT0_INT_EN;
evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
break;
case XSCALE_COUNTER1:
ien |= XSCALE2_COUNT1_INT_EN;
evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
break;
case XSCALE_COUNTER2:
ien |= XSCALE2_COUNT2_INT_EN;
evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
break;
case XSCALE_COUNTER3:
ien |= XSCALE2_COUNT3_INT_EN;
evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
raw_spin_lock_irqsave(&pmu_lock, flags);
xscale2pmu_write_event_select(evtsel);
xscale2pmu_write_int_enable(ien);
raw_spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
unsigned long flags, ien, evtsel;
ien = xscale2pmu_read_int_enable();
evtsel = xscale2pmu_read_event_select();
switch (idx) {
case XSCALE_CYCLE_COUNTER:
ien &= ~XSCALE2_CCOUNT_INT_EN;
break;
case XSCALE_COUNTER0:
ien &= ~XSCALE2_COUNT0_INT_EN;
evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
break;
case XSCALE_COUNTER1:
ien &= ~XSCALE2_COUNT1_INT_EN;
evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
break;
case XSCALE_COUNTER2:
ien &= ~XSCALE2_COUNT2_INT_EN;
evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
break;
case XSCALE_COUNTER3:
ien &= ~XSCALE2_COUNT3_INT_EN;
evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
break;
default:
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
return;
}
raw_spin_lock_irqsave(&pmu_lock, flags);
xscale2pmu_write_event_select(evtsel);
xscale2pmu_write_int_enable(ien);
raw_spin_unlock_irqrestore(&pmu_lock, flags);
}
static int
xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
struct hw_perf_event *event)
{
int idx = xscale1pmu_get_event_idx(cpuc, event);
if (idx >= 0)
goto out;
if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
idx = XSCALE_COUNTER3;
else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
idx = XSCALE_COUNTER2;
out:
return idx;
}
static void
xscale2pmu_start(void)
{
unsigned long flags, val;
raw_spin_lock_irqsave(&pmu_lock, flags);
val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
val |= XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(val);
raw_spin_unlock_irqrestore(&pmu_lock, flags);
}
static void
xscale2pmu_stop(void)
{
unsigned long flags, val;
raw_spin_lock_irqsave(&pmu_lock, flags);
val = xscale2pmu_read_pmnc();
val &= ~XSCALE_PMU_ENABLE;
xscale2pmu_write_pmnc(val);
raw_spin_unlock_irqrestore(&pmu_lock, flags);
}
static inline u32
xscale2pmu_read_counter(int counter)
{
u32 val = 0;
switch (counter) {
case XSCALE_CYCLE_COUNTER:
asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
break;
case XSCALE_COUNTER0:
asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
break;
case XSCALE_COUNTER1:
asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
break;
case XSCALE_COUNTER2:
asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
break;
case XSCALE_COUNTER3:
asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
break;
}
return val;
}
static inline void
xscale2pmu_write_counter(int counter, u32 val)
{
switch (counter) {
case XSCALE_CYCLE_COUNTER:
asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
break;
case XSCALE_COUNTER0:
asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
break;
case XSCALE_COUNTER1:
asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
break;
case XSCALE_COUNTER2:
asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
break;
case XSCALE_COUNTER3:
asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
break;
}
}
static const struct arm_pmu xscale2pmu = {
.id = ARM_PERF_PMU_ID_XSCALE2,
.name = "xscale2",
.handle_irq = xscale2pmu_handle_irq,
.enable = xscale2pmu_enable_event,
.disable = xscale2pmu_disable_event,
.read_counter = xscale2pmu_read_counter,
.write_counter = xscale2pmu_write_counter,
.get_event_idx = xscale2pmu_get_event_idx,
.start = xscale2pmu_start,
.stop = xscale2pmu_stop,
.cache_map = &xscale_perf_cache_map,
.event_map = &xscale_perf_map,
.raw_event_mask = 0xFF,
.num_events = 5,
.max_period = (1LLU << 32) - 1,
};
static const struct arm_pmu *__init xscale2pmu_init(void)
{
return &xscale2pmu;
}
#else
static const struct arm_pmu *__init xscale1pmu_init(void)
{
return NULL;
}
static const struct arm_pmu *__init xscale2pmu_init(void)
{
return NULL;
}
#endif /* CONFIG_CPU_XSCALE */