linux/arch/powerpc/platforms/pseries/ras.c
Benjamin Herrenschmidt 0ebfff1491 [POWERPC] Add new interrupt mapping core and change platforms to use it
This adds the new irq remapper core and removes the old one.  Because
there are some fundamental conflicts with the old code, like the value
of NO_IRQ which I'm now setting to 0 (as per discussions with Linus),
etc..., this commit also changes the relevant platform and driver code
over to use the new remapper (so as not to cause difficulties later
in bisecting).

This patch removes the old pre-parsing of the open firmware interrupt
tree along with all the bogus assumptions it made to try to renumber
interrupts according to the platform. This is all to be handled by the
new code now.

For the pSeries XICS interrupt controller, a single remapper host is
created for the whole machine regardless of how many interrupt
presentation and source controllers are found, and it's set to match
any device node that isn't a 8259.  That works fine on pSeries and
avoids having to deal with some of the complexities of split source
controllers vs. presentation controllers in the pSeries device trees.

The powerpc i8259 PIC driver now always requests the legacy interrupt
range. It also has the feature of being able to match any device node
(including NULL) if passed no device node as an input. That will help
porting over platforms with broken device-trees like Pegasos who don't
have a proper interrupt tree.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-07-03 21:36:01 +10:00

377 lines
11 KiB
C

/*
* Copyright (C) 2001 Dave Engebretsen IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* Change Activity:
* 2001/09/21 : engebret : Created with minimal EPOW and HW exception support.
* End Change Activity
*/
#include <linux/errno.h>
#include <linux/threads.h>
#include <linux/kernel_stat.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/ioport.h>
#include <linux/interrupt.h>
#include <linux/timex.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/random.h>
#include <linux/sysrq.h>
#include <linux/bitops.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/pgtable.h>
#include <asm/irq.h>
#include <asm/cache.h>
#include <asm/prom.h>
#include <asm/ptrace.h>
#include <asm/machdep.h>
#include <asm/rtas.h>
#include <asm/udbg.h>
#include <asm/firmware.h>
#include "ras.h"
static unsigned char ras_log_buf[RTAS_ERROR_LOG_MAX];
static DEFINE_SPINLOCK(ras_log_buf_lock);
char mce_data_buf[RTAS_ERROR_LOG_MAX];
static int ras_get_sensor_state_token;
static int ras_check_exception_token;
#define EPOW_SENSOR_TOKEN 9
#define EPOW_SENSOR_INDEX 0
#define RAS_VECTOR_OFFSET 0x500
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id,
struct pt_regs * regs);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id,
struct pt_regs * regs);
/* #define DEBUG */
static void request_ras_irqs(struct device_node *np,
irqreturn_t (*handler)(int, void *, struct pt_regs *),
const char *name)
{
int i, index, count = 0;
struct of_irq oirq;
u32 *opicprop;
unsigned int opicplen;
unsigned int virqs[16];
/* Check for obsolete "open-pic-interrupt" property. If present, then
* map those interrupts using the default interrupt host and default
* trigger
*/
opicprop = (u32 *)get_property(np, "open-pic-interrupt", &opicplen);
if (opicprop) {
opicplen /= sizeof(u32);
for (i = 0; i < opicplen; i++) {
if (count > 15)
break;
virqs[count] = irq_create_mapping(NULL, *(opicprop++),
IRQ_TYPE_NONE);
if (virqs[count] == NO_IRQ)
printk(KERN_ERR "Unable to allocate interrupt "
"number for %s\n", np->full_name);
else
count++;
}
}
/* Else use normal interrupt tree parsing */
else {
/* First try to do a proper OF tree parsing */
for (index = 0; of_irq_map_one(np, index, &oirq) == 0;
index++) {
if (count > 15)
break;
virqs[count] = irq_create_of_mapping(oirq.controller,
oirq.specifier,
oirq.size);
if (virqs[count] == NO_IRQ)
printk(KERN_ERR "Unable to allocate interrupt "
"number for %s\n", np->full_name);
else
count++;
}
}
/* Now request them */
for (i = 0; i < count; i++) {
if (request_irq(virqs[i], handler, 0, name, NULL)) {
printk(KERN_ERR "Unable to request interrupt %d for "
"%s\n", virqs[i], np->full_name);
return;
}
}
}
/*
* Initialize handlers for the set of interrupts caused by hardware errors
* and power system events.
*/
static int __init init_ras_IRQ(void)
{
struct device_node *np;
ras_get_sensor_state_token = rtas_token("get-sensor-state");
ras_check_exception_token = rtas_token("check-exception");
/* Internal Errors */
np = of_find_node_by_path("/event-sources/internal-errors");
if (np != NULL) {
request_ras_irqs(np, ras_error_interrupt, "RAS_ERROR");
of_node_put(np);
}
/* EPOW Events */
np = of_find_node_by_path("/event-sources/epow-events");
if (np != NULL) {
request_ras_irqs(np, ras_epow_interrupt, "RAS_EPOW");
of_node_put(np);
}
return 0;
}
__initcall(init_ras_IRQ);
/*
* Handle power subsystem events (EPOW).
*
* Presently we just log the event has occurred. This should be fixed
* to examine the type of power failure and take appropriate action where
* the time horizon permits something useful to be done.
*/
static irqreturn_t
ras_epow_interrupt(int irq, void *dev_id, struct pt_regs * regs)
{
int status = 0xdeadbeef;
int state = 0;
int critical;
status = rtas_call(ras_get_sensor_state_token, 2, 2, &state,
EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX);
if (state > 3)
critical = 1; /* Time Critical */
else
critical = 0;
spin_lock(&ras_log_buf_lock);
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
RAS_VECTOR_OFFSET,
irq_map[irq].hwirq,
RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS,
critical, __pa(&ras_log_buf),
rtas_get_error_log_max());
udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n",
*((unsigned long *)&ras_log_buf), status, state);
printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n",
*((unsigned long *)&ras_log_buf), status, state);
/* format and print the extended information */
log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
spin_unlock(&ras_log_buf_lock);
return IRQ_HANDLED;
}
/*
* Handle hardware error interrupts.
*
* RTAS check-exception is called to collect data on the exception. If
* the error is deemed recoverable, we log a warning and return.
* For nonrecoverable errors, an error is logged and we stop all processing
* as quickly as possible in order to prevent propagation of the failure.
*/
static irqreturn_t
ras_error_interrupt(int irq, void *dev_id, struct pt_regs * regs)
{
struct rtas_error_log *rtas_elog;
int status = 0xdeadbeef;
int fatal;
spin_lock(&ras_log_buf_lock);
status = rtas_call(ras_check_exception_token, 6, 1, NULL,
RAS_VECTOR_OFFSET,
irq_map[irq].hwirq,
RTAS_INTERNAL_ERROR, 1 /*Time Critical */,
__pa(&ras_log_buf),
rtas_get_error_log_max());
rtas_elog = (struct rtas_error_log *)ras_log_buf;
if ((status == 0) && (rtas_elog->severity >= RTAS_SEVERITY_ERROR_SYNC))
fatal = 1;
else
fatal = 0;
/* format and print the extended information */
log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
if (fatal) {
udbg_printf("Fatal HW Error <0x%lx 0x%x>\n",
*((unsigned long *)&ras_log_buf), status);
printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n",
*((unsigned long *)&ras_log_buf), status);
#ifndef DEBUG
/* Don't actually power off when debugging so we can test
* without actually failing while injecting errors.
* Error data will not be logged to syslog.
*/
ppc_md.power_off();
#endif
} else {
udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n",
*((unsigned long *)&ras_log_buf), status);
printk(KERN_WARNING
"Warning: Recoverable hardware error <0x%lx 0x%x>\n",
*((unsigned long *)&ras_log_buf), status);
}
spin_unlock(&ras_log_buf_lock);
return IRQ_HANDLED;
}
/* Get the error information for errors coming through the
* FWNMI vectors. The pt_regs' r3 will be updated to reflect
* the actual r3 if possible, and a ptr to the error log entry
* will be returned if found.
*
* The mce_data_buf does not have any locks or protection around it,
* if a second machine check comes in, or a system reset is done
* before we have logged the error, then we will get corruption in the
* error log. This is preferable over holding off on calling
* ibm,nmi-interlock which would result in us checkstopping if a
* second machine check did come in.
*/
static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs)
{
unsigned long errdata = regs->gpr[3];
struct rtas_error_log *errhdr = NULL;
unsigned long *savep;
if ((errdata >= 0x7000 && errdata < 0x7fff0) ||
(errdata >= rtas.base && errdata < rtas.base + rtas.size - 16)) {
savep = __va(errdata);
regs->gpr[3] = savep[0]; /* restore original r3 */
memset(mce_data_buf, 0, RTAS_ERROR_LOG_MAX);
memcpy(mce_data_buf, (char *)(savep + 1), RTAS_ERROR_LOG_MAX);
errhdr = (struct rtas_error_log *)mce_data_buf;
} else {
printk("FWNMI: corrupt r3\n");
}
return errhdr;
}
/* Call this when done with the data returned by FWNMI_get_errinfo.
* It will release the saved data area for other CPUs in the
* partition to receive FWNMI errors.
*/
static void fwnmi_release_errinfo(void)
{
int ret = rtas_call(rtas_token("ibm,nmi-interlock"), 0, 1, NULL);
if (ret != 0)
printk("FWNMI: nmi-interlock failed: %d\n", ret);
}
int pSeries_system_reset_exception(struct pt_regs *regs)
{
if (fwnmi_active) {
struct rtas_error_log *errhdr = fwnmi_get_errinfo(regs);
if (errhdr) {
/* XXX Should look at FWNMI information */
}
fwnmi_release_errinfo();
}
return 0; /* need to perform reset */
}
/*
* See if we can recover from a machine check exception.
* This is only called on power4 (or above) and only via
* the Firmware Non-Maskable Interrupts (fwnmi) handler
* which provides the error analysis for us.
*
* Return 1 if corrected (or delivered a signal).
* Return 0 if there is nothing we can do.
*/
static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err)
{
int nonfatal = 0;
if (err->disposition == RTAS_DISP_FULLY_RECOVERED) {
/* Platform corrected itself */
nonfatal = 1;
} else if ((regs->msr & MSR_RI) &&
user_mode(regs) &&
err->severity == RTAS_SEVERITY_ERROR_SYNC &&
err->disposition == RTAS_DISP_NOT_RECOVERED &&
err->target == RTAS_TARGET_MEMORY &&
err->type == RTAS_TYPE_ECC_UNCORR &&
!(current->pid == 0 || current->pid == 1)) {
/* Kill off a user process with an ECC error */
printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n",
current->pid);
/* XXX something better for ECC error? */
_exception(SIGBUS, regs, BUS_ADRERR, regs->nip);
nonfatal = 1;
}
log_error((char *)err, ERR_TYPE_RTAS_LOG, !nonfatal);
return nonfatal;
}
/*
* Handle a machine check.
*
* Note that on Power 4 and beyond Firmware Non-Maskable Interrupts (fwnmi)
* should be present. If so the handler which called us tells us if the
* error was recovered (never true if RI=0).
*
* On hardware prior to Power 4 these exceptions were asynchronous which
* means we can't tell exactly where it occurred and so we can't recover.
*/
int pSeries_machine_check_exception(struct pt_regs *regs)
{
struct rtas_error_log *errp;
if (fwnmi_active) {
errp = fwnmi_get_errinfo(regs);
fwnmi_release_errinfo();
if (errp && recover_mce(regs, errp))
return 1;
}
return 0;
}