linux/arch/ia64/kernel/crash.c
Hidetoshi Seto 4295ab3488 [IA64] kdump: Mask MCA/INIT on frozen cpus
Summary:

  INIT asserted on kdump kernel invokes INIT handler not only on a
  cpu that running on the kdump kernel, but also BSP of the panicked
  kernel, because the (badly) frozen BSP can be thawed by INIT.

Description:

  The kdump_cpu_freeze() is called on cpus except one that initiates
  panic and/or kdump, to stop/offline the cpu (on ia64, it means we
  pass control of cpus to SAL, or put them in spinloop).  Note that
  CPU0(BSP) always go to spinloop, so if panic was happened on an AP,
  there are at least 2cpus (= the AP and BSP) which not back to SAL.

  On the spinning cpus, interrupts are disabled (rsm psr.i), but INIT
  is still interruptible because psr.mc for mask them is not set unless
  kdump_cpu_freeze() is not called from MCA/INIT context.

  Therefore, assume that a panic was happened on an AP, kdump was
  invoked, new INIT handlers for kdump kernel was registered and then
  an INIT is asserted.  From the viewpoint of SAL, there are 2 online
  cpus, so INIT will be delivered to both of them.  It likely means
  that not only the AP (= a cpu executing kdump) enters INIT handler
  which is newly registered, but also BSP (= another cpu spinning in
  panicked kernel) enters the same INIT handler.  Of course setting of
  registers in BSP are still old (for panicked kernel), so what happen
  with running handler with wrong setting will be extremely unexpected.
  I believe this is not desirable behavior.

How to Reproduce:

  Start kdump on one of APs (e.g. cpu1)
    # taskset 0x2 echo c > /proc/sysrq-trigger
  Then assert INIT after kdump kernel is booted, after new INIT handler
  for kdump kernel is registered.

Expected results:

  An INIT handler is invoked only on the AP.

Actual results:

  An INIT handler is invoked on the AP and BSP.

Sample of results:

  I got following console log by asserting INIT after prompt "root:/>".
  It seems that two monarchs appeared by one INIT, and one panicked at
  last.  And it also seems that the panicked one supposed there were
  4 online cpus and no one did rendezvous:

    :
    [  0 %]dropping to initramfs shell
    exiting this shell will reboot your system
    root:/> Entered OS INIT handler. PSP=fff301a0 cpu=0 monarch=0
    ia64_init_handler: Promoting cpu 0 to monarch.
    Delaying for 5 seconds...
    All OS INIT slaves have reached rendezvous
    Processes interrupted by INIT - 0 (cpu 0 task 0xa000000100af0000)
    :
    <<snip>>
    :
    Entered OS INIT handler. PSP=fff301a0 cpu=0 monarch=1
    Delaying for 5 seconds...
    mlogbuf_finish: printing switched to urgent mode, MCA/INIT might be dodgy or fail.
    OS INIT slave did not rendezvous on cpu 1 2 3
    INIT swapper 0[0]: bugcheck! 0 [1]
    :
    <<snip>>
    :
    Kernel panic - not syncing: Attempted to kill the idle task!

Proposed fix:

  To avoid this problem, this patch inserts ia64_set_psr_mc() to mask
  INIT on cpus going to be frozen.  This masking have no effect if the
  kdump_cpu_freeze() is called from INIT handler when kdump_on_init == 1,
  because psr.mc is already turned on to 1 before entering OS_INIT.
  I confirmed that weird log like above are disappeared after applying
  this patch.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Haren Myneni <hbabu@us.ibm.com>
Cc: kexec@lists.infradead.org
Acked-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
2009-09-14 16:17:05 -07:00

262 lines
5.8 KiB
C

/*
* arch/ia64/kernel/crash.c
*
* Architecture specific (ia64) functions for kexec based crash dumps.
*
* Created by: Khalid Aziz <khalid.aziz@hp.com>
* Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
* Copyright (C) 2005 Intel Corp Zou Nan hai <nanhai.zou@intel.com>
*
*/
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/bootmem.h>
#include <linux/kexec.h>
#include <linux/elfcore.h>
#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/kdebug.h>
#include <asm/mca.h>
int kdump_status[NR_CPUS];
static atomic_t kdump_cpu_frozen;
atomic_t kdump_in_progress;
static int kdump_on_init = 1;
static int kdump_on_fatal_mca = 1;
static inline Elf64_Word
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
size_t data_len)
{
struct elf_note *note = (struct elf_note *)buf;
note->n_namesz = strlen(name) + 1;
note->n_descsz = data_len;
note->n_type = type;
buf += (sizeof(*note) + 3)/4;
memcpy(buf, name, note->n_namesz);
buf += (note->n_namesz + 3)/4;
memcpy(buf, data, data_len);
buf += (data_len + 3)/4;
return buf;
}
static void
final_note(void *buf)
{
memset(buf, 0, sizeof(struct elf_note));
}
extern void ia64_dump_cpu_regs(void *);
static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);
void
crash_save_this_cpu(void)
{
void *buf;
unsigned long cfm, sof, sol;
int cpu = smp_processor_id();
struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);
elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
memset(prstatus, 0, sizeof(*prstatus));
prstatus->pr_pid = current->pid;
ia64_dump_cpu_regs(dst);
cfm = dst[43];
sol = (cfm >> 7) & 0x7f;
sof = cfm & 0x7f;
dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
sof - sol);
buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
if (!buf)
return;
buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, prstatus,
sizeof(*prstatus));
final_note(buf);
}
#ifdef CONFIG_SMP
static int
kdump_wait_cpu_freeze(void)
{
int cpu_num = num_online_cpus() - 1;
int timeout = 1000;
while(timeout-- > 0) {
if (atomic_read(&kdump_cpu_frozen) == cpu_num)
return 0;
udelay(1000);
}
return 1;
}
#endif
void
machine_crash_shutdown(struct pt_regs *pt)
{
/* This function is only called after the system
* has paniced or is otherwise in a critical state.
* The minimum amount of code to allow a kexec'd kernel
* to run successfully needs to happen here.
*
* In practice this means shooting down the other cpus in
* an SMP system.
*/
kexec_disable_iosapic();
#ifdef CONFIG_SMP
kdump_smp_send_stop();
/* not all cpu response to IPI, send INIT to freeze them */
if (kdump_wait_cpu_freeze() && kdump_on_init) {
kdump_smp_send_init();
}
#endif
}
static void
machine_kdump_on_init(void)
{
crash_save_vmcoreinfo();
local_irq_disable();
kexec_disable_iosapic();
machine_kexec(ia64_kimage);
}
void
kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
{
int cpuid;
local_irq_disable();
cpuid = smp_processor_id();
crash_save_this_cpu();
current->thread.ksp = (__u64)info->sw - 16;
ia64_set_psr_mc(); /* mask MCA/INIT and stop reentrance */
atomic_inc(&kdump_cpu_frozen);
kdump_status[cpuid] = 1;
mb();
#ifdef CONFIG_HOTPLUG_CPU
if (cpuid != 0)
ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]);
#endif
for (;;)
cpu_relax();
}
static int
kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
{
struct ia64_mca_notify_die *nd;
struct die_args *args = data;
if (!kdump_on_init && !kdump_on_fatal_mca)
return NOTIFY_DONE;
if (!ia64_kimage) {
if (val == DIE_INIT_MONARCH_LEAVE)
ia64_mca_printk(KERN_NOTICE
"%s: kdump not configured\n",
__func__);
return NOTIFY_DONE;
}
if (val != DIE_INIT_MONARCH_LEAVE &&
val != DIE_INIT_SLAVE_LEAVE &&
val != DIE_INIT_MONARCH_PROCESS &&
val != DIE_MCA_RENDZVOUS_LEAVE &&
val != DIE_MCA_MONARCH_LEAVE)
return NOTIFY_DONE;
nd = (struct ia64_mca_notify_die *)args->err;
/* Reason code 1 means machine check rendezvous*/
if ((val == DIE_INIT_MONARCH_LEAVE || val == DIE_INIT_SLAVE_LEAVE
|| val == DIE_INIT_MONARCH_PROCESS) && nd->sos->rv_rc == 1)
return NOTIFY_DONE;
switch (val) {
case DIE_INIT_MONARCH_PROCESS:
if (kdump_on_init) {
atomic_set(&kdump_in_progress, 1);
*(nd->monarch_cpu) = -1;
}
break;
case DIE_INIT_MONARCH_LEAVE:
if (kdump_on_init)
machine_kdump_on_init();
break;
case DIE_INIT_SLAVE_LEAVE:
if (atomic_read(&kdump_in_progress))
unw_init_running(kdump_cpu_freeze, NULL);
break;
case DIE_MCA_RENDZVOUS_LEAVE:
if (atomic_read(&kdump_in_progress))
unw_init_running(kdump_cpu_freeze, NULL);
break;
case DIE_MCA_MONARCH_LEAVE:
/* *(nd->data) indicate if MCA is recoverable */
if (kdump_on_fatal_mca && !(*(nd->data))) {
atomic_set(&kdump_in_progress, 1);
*(nd->monarch_cpu) = -1;
machine_kdump_on_init();
}
break;
}
return NOTIFY_DONE;
}
#ifdef CONFIG_SYSCTL
static ctl_table kdump_ctl_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_init",
.data = &kdump_on_init,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "kdump_on_fatal_mca",
.data = &kdump_on_fatal_mca,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 }
};
static ctl_table sys_table[] = {
{
.ctl_name = CTL_KERN,
.procname = "kernel",
.mode = 0555,
.child = kdump_ctl_table,
},
{ .ctl_name = 0 }
};
#endif
static int
machine_crash_setup(void)
{
/* be notified before default_monarch_init_process */
static struct notifier_block kdump_init_notifier_nb = {
.notifier_call = kdump_init_notifier,
.priority = 1,
};
int ret;
if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
return ret;
#ifdef CONFIG_SYSCTL
register_sysctl_table(sys_table);
#endif
return 0;
}
__initcall(machine_crash_setup);