Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/xics: Harden xics hypervisor backend

During kdump stress testing I sometimes see the kdump kernel panic
with:

Interrupt 0x306 (real) is invalid, disabling it.
Kernel panic - not syncing: bad return code EOI - rc = -4, value=ff000306

Instead of panicking, print the error message, dump the stack the first
time it happens, and continue on. Add some more information to the
debug messages as well.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

authored by

Anton Blanchard and committed by
Benjamin Herrenschmidt
3ce21cdf 3bfd0c9c

+27 -11
+27 -11
arch/powerpc/sysdev/xics/icp-hv.c
··· 27 27 { 28 28 unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; 29 29 long rc; 30 + unsigned int ret = XICS_IRQ_SPURIOUS; 30 31 31 32 rc = plpar_hcall(H_XIRR, retbuf, cppr); 32 - if (rc != H_SUCCESS) 33 - panic(" bad return code xirr - rc = %lx\n", rc); 34 - return (unsigned int)retbuf[0]; 33 + if (rc == H_SUCCESS) { 34 + ret = (unsigned int)retbuf[0]; 35 + } else { 36 + pr_err("%s: bad return code xirr cppr=0x%x returned %ld\n", 37 + __func__, cppr, rc); 38 + WARN_ON_ONCE(1); 39 + } 40 + 41 + return ret; 35 42 } 36 43 37 44 static inline void icp_hv_set_xirr(unsigned int value) 38 45 { 39 46 long rc = plpar_hcall_norets(H_EOI, value); 40 - if (rc != H_SUCCESS) 41 - panic("bad return code EOI - rc = %ld, value=%x\n", rc, value); 47 + if (rc != H_SUCCESS) { 48 + pr_err("%s: bad return code eoi xirr=0x%x returned %ld\n", 49 + __func__, value, rc); 50 + WARN_ON_ONCE(1); 51 + } 42 52 } 43 53 44 54 static inline void icp_hv_set_cppr(u8 value) 45 55 { 46 56 long rc = plpar_hcall_norets(H_CPPR, value); 47 - if (rc != H_SUCCESS) 48 - panic("bad return code cppr - rc = %lx\n", rc); 57 + if (rc != H_SUCCESS) { 58 + pr_err("%s: bad return code cppr cppr=0x%x returned %ld\n", 59 + __func__, value, rc); 60 + WARN_ON_ONCE(1); 61 + } 49 62 } 50 63 51 64 static inline void icp_hv_set_qirr(int n_cpu , u8 value) 52 65 { 53 - long rc = plpar_hcall_norets(H_IPI, get_hard_smp_processor_id(n_cpu), 54 - value); 55 - if (rc != H_SUCCESS) 56 - panic("bad return code qirr - rc = %lx\n", rc); 66 + int hw_cpu = get_hard_smp_processor_id(n_cpu); 67 + long rc = plpar_hcall_norets(H_IPI, hw_cpu, value); 68 + if (rc != H_SUCCESS) { 69 + pr_err("%s: bad return code qirr cpu=%d hw_cpu=%d mfrr=0x%x " 70 + "returned %ld\n", __func__, n_cpu, hw_cpu, value, rc); 71 + WARN_ON_ONCE(1); 72 + } 57 73 } 58 74 59 75 static void icp_hv_eoi(struct irq_data *d)