Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc: Remove broken and complicated kdump system reset code

We have a lot of complicated logic that handles possible recursion between
kdump and a system reset exception. We can solve this in a much simpler
way using the same setjmp/longjmp tricks xmon does.

As a first step, this patch removes the old system reset code.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Authored by Anton Blanchard and committed by Benjamin Herrenschmidt
9b00ac06 58154c8c

+25 -101
-6
arch/powerpc/include/asm/kexec.h
··· 73 73 master to copy new code to 0 */ 74 74 extern int crashing_cpu; 75 75 extern void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)); 76 - extern cpumask_t cpus_in_sr; 77 - static inline int kexec_sr_activated(int cpu) 78 - { 79 - return cpumask_test_cpu(cpu, &cpus_in_sr); 80 - } 81 76 82 77 struct kimage; 83 78 struct pt_regs; ··· 89 94 extern void machine_kexec_mask_interrupts(void); 90 95 91 96 #else /* !CONFIG_KEXEC */ 92 - static inline int kexec_sr_activated(int cpu) { return 0; } 93 97 static inline void crash_kexec_secondary(struct pt_regs *regs) { } 94 98 95 99 static inline int overlaps_crashkernel(unsigned long start, unsigned long size)
+12 -75
arch/powerpc/kernel/crash.c
··· 47 47 /* This keeps a track of which one is crashing cpu. */ 48 48 int crashing_cpu = -1; 49 49 static cpumask_t cpus_in_crash = CPU_MASK_NONE; 50 - cpumask_t cpus_in_sr = CPU_MASK_NONE; 51 50 52 51 #define CRASH_HANDLER_MAX 3 53 52 /* NULL terminated list of shutdown handles */ ··· 54 55 static DEFINE_SPINLOCK(crash_handlers_lock); 55 56 56 57 #ifdef CONFIG_SMP 57 - static atomic_t enter_on_soft_reset = ATOMIC_INIT(0); 58 58 59 59 void crash_ipi_callback(struct pt_regs *regs) 60 60 { ··· 68 70 cpumask_set_cpu(cpu, &cpus_in_crash); 69 71 70 72 /* 71 - * Entered via soft-reset - could be the kdump 72 - * process is invoked using soft-reset or user activated 73 - * it if some CPU did not respond to an IPI. 74 - * For soft-reset, the secondary CPU can enter this func 75 - * twice. 1 - using IPI, and 2. soft-reset. 76 - * Tell the kexec CPU that entered via soft-reset and ready 77 - * to go down. 78 - */ 79 - if (cpumask_test_cpu(cpu, &cpus_in_sr)) { 80 - cpumask_clear_cpu(cpu, &cpus_in_sr); 81 - atomic_inc(&enter_on_soft_reset); 82 - } 83 - 84 - /* 85 73 * Starting the kdump boot. 86 74 * This barrier is needed to make sure that all CPUs are stopped. 87 - * If not, soft-reset will be invoked to bring other CPUs. 88 75 */ 89 76 while (!cpumask_test_cpu(crashing_cpu, &cpus_in_crash)) 90 77 cpu_relax(); ··· 86 103 /* NOTREACHED */ 87 104 } 88 105 89 - /* 90 - * Wait until all CPUs are entered via soft-reset. 
91 - */ 92 - static void crash_soft_reset_check(int cpu) 93 - { 94 - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ 95 - 96 - cpumask_clear_cpu(cpu, &cpus_in_sr); 97 - while (atomic_read(&enter_on_soft_reset) != ncpus) 98 - cpu_relax(); 99 - } 100 - 101 - 102 106 static void crash_kexec_prepare_cpus(int cpu) 103 107 { 104 108 unsigned int msecs; 105 109 106 110 unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ 111 + 112 + printk(KERN_EMERG "Sending IPI to other CPUs\n"); 107 113 108 114 crash_send_ipi(crash_ipi_callback); 109 115 smp_wmb(); ··· 103 131 * respond. 104 132 * Delay of at least 10 seconds. 105 133 */ 106 - printk(KERN_EMERG "Sending IPI to other cpus...\n"); 107 134 msecs = 10000; 108 135 while ((cpumask_weight(&cpus_in_crash) < ncpus) && (--msecs > 0)) { 109 136 cpu_relax(); ··· 111 140 112 141 /* Would it be better to replace the trap vector here? */ 113 142 114 - /* 115 - * FIXME: In case if we do not get all CPUs, one possibility: ask the 116 - * user to do soft reset such that we get all. 117 - * Soft-reset will be used until better mechanism is implemented. 118 - */ 119 143 if (cpumask_weight(&cpus_in_crash) < ncpus) { 120 - printk(KERN_EMERG "done waiting: %d cpu(s) not responding\n", 144 + printk(KERN_EMERG "ERROR: %d CPU(s) not responding\n", 121 145 ncpus - cpumask_weight(&cpus_in_crash)); 122 - printk(KERN_EMERG "Activate soft-reset to stop other cpu(s)\n"); 123 - cpumask_clear(&cpus_in_sr); 124 - atomic_set(&enter_on_soft_reset, 0); 125 - while (cpumask_weight(&cpus_in_crash) < ncpus) 126 - cpu_relax(); 127 146 } 128 - /* 129 - * Make sure all CPUs are entered via soft-reset if the kdump is 130 - * invoked using soft-reset. 
131 - */ 132 - if (cpumask_test_cpu(cpu, &cpus_in_sr)) 133 - crash_soft_reset_check(cpu); 134 - /* Leave the IPI callback set */ 147 + 148 + printk(KERN_EMERG "IPI complete\n"); 135 149 } 136 150 137 151 /* 138 - * This function will be called by secondary cpus or by kexec cpu 139 - * if soft-reset is activated to stop some CPUs. 152 + * This function will be called by secondary cpus. 140 153 */ 141 154 void crash_kexec_secondary(struct pt_regs *regs) 142 155 { 143 - int cpu = smp_processor_id(); 144 156 unsigned long flags; 145 - int msecs = 5; 157 + int msecs = 500; 146 158 147 159 local_irq_save(flags); 148 - /* Wait 5ms if the kexec CPU is not entered yet. */ 160 + 161 + /* Wait 500ms for the primary crash CPU to signal its progress */ 149 162 while (crashing_cpu < 0) { 150 163 if (--msecs < 0) { 151 - /* 152 - * Either kdump image is not loaded or 153 - * kdump process is not started - Probably xmon 154 - * exited using 'x'(exit and recover) or 155 - * kexec_should_crash() failed for all running tasks. 156 - */ 157 - cpumask_clear_cpu(cpu, &cpus_in_sr); 164 + /* No response, kdump image may not have been loaded */ 158 165 local_irq_restore(flags); 159 166 return; 160 167 } 168 + 161 169 mdelay(1); 162 170 cpu_relax(); 163 171 } 164 - if (cpu == crashing_cpu) { 165 - /* 166 - * Panic CPU will enter this func only via soft-reset. 167 - * Wait until all secondary CPUs entered and 168 - * then start kexec boot. 169 - */ 170 - crash_soft_reset_check(cpu); 171 - cpumask_set_cpu(crashing_cpu, &cpus_in_crash); 172 - if (ppc_md.kexec_cpu_down) 173 - ppc_md.kexec_cpu_down(1, 0); 174 - machine_kexec(kexec_crash_image); 175 - /* NOTREACHED */ 176 - } 172 + 177 173 crash_ipi_callback(regs); 178 174 } 179 175 ··· 163 225 164 226 void crash_kexec_secondary(struct pt_regs *regs) 165 227 { 166 - cpumask_clear(&cpus_in_sr); 167 228 } 168 229 #endif /* CONFIG_SMP */ 169 230
+13 -20
arch/powerpc/kernel/traps.c
··· 162 162 printk("\n"); 163 163 raw_spin_unlock_irqrestore(&die.lock, flags); 164 164 165 - if (kexec_should_crash(current) || 166 - kexec_sr_activated(smp_processor_id())) 165 + /* 166 + * A system reset (0x100) is a request to dump, so we always send 167 + * it through the crashdump code. 168 + */ 169 + if (kexec_should_crash(current) || (TRAP(regs) == 0x100)) { 167 170 crash_kexec(regs); 168 - crash_kexec_secondary(regs); 171 + 172 + /* 173 + * We aren't the primary crash CPU. We need to send it 174 + * to a holding pattern to avoid it ending up in the panic 175 + * code. 176 + */ 177 + crash_kexec_secondary(regs); 178 + } 169 179 170 180 /* 171 181 * While our oops output is serialised by a spinlock, output ··· 242 232 return; 243 233 } 244 234 245 - #ifdef CONFIG_KEXEC 246 - cpumask_set_cpu(smp_processor_id(), &cpus_in_sr); 247 - #endif 248 - 249 235 die("System Reset", regs, SIGABRT); 250 - 251 - /* 252 - * Some CPUs when released from the debugger will execute this path. 253 - * These CPUs entered the debugger via a soft-reset. If the CPU was 254 - * hung before entering the debugger it will return to the hung 255 - * state when exiting this function. This causes a problem in 256 - * kdump since the hung CPU(s) will not respond to the IPI sent 257 - * from kdump. To prevent the problem we call crash_kexec_secondary() 258 - * here. If a kdump had not been initiated or we exit the debugger 259 - * with the "exit and recover" command (x) crash_kexec_secondary() 260 - * will return after 5ms and the CPU returns to its previous state. 261 - */ 262 - crash_kexec_secondary(regs); 263 236 264 237 /* Must die if the interrupt is not recoverable */ 265 238 if (!(regs->msr & MSR_RI))