[PATCH] kdump: Use real pt_regs from exception

Make crash_kexec() take a struct pt_regs * as an argument. This makes it
possible to capture the exact register state at the point of the crash. If
we arrive via a direct panic() call, NULL is passed and the current
registers are saved just before the crash dump.

This hooks into two places:
die(): check the conditions under which calling do_exit() would panic and,
if they hold, go directly to crash_kexec() with the pt_regs that caused
the fatal fault.

die_nmi(): if we receive an NMI lockup while in the kernel, use the
pt_regs and go directly to crash_kexec(). We are probably badly nested
at this point, so this might be the only chance to escape with proper
information.

Signed-off-by: Alexander Nyberg <alexn@telia.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Alexander Nyberg and committed by
Linus Torvalds
6e274d14 86b1ae38

+66 -23
+24 -12
arch/i386/kernel/crash.c
··· 100 100 regs->eip = (unsigned long)current_text_addr(); 101 101 } 102 102 103 - static void crash_save_self(void) 103 + /* CPU does not save ss and esp on stack if execution is already 104 + * running in kernel mode at the time of NMI occurrence. This code 105 + * fixes it. 106 + */ 107 + static void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) 108 + { 109 + memcpy(newregs, oldregs, sizeof(*newregs)); 110 + newregs->esp = (unsigned long)&(oldregs->esp); 111 + __asm__ __volatile__("xorl %eax, %eax;"); 112 + __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(newregs->xss)); 113 + } 114 + 115 + /* We may have saved_regs from where the error came from 116 + * or it is NULL if via a direct panic(). 117 + */ 118 + static void crash_save_self(struct pt_regs *saved_regs) 104 119 { 105 120 struct pt_regs regs; 106 121 int cpu; 107 122 cpu = smp_processor_id(); 108 - crash_get_current_regs(&regs); 123 + 124 + if (saved_regs) 125 + crash_setup_regs(&regs, saved_regs); 126 + else 127 + crash_get_current_regs(&regs); 109 128 crash_save_this_cpu(&regs, cpu); 110 129 } 111 130 ··· 143 124 return 1; 144 125 local_irq_disable(); 145 126 146 - /* CPU does not save ss and esp on stack if execution is already 147 - * running in kernel mode at the time of NMI occurrence. This code 148 - * fixes it. 149 - */ 150 127 if (!user_mode(regs)) { 151 - memcpy(&fixed_regs, regs, sizeof(*regs)); 152 - fixed_regs.esp = (unsigned long)&(regs->esp); 153 - __asm__ __volatile__("xorl %eax, %eax;"); 154 - __asm__ __volatile__ ("movw %%ss, %%ax;" :"=a"(fixed_regs.xss)); 128 + crash_setup_regs(&fixed_regs, regs); 155 129 regs = &fixed_regs; 156 130 } 157 131 crash_save_this_cpu(regs, cpu); ··· 196 184 } 197 185 #endif 198 186 199 - void machine_crash_shutdown(void) 187 + void machine_crash_shutdown(struct pt_regs *regs) 200 188 { 201 189 /* This function is only called after the system 202 190 * has paniced or is otherwise in a critical state. 
··· 216 204 #if defined(CONFIG_X86_IO_APIC) 217 205 disable_IO_APIC(); 218 206 #endif 219 - crash_save_self(); 207 + crash_save_self(regs); 220 208 }
+17
arch/i386/kernel/traps.c
··· 27 27 #include <linux/ptrace.h> 28 28 #include <linux/utsname.h> 29 29 #include <linux/kprobes.h> 30 + #include <linux/kexec.h> 30 31 31 32 #ifdef CONFIG_EISA 32 33 #include <linux/ioport.h> ··· 295 294 printk("Kernel BUG\n"); 296 295 } 297 296 297 + /* This is gone through when something in the kernel 298 + * has done something bad and is about to be terminated. 299 + */ 298 300 void die(const char * str, struct pt_regs * regs, long err) 299 301 { 300 302 static struct { ··· 345 341 bust_spinlocks(0); 346 342 die.lock_owner = -1; 347 343 spin_unlock_irq(&die.lock); 344 + 345 + if (kexec_should_crash(current)) 346 + crash_kexec(regs); 347 + 348 348 if (in_interrupt()) 349 349 panic("Fatal exception in interrupt"); 350 350 ··· 578 570 console_silent(); 579 571 spin_unlock(&nmi_print_lock); 580 572 bust_spinlocks(0); 573 + 574 + /* If we are in kernel we are probably nested up pretty bad 575 + * and might aswell get out now while we still can. 576 + */ 577 + if (!user_mode(regs)) { 578 + current->thread.trap_no = 2; 579 + crash_kexec(regs); 580 + } 581 + 581 582 do_exit(SIGSEGV); 582 583 } 583 584
+1 -1
arch/ppc/kernel/machine_kexec.c
··· 34 34 } 35 35 } 36 36 37 - void machine_crash_shutdown(void) 37 + void machine_crash_shutdown(struct pt_regs *regs) 38 38 { 39 39 if (ppc_md.machine_crash_shutdown) { 40 40 ppc_md.machine_crash_shutdown();
+1 -1
arch/ppc64/kernel/machine_kexec.c
··· 34 34 * and if what it will achieve. Letting it be now to compile the code 35 35 * in generic kexec environment 36 36 */ 37 - void machine_crash_shutdown(void) 37 + void machine_crash_shutdown(struct pt_regs *regs) 38 38 { 39 39 /* do nothing right now */ 40 40 /* smp_relase_cpus() if we want smp on panic kernel */
+1 -1
arch/s390/kernel/crash.c
··· 12 12 13 13 note_buf_t crash_notes[NR_CPUS]; 14 14 15 - void machine_crash_shutdown(void) 15 + void machine_crash_shutdown(struct pt_regs *regs) 16 16 { 17 17 }
+1 -1
arch/x86_64/kernel/crash.c
··· 22 22 23 23 note_buf_t crash_notes[NR_CPUS]; 24 24 25 - void machine_crash_shutdown(void) 25 + void machine_crash_shutdown(struct pt_regs *regs) 26 26 { 27 27 /* This function is only called after the system 28 28 * has paniced or is otherwise in a critical state.
+1 -1
drivers/char/sysrq.c
··· 100 100 static void sysrq_handle_crashdump(int key, struct pt_regs *pt_regs, 101 101 struct tty_struct *tty) 102 102 { 103 - crash_kexec(); 103 + crash_kexec(pt_regs); 104 104 } 105 105 static struct sysrq_key_op sysrq_crashdump_op = { 106 106 .handler = sysrq_handle_crashdump,
+6 -2
include/linux/kexec.h
··· 99 99 unsigned long flags); 100 100 #endif 101 101 extern struct page *kimage_alloc_control_pages(struct kimage *image, unsigned int order); 102 - extern void crash_kexec(void); 102 + extern void crash_kexec(struct pt_regs *); 103 + int kexec_should_crash(struct task_struct *); 103 104 extern struct kimage *kexec_image; 104 105 105 106 #define KEXEC_ON_CRASH 0x00000001 ··· 124 123 extern struct resource crashk_res; 125 124 126 125 #else /* !CONFIG_KEXEC */ 127 - static inline void crash_kexec(void) { } 126 + struct pt_regs; 127 + struct task_struct; 128 + static inline void crash_kexec(struct pt_regs *regs) { } 129 + static inline int kexec_should_crash(struct task_struct *p) { return 0; } 128 130 #endif /* CONFIG_KEXEC */ 129 131 #endif /* LINUX_KEXEC_H */
+2 -1
include/linux/reboot.h
··· 52 52 extern void machine_power_off(void); 53 53 54 54 extern void machine_shutdown(void); 55 - extern void machine_crash_shutdown(void); 55 + struct pt_regs; 56 + extern void machine_crash_shutdown(struct pt_regs *); 56 57 57 58 #endif 58 59
+11 -2
kernel/kexec.c
··· 18 18 #include <linux/reboot.h> 19 19 #include <linux/syscalls.h> 20 20 #include <linux/ioport.h> 21 + #include <linux/hardirq.h> 22 + 21 23 #include <asm/page.h> 22 24 #include <asm/uaccess.h> 23 25 #include <asm/io.h> ··· 33 31 .end = 0, 34 32 .flags = IORESOURCE_BUSY | IORESOURCE_MEM 35 33 }; 34 + 35 + int kexec_should_crash(struct task_struct *p) 36 + { 37 + if (in_interrupt() || !p->pid || p->pid == 1 || panic_on_oops) 38 + return 1; 39 + return 0; 40 + } 36 41 37 42 /* 38 43 * When kexec transitions to the new kernel there is a one-to-one ··· 1019 1010 } 1020 1011 #endif 1021 1012 1022 - void crash_kexec(void) 1013 + void crash_kexec(struct pt_regs *regs) 1023 1014 { 1024 1015 struct kimage *image; 1025 1016 int locked; ··· 1037 1028 if (!locked) { 1038 1029 image = xchg(&kexec_crash_image, NULL); 1039 1030 if (image) { 1040 - machine_crash_shutdown(); 1031 + machine_crash_shutdown(regs); 1041 1032 machine_kexec(image); 1042 1033 } 1043 1034 xchg(&kexec_lock, 0);
+1 -1
kernel/panic.c
··· 83 83 * everything else. 84 84 * Do we want to call this before we try to display a message? 85 85 */ 86 - crash_kexec(); 86 + crash_kexec(NULL); 87 87 88 88 #ifdef CONFIG_SMP 89 89 /*