[PATCH] x86_64: ignore machine checks from boot time

Don't log machine check events left over from boot. Too many BIOSes leave
bogus events in there.

This unfortunately also makes it impossible to log events that caused a
reboot. For people with a non-broken BIOS there is mce=bootlog.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by Andi Kleen and committed by Linus Torvalds d5172f26 cf7bee5a

+17 -4
+5
Documentation/x86_64/boot-options.txt
··· 6 Machine check 7 8 mce=off disable machine check 9 10 nomce (for compatibility with i386): same as mce=off 11
··· 6 Machine check 7 8 mce=off disable machine check 9 + mce=bootlog Enable logging of machine checks left over from booting. 10 + Disabled by default because some BIOS leave bogus ones. 11 + If your BIOS doesn't do that it's a good idea to enable though 12 + to make sure you log even machine check events that result 13 + in a reboot. 14 15 nomce (for compatibility with i386): same as mce=off 16
+12 -4
arch/x86_64/kernel/mce.c
··· 36 static unsigned long console_logged; 37 static int notify_user; 38 static int rip_msr; 39 40 /* 41 * Lockless MCE logging infrastructure. ··· 198 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); 199 200 mce_get_rip(&m, regs); 201 - if (error_code != -1) 202 rdtscll(m.tsc); 203 wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0); 204 - mce_log(&m); 205 206 /* Did this bank cause the exception? */ 207 /* Assume that the bank with uncorrectable errors did it, ··· 317 318 /* Log the machine checks left over from the previous reset. 319 This also clears all registers */ 320 - do_machine_check(NULL, -1); 321 322 set_in_cr4(X86_CR4_MCE); 323 ··· 478 } 479 480 /* mce=off disables machine check. Note you can reenable it later 481 - using sysfs */ 482 static int __init mcheck_enable(char *str) 483 { 484 if (!strcmp(str, "off")) 485 mce_dont_init = 1; 486 else 487 printk("mce= argument %s ignored. Please use /sys", str); 488 return 0;
··· 36 static unsigned long console_logged; 37 static int notify_user; 38 static int rip_msr; 39 + static int mce_bootlog; 40 41 /* 42 * Lockless MCE logging infrastructure. ··· 197 rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); 198 199 mce_get_rip(&m, regs); 200 + if (error_code >= 0) 201 rdtscll(m.tsc); 202 wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0); 203 + if (error_code != -2) 204 + mce_log(&m); 205 206 /* Did this bank cause the exception? */ 207 /* Assume that the bank with uncorrectable errors did it, ··· 315 316 /* Log the machine checks left over from the previous reset. 317 This also clears all registers */ 318 + do_machine_check(NULL, mce_bootlog ? -1 : -2); 319 320 set_in_cr4(X86_CR4_MCE); 321 ··· 476 } 477 478 /* mce=off disables machine check. Note you can reenable it later 479 + using sysfs. 480 + mce=bootlog Log MCEs from before booting. Disabled by default to work 481 + around buggy BIOS that leave bogus MCEs. */ 482 static int __init mcheck_enable(char *str) 483 { 484 + if (*str == '=') 485 + str++; 486 if (!strcmp(str, "off")) 487 mce_dont_init = 1; 488 + else if (!strcmp(str, "bootlog")) 489 + mce_bootlog = 1; 490 else 491 printk("mce= argument %s ignored. Please use /sys", str); 492 return 0;