Merge branch 'ras-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull ras fixes from Thomas Gleixner:
"A set of fixes for RAS/MCE:

- Improve the error message when the kernel cannot recover from an MCE
so the maximum amount of information gets provided.

- Individually check MCE recovery features on SkyLake CPUs instead of
assuming none when the CAPID0 register does not advertise the
general ability for recovery.

- Prevent the MCE code from printing inconsistent messages which first
show an error location and then claim that the source is unknown.

- Prevent overwriting MCi_STATUS in the attempt to gather more
information when a fatal MCE has already been detected. This leads
to empty status values in the printout and to a failure to react
promptly to the fatal event."

* 'ras-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mce: Fix incorrect "Machine check from unknown source" message
x86/mce: Do not overwrite MCi_STATUS in mce_no_way_out()
x86/mce: Check for alternate indication of machine check recovery on Skylake
x86/mce: Improve error message when kernel cannot recover

+42 -18
+5
arch/x86/kernel/cpu/mcheck/mce-severity.c
··· 160 160 SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR), 161 161 USER 162 162 ), 163 + MCESEV( 164 + PANIC, "Data load in unrecoverable area of kernel", 165 + SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), 166 + KERNEL 167 + ), 163 168 #endif 164 169 MCESEV( 165 170 PANIC, "Action required: unknown MCACOD",
+28 -16
arch/x86/kernel/cpu/mcheck/mce.c
··· 772 772 static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, 773 773 struct pt_regs *regs) 774 774 { 775 - int i, ret = 0; 776 775 char *tmp; 776 + int i; 777 777 778 778 for (i = 0; i < mca_cfg.banks; i++) { 779 779 m->status = mce_rdmsrl(msr_ops.status(i)); 780 - if (m->status & MCI_STATUS_VAL) { 781 - __set_bit(i, validp); 782 - if (quirk_no_way_out) 783 - quirk_no_way_out(i, m, regs); 784 - } 780 + if (!(m->status & MCI_STATUS_VAL)) 781 + continue; 782 + 783 + __set_bit(i, validp); 784 + if (quirk_no_way_out) 785 + quirk_no_way_out(i, m, regs); 785 786 786 787 if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { 788 + mce_read_aux(m, i); 787 789 *msg = tmp; 788 - ret = 1; 790 + return 1; 789 791 } 790 792 } 791 - return ret; 793 + return 0; 792 794 } 793 795 794 796 /* ··· 1207 1205 lmce = m.mcgstatus & MCG_STATUS_LMCES; 1208 1206 1209 1207 /* 1208 + * Local machine check may already know that we have to panic. 1209 + * Broadcast machine check begins rendezvous in mce_start() 1210 1210 * Go through all banks in exclusion of the other CPUs. This way we 1211 1211 * don't report duplicated events on shared banks because the first one 1212 - * to see it will clear it. If this is a Local MCE, then no need to 1213 - * perform rendezvous. 1212 + * to see it will clear it. 1214 1213 */ 1215 - if (!lmce) 1214 + if (lmce) { 1215 + if (no_way_out) 1216 + mce_panic("Fatal local machine check", &m, msg); 1217 + } else { 1216 1218 order = mce_start(&no_way_out); 1219 + } 1217 1220 1218 1221 for (i = 0; i < cfg->banks; i++) { 1219 1222 __clear_bit(i, toclear); ··· 1294 1287 no_way_out = worst >= MCE_PANIC_SEVERITY; 1295 1288 } else { 1296 1289 /* 1297 - * Local MCE skipped calling mce_reign() 1298 - * If we found a fatal error, we need to panic here. 1290 + * If there was a fatal machine check we should have 1291 + * already called mce_panic earlier in this function. 
1292 + * Since we re-read the banks, we might have found 1293 + * something new. Check again to see if we found a 1294 + * fatal error. We call "mce_severity()" again to 1295 + * make sure we have the right "msg". 1299 1296 */ 1300 - if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) 1301 - mce_panic("Machine check from unknown source", 1302 - NULL, NULL); 1297 + if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) { 1298 + mce_severity(&m, cfg->tolerant, &msg, true); 1299 + mce_panic("Local fatal machine check!", &m, msg); 1300 + } 1303 1301 } 1304 1302 1305 1303 /*
+9 -2
arch/x86/kernel/quirks.c
··· 645 645 /* Skylake */ 646 646 static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev) 647 647 { 648 - u32 capid0; 648 + u32 capid0, capid5; 649 649 650 650 pci_read_config_dword(pdev, 0x84, &capid0); 651 + pci_read_config_dword(pdev, 0x98, &capid5); 651 652 652 - if ((capid0 & 0xc0) == 0xc0) 653 + /* 654 + * CAPID0{7:6} indicate whether this is an advanced RAS SKU 655 + * CAPID5{8:5} indicate that various NVDIMM usage modes are 656 + * enabled, so memory machine check recovery is also enabled. 657 + */ 658 + if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0)) 653 659 static_branch_inc(&mcsafe_key); 660 + 654 661 } 655 662 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap); 656 663 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);