Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ras_updates_for_v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 RAS updates from Borislav Petkov:

- Enable additional logging mode on older Xeons (Tony Luck)

- Pass error records logged by firmware through the MCE decoding chain
to provide human-readable error descriptions instead of raw values
(Smita Koralahalli)

- Some #MC handler fixes (Gabriele Paoloni)

- The usual small fixes and cleanups all over.

* tag 'ras_updates_for_v5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mce: Rename kill_it to kill_current_task
x86/mce: Remove redundant call to irq_work_queue()
x86/mce: Panic for LMCE only if mca_cfg.tolerant < 3
x86/mce: Move the mce_panic() call and 'kill_it' assignments to the right places
x86/mce, cper: Pass x86 CPER through the MCA handling chain
x86/mce: Use "safe" MSR functions when enabling additional error logging
x86/mce: Correct the detection of invalid notifier priorities
x86/mce: Assign boolean values to a bool variable
x86/mce: Enable additional error logging on certain Intel CPUs
x86/mce: Remove unneeded break

+131 -31
+11
arch/x86/include/asm/acpi.h
···
159 159 extern int x86_acpi_numa_init(void);
160 160 #endif /* CONFIG_ACPI_NUMA */
161 161
162 + struct cper_ia_proc_ctx;
163 +
162 164 #ifdef CONFIG_ACPI_APEI
163 165 static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
164 166 {
···
178 176 	 * so return PAGE_KERNEL_NOENC until we know differently.
179 177 	 */
180 178 	return PAGE_KERNEL_NOENC;
179 + }
180 +
181 + int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
182 + 			       u64 lapic_id);
183 + #else
184 + static inline int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
185 + 					     u64 lapic_id)
186 + {
187 + 	return -EINVAL;
181 188 }
182 189 #endif
183 190
+8 -1
arch/x86/include/asm/mce.h
···
177 177 	MCE_PRIO_EXTLOG,
178 178 	MCE_PRIO_UC,
179 179 	MCE_PRIO_EARLY,
180 - 	MCE_PRIO_CEC
180 + 	MCE_PRIO_CEC,
181 + 	MCE_PRIO_HIGHEST = MCE_PRIO_CEC
181 182 };
182 183
183 184 struct notifier_block;
···
199 198 }
200 199 #endif
201 200
201 + struct cper_ia_proc_ctx;
202 +
202 203 #ifdef CONFIG_X86_MCE
203 204 int mcheck_init(void);
204 205 void mcheck_cpu_init(struct cpuinfo_x86 *c);
205 206 void mcheck_cpu_clear(struct cpuinfo_x86 *c);
206 207 void mcheck_vendor_init_severity(void);
208 + int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
209 + 			       u64 lapic_id);
207 210 #else
208 211 static inline int mcheck_init(void) { return 0; }
209 212 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
210 213 static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
211 214 static inline void mcheck_vendor_init_severity(void) {}
215 + static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
216 + 					     u64 lapic_id) { return -EINVAL; }
212 217 #endif
213 218
214 219 #ifdef CONFIG_X86_ANCIENT_MCE
+1
arch/x86/include/asm/msr-index.h
···
139 139 #define MSR_IA32_MCG_CAP 0x00000179
140 140 #define MSR_IA32_MCG_STATUS 0x0000017a
141 141 #define MSR_IA32_MCG_CTL 0x0000017b
142 + #define MSR_ERROR_CONTROL 0x0000017f
142 143 #define MSR_IA32_MCG_EXT_CTL 0x000004d0
143 144
144 145 #define MSR_OFFCORE_RSP_0 0x000001a6
+5
arch/x86/kernel/acpi/apei.c
···
43 43 	apei_mce_report_mem_error(sev, mem_err);
44 44 #endif
45 45 }
46 +
47 + int arch_apei_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
48 + {
49 + 	return apei_smca_report_x86_error(ctx_info, lapic_id);
50 + }
+61
arch/x86/kernel/cpu/mce/apei.c
···
51 51 }
52 52 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
53 53
54 + int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info, u64 lapic_id)
55 + {
56 + 	const u64 *i_mce = ((const u64 *) (ctx_info + 1));
57 + 	unsigned int cpu;
58 + 	struct mce m;
59 +
60 + 	if (!boot_cpu_has(X86_FEATURE_SMCA))
61 + 		return -EINVAL;
62 +
63 + 	/*
64 + 	 * The starting address of the register array extracted from BERT must
65 + 	 * match with the first expected register in the register layout of
66 + 	 * SMCA address space. This address corresponds to banks's MCA_STATUS
67 + 	 * register.
68 + 	 *
69 + 	 * Match any MCi_STATUS register by turning off bank numbers.
70 + 	 */
71 + 	if ((ctx_info->msr_addr & MSR_AMD64_SMCA_MC0_STATUS) !=
72 + 				  MSR_AMD64_SMCA_MC0_STATUS)
73 + 		return -EINVAL;
74 +
75 + 	/*
76 + 	 * The register array size must be large enough to include all the
77 + 	 * SMCA registers which need to be extracted.
78 + 	 *
79 + 	 * The number of registers in the register array is determined by
80 + 	 * Register Array Size/8 as defined in UEFI spec v2.8, sec N.2.4.2.2.
81 + 	 * The register layout is fixed and currently the raw data in the
82 + 	 * register array includes 6 SMCA registers which the kernel can
83 + 	 * extract.
84 + 	 */
85 + 	if (ctx_info->reg_arr_size < 48)
86 + 		return -EINVAL;
87 +
88 + 	mce_setup(&m);
89 +
90 + 	m.extcpu = -1;
91 + 	m.socketid = -1;
92 +
93 + 	for_each_possible_cpu(cpu) {
94 + 		if (cpu_data(cpu).initial_apicid == lapic_id) {
95 + 			m.extcpu = cpu;
96 + 			m.socketid = cpu_data(m.extcpu).phys_proc_id;
97 + 			break;
98 + 		}
99 + 	}
100 +
101 + 	m.apicid = lapic_id;
102 + 	m.bank = (ctx_info->msr_addr >> 4) & 0xFF;
103 + 	m.status = *i_mce;
104 + 	m.addr = *(i_mce + 1);
105 + 	m.misc = *(i_mce + 2);
106 + 	/* Skipping MCA_CONFIG */
107 + 	m.ipid = *(i_mce + 4);
108 + 	m.synd = *(i_mce + 5);
109 +
110 + 	mce_log(&m);
111 +
112 + 	return 0;
113 + }
114 +
54 115 #define CPER_CREATOR_MCE \
55 116 	GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
56 117 		  0x64, 0x90, 0xb8, 0x9d)
+16 -27
arch/x86/kernel/cpu/mce/core.c
···
162 162
163 163 void mce_register_decode_chain(struct notifier_block *nb)
164 164 {
165 - 	if (WARN_ON(nb->priority > MCE_PRIO_MCELOG && nb->priority < MCE_PRIO_EDAC))
165 + 	if (WARN_ON(nb->priority < MCE_PRIO_LOWEST ||
166 + 		    nb->priority > MCE_PRIO_HIGHEST))
166 167 		return;
167 168
168 169 	blocking_notifier_chain_register(&x86_mce_decoder_chain, nb);
···
1266 1265 	}
1267 1266 }
1268 1267
1269 - static void queue_task_work(struct mce *m, int kill_it)
1268 + static void queue_task_work(struct mce *m, int kill_current_task)
1270 1269 {
1271 1270 	current->mce_addr = m->addr;
1272 1271 	current->mce_kflags = m->kflags;
1273 1272 	current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
1274 1273 	current->mce_whole_page = whole_page(m);
1275 1274
1276 - 	if (kill_it)
1275 + 	if (kill_current_task)
1277 1276 		current->mce_kill_me.func = kill_me_now;
1278 1277 	else
1279 1278 		current->mce_kill_me.func = kill_me_maybe;
···
1321 1320 	int no_way_out = 0;
1322 1321
1323 1322 	/*
1324 - 	 * If kill_it gets set, there might be a way to recover from this
1323 + 	 * If kill_current_task is not set, there might be a way to recover from this
1325 1324 	 * error.
1326 1325 	 */
1327 - 	int kill_it = 0;
1326 + 	int kill_current_task = 0;
1328 1327
1329 1328 	/*
1330 1329 	 * MCEs are always local on AMD. Same is determined by MCG_STATUS_LMCES
···
1351 1350 	 * severity is MCE_AR_SEVERITY we have other options.
1352 1351 	 */
1353 1352 	if (!(m.mcgstatus & MCG_STATUS_RIPV))
1354 - 		kill_it = 1;
1355 -
1353 + 		kill_current_task = (cfg->tolerant == 3) ? 0 : 1;
1356 1354 	/*
1357 1355 	 * Check if this MCE is signaled to only this logical processor,
1358 1356 	 * on Intel, Zhaoxin only.
···
1368 1368 	 * to see it will clear it.
1369 1369 	 */
1370 1370 	if (lmce) {
1371 - 		if (no_way_out)
1371 + 		if (no_way_out && cfg->tolerant < 3)
1372 1372 			mce_panic("Fatal local machine check", &m, msg);
1373 1373 	} else {
1374 1374 		order = mce_start(&no_way_out);
···
1387 1387 		if (mce_end(order) < 0) {
1388 1388 			if (!no_way_out)
1389 1389 				no_way_out = worst >= MCE_PANIC_SEVERITY;
1390 +
1391 + 			if (no_way_out && cfg->tolerant < 3)
1392 + 				mce_panic("Fatal machine check on current CPU", &m, msg);
1390 1393 		}
1391 1394 	} else {
1392 1395 		/*
···
1406 1403 		}
1407 1404 	}
1408 1405
1409 - 	/*
1410 - 	 * If tolerant is at an insane level we drop requests to kill
1411 - 	 * processes and continue even when there is no way out.
1412 - 	 */
1413 - 	if (cfg->tolerant == 3)
1414 - 		kill_it = 0;
1415 - 	else if (no_way_out)
1416 - 		mce_panic("Fatal machine check on current CPU", &m, msg);
1417 -
1418 - 	if (worst > 0)
1419 - 		irq_work_queue(&mce_irq_work);
1420 -
1421 - 	if (worst != MCE_AR_SEVERITY && !kill_it)
1406 + 	if (worst != MCE_AR_SEVERITY && !kill_current_task)
1422 1407 		goto out;
1423 1408
1424 1409 	/* Fault was in user mode and we need to take some action */
···
1414 1423 		/* If this triggers there is no way to recover. Die hard. */
1415 1424 		BUG_ON(!on_thread_stack() || !user_mode(regs));
1416 1425
1417 - 		queue_task_work(&m, kill_it);
1426 + 		queue_task_work(&m, kill_current_task);
1418 1427
1419 1428 	} else {
1420 1429 		/*
···
1432 1441 		}
1433 1442
1434 1443 		if (m.kflags & MCE_IN_KERNEL_COPYIN)
1435 - 			queue_task_work(&m, kill_it);
1444 + 			queue_task_work(&m, kill_current_task);
1436 1445 	}
1437 1446 out:
1438 1447 	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
···
1574 1583 		 * __mcheck_cpu_init_clear_banks() does the final bank setup.
1575 1584 		 */
1576 1585 		b->ctl = -1ULL;
1577 - 		b->init = 1;
1586 + 		b->init = true;
1578 1587 	}
1579 1588 }
1580 1589
···
1755 1764 		 */
1756 1765
1757 1766 		if (c->x86 == 6 && c->x86_model < 0x1A && this_cpu_read(mce_num_banks) > 0)
1758 - 			mce_banks[0].init = 0;
1767 + 			mce_banks[0].init = false;
1759 1768
1760 1769 		/*
1761 1770 		 * All newer Intel systems support MCE broadcasting. Enable
···
1804 1813 	case X86_VENDOR_INTEL:
1805 1814 		intel_p5_mcheck_init(c);
1806 1815 		return 1;
1807 - 		break;
1808 1816 	case X86_VENDOR_CENTAUR:
1809 1817 		winchip_mcheck_init(c);
1810 1818 		return 1;
1811 - 		break;
1812 1819 	default:
1813 1820 		return 0;
1814 1821 	}
+21
arch/x86/kernel/cpu/mce/intel.c
···
509 509 	}
510 510 }
511 511
512 + /*
513 +  * Enable additional error logs from the integrated
514 +  * memory controller on processors that support this.
515 +  */
516 + static void intel_imc_init(struct cpuinfo_x86 *c)
517 + {
518 + 	u64 error_control;
519 +
520 + 	switch (c->x86_model) {
521 + 	case INTEL_FAM6_SANDYBRIDGE_X:
522 + 	case INTEL_FAM6_IVYBRIDGE_X:
523 + 	case INTEL_FAM6_HASWELL_X:
524 + 		if (rdmsrl_safe(MSR_ERROR_CONTROL, &error_control))
525 + 			return;
526 + 		error_control |= 2;
527 + 		wrmsrl_safe(MSR_ERROR_CONTROL, error_control);
528 + 		break;
529 + 	}
530 + }
531 +
512 532 void mce_intel_feature_init(struct cpuinfo_x86 *c)
513 533 {
514 534 	intel_init_thermal(c);
515 535 	intel_init_cmci();
516 536 	intel_init_lmce();
517 537 	intel_ppin_init(c);
538 + 	intel_imc_init(c);
518 539 }
519 540
520 541 void mce_intel_feature_clear(struct cpuinfo_x86 *c)
+8 -3
drivers/firmware/efi/cper-x86.c
···
2 2 // Copyright (C) 2018, Advanced Micro Devices, Inc.
3 3
4 4 #include <linux/cper.h>
5 + #include <linux/acpi.h>
5 6
6 7 /*
7 8  * We don't need a "CPER_IA" prefix since these are all locally defined.
···
348 347 					 ctx_info->mm_reg_addr);
349 348 		}
350 349
351 - 		printk("%sRegister Array:\n", newpfx);
352 - 		print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, groupsize,
353 - 			       (ctx_info + 1), ctx_info->reg_arr_size, 0);
350 + 		if (ctx_info->reg_ctx_type != CTX_TYPE_MSR ||
351 + 		    arch_apei_report_x86_error(ctx_info, proc->lapic_id)) {
352 + 			printk("%sRegister Array:\n", newpfx);
353 + 			print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16,
354 + 				       groupsize, (ctx_info + 1),
355 + 				       ctx_info->reg_arr_size, 0);
356 + 		}
354 357
355 358 		ctx_info = (struct cper_ia_proc_ctx *)((long)ctx_info + size);
356 359 	}