Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ras_for_4.2' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras into x86/ras

Pull RAS updates from Borislav Petkov:

- RAS: Add support for deferred errors on AMD (Aravind Gopalakrishnan)

This is an important RAS feature which adds hardware support for
poisoned data. That means roughly that the hardware marks data which it
has detected as corrupted but wasn't able to correct, as poisoned data
and raises an APIC interrupt to signal that in the form of a deferred
error. It is the OS's responsibility then to take proper recovery action
and thus prolong system lifetime as far as possible.

- Misc cleanups on top. (Borislav Petkov)

Signed-off-by: Ingo Molnar <mingo@kernel.org>

+182 -42
+3
arch/x86/include/asm/entry_arch.h
··· 50 50 BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) 51 51 #endif 52 52 53 + #ifdef CONFIG_X86_MCE_AMD 54 + BUILD_INTERRUPT(deferred_error_interrupt, DEFERRED_ERROR_VECTOR) 55 + #endif 53 56 #endif
+3
arch/x86/include/asm/hardirq.h
··· 33 33 #ifdef CONFIG_X86_MCE_THRESHOLD 34 34 unsigned int irq_threshold_count; 35 35 #endif 36 + #ifdef CONFIG_X86_MCE_AMD 37 + unsigned int irq_deferred_error_count; 38 + #endif 36 39 #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) 37 40 unsigned int irq_hv_callback_count; 38 41 #endif
+2
arch/x86/include/asm/hw_irq.h
··· 73 73 extern asmlinkage void irq_move_cleanup_interrupt(void); 74 74 extern asmlinkage void reboot_interrupt(void); 75 75 extern asmlinkage void threshold_interrupt(void); 76 + extern asmlinkage void deferred_error_interrupt(void); 76 77 77 78 extern asmlinkage void call_function_interrupt(void); 78 79 extern asmlinkage void call_function_single_interrupt(void); ··· 88 87 extern void trace_thermal_interrupt(void); 89 88 extern void trace_reschedule_interrupt(void); 90 89 extern void trace_threshold_interrupt(void); 90 + extern void trace_deferred_error_interrupt(void); 91 91 extern void trace_call_function_interrupt(void); 92 92 extern void trace_call_function_single_interrupt(void); 93 93 #define trace_irq_move_cleanup_interrupt irq_move_cleanup_interrupt
+6 -5
arch/x86/include/asm/irq_vectors.h
··· 102 102 */ 103 103 #define X86_PLATFORM_IPI_VECTOR 0xf7 104 104 105 - /* Vector for KVM to deliver posted interrupt IPI */ 106 - #ifdef CONFIG_HAVE_KVM 107 - #define POSTED_INTR_VECTOR 0xf2 108 - #endif 109 - 110 105 /* 111 106 * IRQ work vector: 112 107 */ 113 108 #define IRQ_WORK_VECTOR 0xf6 114 109 115 110 #define UV_BAU_MESSAGE 0xf5 111 + #define DEFERRED_ERROR_VECTOR 0xf4 116 112 117 113 /* Vector on which hypervisor callbacks will be delivered */ 118 114 #define HYPERVISOR_CALLBACK_VECTOR 0xf3 115 + 116 + /* Vector for KVM to deliver posted interrupt IPI */ 117 + #ifdef CONFIG_HAVE_KVM 118 + #define POSTED_INTR_VECTOR 0xf2 119 + #endif 119 120 120 121 /* 121 122 * Local APIC timer IRQ vector is on a different priority level,
+16 -2
arch/x86/include/asm/mce.h
··· 117 117 }; 118 118 119 119 struct mce_vendor_flags { 120 - __u64 overflow_recov : 1, /* cpuid_ebx(80000007) */ 121 - __reserved_0 : 63; 120 + /* 121 + * overflow recovery cpuid bit indicates that overflow 122 + * conditions are not fatal 123 + */ 124 + __u64 overflow_recov : 1, 125 + 126 + /* 127 + * SUCCOR stands for S/W UnCorrectable error COntainment 128 + * and Recovery. It indicates support for data poisoning 129 + * in HW and deferred error interrupts. 130 + */ 131 + succor : 1, 132 + __reserved_0 : 62; 122 133 }; 123 134 extern struct mce_vendor_flags mce_flags; 124 135 ··· 233 222 234 223 extern void (*mce_threshold_vector)(void); 235 224 extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 225 + 226 + /* Deferred error interrupt handler */ 227 + extern void (*deferred_error_int_vector)(void); 236 228 237 229 /* 238 230 * Thermal handler
+6
arch/x86/include/asm/trace/irq_vectors.h
··· 101 101 DEFINE_IRQ_VECTOR_EVENT(threshold_apic); 102 102 103 103 /* 104 + * deferred_error_apic - called when entering/exiting a deferred apic interrupt 105 + * vector handler 106 + */ 107 + DEFINE_IRQ_VECTOR_EVENT(deferred_error_apic); 108 + 109 + /* 104 110 * thermal_apic - called when entering/exiting a thermal apic interrupt 105 111 * vector handler 106 112 */
+2 -1
arch/x86/include/asm/traps.h
··· 108 108 void math_emulate(struct math_emu_info *); 109 109 #ifndef CONFIG_X86_32 110 110 asmlinkage void smp_thermal_interrupt(void); 111 - asmlinkage void mce_threshold_interrupt(void); 111 + asmlinkage void smp_threshold_interrupt(void); 112 + asmlinkage void smp_deferred_error_interrupt(void); 112 113 #endif 113 114 114 115 extern enum ctx_state ist_enter(struct pt_regs *regs);
+8 -2
arch/x86/kernel/cpu/mcheck/mce.c
··· 1637 1637 mce_intel_feature_init(c); 1638 1638 mce_adjust_timer = cmci_intel_adjust_timer; 1639 1639 break; 1640 - case X86_VENDOR_AMD: 1640 + 1641 + case X86_VENDOR_AMD: { 1642 + u32 ebx = cpuid_ebx(0x80000007); 1643 + 1641 1644 mce_amd_feature_init(c); 1642 - mce_flags.overflow_recov = cpuid_ebx(0x80000007) & 0x1; 1645 + mce_flags.overflow_recov = !!(ebx & BIT(0)); 1646 + mce_flags.succor = !!(ebx & BIT(1)); 1643 1647 break; 1648 + } 1649 + 1644 1650 default: 1645 1651 break; 1646 1652 }
+121 -20
arch/x86/kernel/cpu/mcheck/mce_amd.c
··· 1 1 /* 2 - * (c) 2005-2012 Advanced Micro Devices, Inc. 2 + * (c) 2005-2015 Advanced Micro Devices, Inc. 3 3 * Your use of this code is subject to the terms and conditions of the 4 4 * GNU general public license version 2. See "COPYING" or 5 5 * http://www.gnu.org/licenses/gpl.html 6 6 * 7 7 * Written by Jacob Shin - AMD, Inc. 8 - * 9 8 * Maintained by: Borislav Petkov <bp@alien8.de> 10 9 * 11 - * April 2006 12 - * - added support for AMD Family 0x10 processors 13 - * May 2012 14 - * - major scrubbing 15 - * 16 - * All MC4_MISCi registers are shared between multi-cores 10 + * All MC4_MISCi registers are shared between cores on a node. 17 11 */ 18 12 #include <linux/interrupt.h> 19 13 #include <linux/notifier.h> ··· 26 32 #include <asm/idle.h> 27 33 #include <asm/mce.h> 28 34 #include <asm/msr.h> 35 + #include <asm/trace/irq_vectors.h> 29 36 30 37 #define NR_BLOCKS 9 31 38 #define THRESHOLD_MAX 0xFFF ··· 42 47 #define MASK_BLKPTR_LO 0xFF000000 43 48 #define MCG_XBLK_ADDR 0xC0000400 44 49 50 + /* Deferred error settings */ 51 + #define MSR_CU_DEF_ERR 0xC0000410 52 + #define MASK_DEF_LVTOFF 0x000000F0 53 + #define MASK_DEF_INT_TYPE 0x00000006 54 + #define DEF_LVT_OFF 0x2 55 + #define DEF_INT_TYPE_APIC 0x2 56 + 45 57 static const char * const th_names[] = { 46 58 "load_store", 47 59 "insn_fetch", ··· 62 60 static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ 63 61 64 62 static void amd_threshold_interrupt(void); 63 + static void amd_deferred_error_interrupt(void); 64 + 65 + static void default_deferred_error_interrupt(void) 66 + { 67 + pr_err("Unexpected deferred interrupt at vector %x\n", DEFERRED_ERROR_VECTOR); 68 + } 69 + void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt; 65 70 66 71 /* 67 72 * CPU Initialization ··· 205 196 threshold_restart_bank(&tr); 206 197 }; 207 198 208 - static int setup_APIC_mce(int reserved, int new) 199 + static int setup_APIC_mce_threshold(int reserved, int new) 209 200 { 210 201 if 
(reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR, 211 202 APIC_EILVT_MSG_FIX, 0)) 212 203 return new; 213 204 214 205 return reserved; 206 + } 207 + 208 + static int setup_APIC_deferred_error(int reserved, int new) 209 + { 210 + if (reserved < 0 && !setup_APIC_eilvt(new, DEFERRED_ERROR_VECTOR, 211 + APIC_EILVT_MSG_FIX, 0)) 212 + return new; 213 + 214 + return reserved; 215 + } 216 + 217 + static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) 218 + { 219 + u32 low = 0, high = 0; 220 + int def_offset = -1, def_new; 221 + 222 + if (rdmsr_safe(MSR_CU_DEF_ERR, &low, &high)) 223 + return; 224 + 225 + def_new = (low & MASK_DEF_LVTOFF) >> 4; 226 + if (!(low & MASK_DEF_LVTOFF)) { 227 + pr_err(FW_BUG "Your BIOS is not setting up LVT offset 0x2 for deferred error IRQs correctly.\n"); 228 + def_new = DEF_LVT_OFF; 229 + low = (low & ~MASK_DEF_LVTOFF) | (DEF_LVT_OFF << 4); 230 + } 231 + 232 + def_offset = setup_APIC_deferred_error(def_offset, def_new); 233 + if ((def_offset == def_new) && 234 + (deferred_error_int_vector != amd_deferred_error_interrupt)) 235 + deferred_error_int_vector = amd_deferred_error_interrupt; 236 + 237 + low = (low & ~MASK_DEF_INT_TYPE) | DEF_INT_TYPE_APIC; 238 + wrmsr(MSR_CU_DEF_ERR, low, high); 215 239 } 216 240 217 241 /* cpu init entry point, called from mce.c with preempt off */ ··· 294 252 295 253 b.interrupt_enable = 1; 296 254 new = (high & MASK_LVTOFF_HI) >> 20; 297 - offset = setup_APIC_mce(offset, new); 255 + offset = setup_APIC_mce_threshold(offset, new); 298 256 299 257 if ((offset == new) && 300 258 (mce_threshold_vector != amd_threshold_interrupt)) ··· 303 261 init: 304 262 mce_threshold_block_init(&b, offset); 305 263 } 264 + } 265 + 266 + if (mce_flags.succor) 267 + deferred_error_interrupt_enable(c); 268 + } 269 + 270 + static void __log_error(unsigned int bank, bool threshold_err, u64 misc) 271 + { 272 + struct mce m; 273 + u64 status; 274 + 275 + rdmsrl(MSR_IA32_MCx_STATUS(bank), status); 276 + if (!(status & 
MCI_STATUS_VAL)) 277 + return; 278 + 279 + mce_setup(&m); 280 + 281 + m.status = status; 282 + m.bank = bank; 283 + 284 + if (threshold_err) 285 + m.misc = misc; 286 + 287 + if (m.status & MCI_STATUS_ADDRV) 288 + rdmsrl(MSR_IA32_MCx_ADDR(bank), m.addr); 289 + 290 + mce_log(&m); 291 + wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); 292 + } 293 + 294 + static inline void __smp_deferred_error_interrupt(void) 295 + { 296 + inc_irq_stat(irq_deferred_error_count); 297 + deferred_error_int_vector(); 298 + } 299 + 300 + asmlinkage __visible void smp_deferred_error_interrupt(void) 301 + { 302 + entering_irq(); 303 + __smp_deferred_error_interrupt(); 304 + exiting_ack_irq(); 305 + } 306 + 307 + asmlinkage __visible void smp_trace_deferred_error_interrupt(void) 308 + { 309 + entering_irq(); 310 + trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); 311 + __smp_deferred_error_interrupt(); 312 + trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR); 313 + exiting_ack_irq(); 314 + } 315 + 316 + /* APIC interrupt handler for deferred errors */ 317 + static void amd_deferred_error_interrupt(void) 318 + { 319 + u64 status; 320 + unsigned int bank; 321 + 322 + for (bank = 0; bank < mca_cfg.banks; ++bank) { 323 + rdmsrl(MSR_IA32_MCx_STATUS(bank), status); 324 + 325 + if (!(status & MCI_STATUS_VAL) || 326 + !(status & MCI_STATUS_DEFERRED)) 327 + continue; 328 + 329 + __log_error(bank, false, 0); 330 + break; 306 331 } 307 332 } 308 333 ··· 382 273 * the interrupt goes off when error_count reaches threshold_limit. 383 274 * the handler will simply log mcelog w/ software defined bank number. 
384 275 */ 276 + 385 277 static void amd_threshold_interrupt(void) 386 278 { 387 279 u32 low = 0, high = 0, address = 0; 388 280 int cpu = smp_processor_id(); 389 281 unsigned int bank, block; 390 - struct mce m; 391 282 392 283 /* assume first bank caused it */ 393 284 for (bank = 0; bank < mca_cfg.banks; ++bank) { ··· 430 321 return; 431 322 432 323 log: 433 - mce_setup(&m); 434 - rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status); 435 - if (!(m.status & MCI_STATUS_VAL)) 436 - return; 437 - m.misc = ((u64)high << 32) | low; 438 - m.bank = bank; 439 - mce_log(&m); 440 - 441 - wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); 324 + __log_error(bank, true, ((u64)high << 32) | low); 442 325 } 443 326 444 327 /*
+5
arch/x86/kernel/entry_64.S
··· 935 935 threshold_interrupt smp_threshold_interrupt 936 936 #endif 937 937 938 + #ifdef CONFIG_X86_MCE_AMD 939 + apicinterrupt DEFERRED_ERROR_VECTOR \ 940 + deferred_error_interrupt smp_deferred_error_interrupt 941 + #endif 942 + 938 943 #ifdef CONFIG_X86_THERMAL_VECTOR 939 944 apicinterrupt THERMAL_APIC_VECTOR \ 940 945 thermal_interrupt smp_thermal_interrupt
+6
arch/x86/kernel/irq.c
··· 116 116 seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); 117 117 seq_puts(p, " Threshold APIC interrupts\n"); 118 118 #endif 119 + #ifdef CONFIG_X86_MCE_AMD 120 + seq_printf(p, "%*s: ", prec, "DFR"); 121 + for_each_online_cpu(j) 122 + seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count); 123 + seq_puts(p, " Deferred Error APIC interrupts\n"); 124 + #endif 119 125 #ifdef CONFIG_X86_MCE 120 126 seq_printf(p, "%*s: ", prec, "MCE"); 121 127 for_each_online_cpu(j)
+4
arch/x86/kernel/irqinit.c
··· 135 135 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); 136 136 #endif 137 137 138 + #ifdef CONFIG_X86_MCE_AMD 139 + alloc_intr_gate(DEFERRED_ERROR_VECTOR, deferred_error_interrupt); 140 + #endif 141 + 138 142 #ifdef CONFIG_X86_LOCAL_APIC 139 143 /* self generated IPI for local APIC timer */ 140 144 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-12
arch/x86/kernel/traps.c
··· 813 813 do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) 814 814 { 815 815 conditional_sti(regs); 816 - #if 0 817 - /* No need to warn about this any longer. */ 818 - pr_info("Ignoring P6 Local APIC Spurious Interrupt Bug...\n"); 819 - #endif 820 - } 821 - 822 - asmlinkage __visible void __attribute__((weak)) smp_thermal_interrupt(void) 823 - { 824 - } 825 - 826 - asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void) 827 - { 828 816 } 829 817 830 818 /*