Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Ingo Molnar:
"The biggest change in this cycle was an enhancement by Yazen Ghannam
to reduce the number of MCE error injection related IPIs.

The rest are smaller fixes"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mce: Fix mce_rdmsrl() warning message
x86/RAS/AMD: Reduce the number of IPIs when prepping error injection
x86/mce/AMD: Increase size of the bank_map type
x86/mce: Do not use bank 1 for APEI generated error logs

+31 -33
+1 -1
arch/x86/kernel/cpu/mcheck/mce-apei.c
··· 46 46 return; 47 47 48 48 mce_setup(&m); 49 - m.bank = 1; 49 + m.bank = -1; 50 50 /* Fake a memory read error with unknown channel */ 51 51 m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; 52 52
+1 -1
arch/x86/kernel/cpu/mcheck/mce.c
··· 425 425 } 426 426 427 427 if (rdmsrl_safe(msr, &v)) { 428 - WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr); 428 + WARN_ONCE(1, "mce: Unable to read MSR 0x%x!\n", msr); 429 429 /* 430 430 * Return zero in case the access faulted. This should 431 431 * not happen normally but can happen if the CPU does
+1 -1
arch/x86/kernel/cpu/mcheck/mce_amd.c
··· 93 93 EXPORT_SYMBOL_GPL(amd_df_mcablock_names); 94 94 95 95 static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); 96 - static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ 96 + static DEFINE_PER_CPU(unsigned int, bank_map); /* see which banks are on */ 97 97 98 98 static void amd_threshold_interrupt(void); 99 99 static void amd_deferred_error_interrupt(void);
+28 -30
arch/x86/ras/mce_amd_inj.c
··· 241 241 __func__, PCI_FUNC(F3->devfn), NBCFG); 242 242 } 243 243 244 + static void prepare_msrs(void *info) 245 + { 246 + struct mce i_mce = *(struct mce *)info; 247 + u8 b = i_mce.bank; 248 + 249 + wrmsrl(MSR_IA32_MCG_STATUS, i_mce.mcgstatus); 250 + 251 + if (boot_cpu_has(X86_FEATURE_SMCA)) { 252 + if (i_mce.inject_flags == DFR_INT_INJ) { 253 + wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), i_mce.status); 254 + wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), i_mce.addr); 255 + } else { 256 + wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), i_mce.status); 257 + wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), i_mce.addr); 258 + } 259 + 260 + wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), i_mce.misc); 261 + } else { 262 + wrmsrl(MSR_IA32_MCx_STATUS(b), i_mce.status); 263 + wrmsrl(MSR_IA32_MCx_ADDR(b), i_mce.addr); 264 + wrmsrl(MSR_IA32_MCx_MISC(b), i_mce.misc); 265 + } 266 + 267 + } 268 + 244 269 static void do_inject(void) 245 270 { 246 271 u64 mcg_status = 0; ··· 312 287 313 288 toggle_hw_mce_inject(cpu, true); 314 289 315 - wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS, 316 - (u32)mcg_status, (u32)(mcg_status >> 32)); 317 - 318 - if (boot_cpu_has(X86_FEATURE_SMCA)) { 319 - if (inj_type == DFR_INT_INJ) { 320 - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DESTAT(b), 321 - (u32)i_mce.status, (u32)(i_mce.status >> 32)); 322 - 323 - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_DEADDR(b), 324 - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); 325 - } else { 326 - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_STATUS(b), 327 - (u32)i_mce.status, (u32)(i_mce.status >> 32)); 328 - 329 - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_ADDR(b), 330 - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); 331 - } 332 - 333 - wrmsr_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(b), 334 - (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); 335 - } else { 336 - wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b), 337 - (u32)i_mce.status, (u32)(i_mce.status >> 32)); 338 - 339 - wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b), 340 - (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); 341 - 342 - wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b), 343 - (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); 344 - } 290 + i_mce.mcgstatus = mcg_status; 291 + i_mce.inject_flags = inj_type; 292 + smp_call_function_single(cpu, prepare_msrs, &i_mce, 0); 345 293 346 294 toggle_hw_mce_inject(cpu, false); 347 295