Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/ras/mce_amd_inj: Inject bank 4 errors on the NBC

Bank 4 MCEs are logged and reported only on the node base core
(NBC) in a socket. Refer to the D18F3x44[NbMcaToMstCpuEn] field
in Fam10h and later BKDGs. The NBC is the lowest numbered core
in the node.

This patch ensures that we inject the error on the NBC for bank
4 errors. Otherwise, triggering #MC or APIC interrupts on a core
which is not the NBC would not have any effect on the system,
i.e. we would not see any relevant output in the kernel logs for
the error we just injected.

Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>
[ Cleanup comments. ]
[ Add a missing dependency on AMD_NB caught by Randy Dunlap. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/1443190851-2172-4-git-send-email-Aravind.Gopalakrishnan@amd.com
Link: http://lkml.kernel.org/r/1444641762-9437-5-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Aravind Gopalakrishnan and committed by
Ingo Molnar
fa20a2ed a1300e50

+53 -3
+1 -3
arch/x86/ras/Kconfig
··· 1 1 config AMD_MCE_INJ 2 2 tristate "Simple MCE injection interface for AMD processors" 3 - depends on RAS && EDAC_DECODE_MCE && DEBUG_FS 3 + depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB 4 4 default n 5 5 help 6 6 This is a simple debugfs interface to inject MCEs and test different 7 7 aspects of the MCE handling code. 8 8 9 9 WARNING: Do not even assume this interface is staying stable! 10 - 11 -
+52
arch/x86/ras/mce_amd_inj.c
··· 17 17 #include <linux/cpu.h> 18 18 #include <linux/string.h> 19 19 #include <linux/uaccess.h> 20 + #include <linux/pci.h> 20 21 21 22 #include <asm/mce.h> 23 + #include <asm/amd_nb.h> 22 24 #include <asm/irq_vectors.h> 23 25 24 26 #include "../kernel/cpu/mcheck/mce-internal.h" ··· 34 32 static u8 n_banks; 35 33 36 34 #define MAX_FLAG_OPT_SIZE 3 35 + #define NBCFG 0x44 37 36 38 37 enum injection_type { 39 38 SW_INJ = 0, /* SW injection, simply decode the error */ ··· 201 198 asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR)); 202 199 } 203 200 201 + static u32 get_nbc_for_node(int node_id) 202 + { 203 + struct cpuinfo_x86 *c = &boot_cpu_data; 204 + u32 cores_per_node; 205 + 206 + cores_per_node = c->x86_max_cores / amd_get_nodes_per_socket(); 207 + 208 + return cores_per_node * node_id; 209 + } 210 + 211 + static void toggle_nb_mca_mst_cpu(u16 nid) 212 + { 213 + struct pci_dev *F3 = node_to_amd_nb(nid)->misc; 214 + u32 val; 215 + int err; 216 + 217 + if (!F3) 218 + return; 219 + 220 + err = pci_read_config_dword(F3, NBCFG, &val); 221 + if (err) { 222 + pr_err("%s: Error reading F%dx%03x.\n", 223 + __func__, PCI_FUNC(F3->devfn), NBCFG); 224 + return; 225 + } 226 + 227 + if (val & BIT(27)) 228 + return; 229 + 230 + pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n", 231 + __func__); 232 + 233 + val |= BIT(27); 234 + err = pci_write_config_dword(F3, NBCFG, val); 235 + if (err) 236 + pr_err("%s: Error writing F%dx%03x.\n", 237 + __func__, PCI_FUNC(F3->devfn), NBCFG); 238 + } 239 + 204 240 static void do_inject(void) 205 241 { 206 242 u64 mcg_status = 0; ··· 268 226 if (inj_type == DFR_INT_INJ) { 269 227 i_mce.status |= MCI_STATUS_DEFERRED; 270 228 i_mce.status |= (i_mce.status & ~MCI_STATUS_UC); 229 + } 230 + 231 + /* 232 + * For multi node CPUs, logging and reporting of bank 4 errors happens 233 + * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for 234 + * Fam10h and later BKDGs. 
235 + */ 236 + if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) { 237 + toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu)); 238 + cpu = get_nbc_for_node(amd_get_nb_id(cpu)); 271 239 } 272 240 273 241 get_online_cpus();