Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp: (21 commits)
EDAC, MCE: Fix shift warning on 32-bit
EDAC, MCE: Add a BIT_64() macro
EDAC, MCE: Enable MCE decoding on F12h
EDAC, MCE: Add F12h NB MCE decoder
EDAC, MCE: Add F12h IC MCE decoder
EDAC, MCE: Add F12h DC MCE decoder
EDAC, MCE: Add support for F11h MCEs
EDAC, MCE: Enable MCE decoding on F14h
EDAC, MCE: Fix FR MCEs decoding
EDAC, MCE: Complete NB MCE decoders
EDAC, MCE: Warn about LS MCEs on F14h
EDAC, MCE: Adjust IC decoders to F14h
EDAC, MCE: Adjust DC decoders to F14h
EDAC, MCE: Rename files
EDAC, MCE: Rework MCE injection
EDAC: Export edac sysfs class to users.
EDAC, MCE: Pass complete MCE info to decoders
EDAC, MCE: Sanitize error codes
EDAC, MCE: Remove unused function parameter
EDAC, MCE: Add HW_ERR prefix
...

+1018 -758
+12 -2
drivers/edac/Kconfig
··· 39 39 there're four debug levels (x=0,1,2,3 from low to high). 40 40 Usually you should select 'N'. 41 41 42 - config EDAC_DECODE_MCE 42 + config EDAC_DECODE_MCE 43 43 tristate "Decode MCEs in human-readable form (only on AMD for now)" 44 44 depends on CPU_SUP_AMD && X86_MCE 45 45 default y ··· 50 50 You should definitely say Y here in case you want to decode MCEs 51 51 which occur really early upon boot, before the module infrastructure 52 52 has been initialized. 53 + 54 + config EDAC_MCE_INJ 55 + tristate "Simple MCE injection interface over /sysfs" 56 + depends on EDAC_DECODE_MCE 57 + default n 58 + help 59 + This is a simple interface to inject MCEs over /sysfs and test 60 + the MCE decoding code in EDAC. 61 + 62 + This is currently AMD-only. 53 63 54 64 config EDAC_MM_EDAC 55 65 tristate "Main Memory EDAC (Error Detection And Correction) reporting" ··· 82 72 Families of Memory Controllers (K8, F10h and F11h) 83 73 84 74 config EDAC_AMD64_ERROR_INJECTION 85 - bool "Sysfs Error Injection facilities" 75 + bool "Sysfs HW Error injection facilities" 86 76 depends on EDAC_AMD64 87 77 help 88 78 Recent Opterons (Family 10h and later) provide for Memory Error
+3
drivers/edac/Makefile
··· 17 17 edac_core-objs += edac_pci.o edac_pci_sysfs.o 18 18 endif 19 19 20 + obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o 21 + 22 + edac_mce_amd-objs := mce_amd.o 20 23 obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o 21 24 22 25 obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
+10 -3
drivers/edac/amd64_edac.c
··· 2073 2073 amd64_handle_ue(mci, info); 2074 2074 } 2075 2075 2076 - void amd64_decode_bus_error(int node_id, struct err_regs *regs) 2076 + void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg) 2077 2077 { 2078 2078 struct mem_ctl_info *mci = mci_lookup[node_id]; 2079 + struct err_regs regs; 2079 2080 2080 - __amd64_decode_bus_error(mci, regs); 2081 + regs.nbsl = (u32) m->status; 2082 + regs.nbsh = (u32)(m->status >> 32); 2083 + regs.nbeal = (u32) m->addr; 2084 + regs.nbeah = (u32)(m->addr >> 32); 2085 + regs.nbcfg = nbcfg; 2086 + 2087 + __amd64_decode_bus_error(mci, &regs); 2081 2088 2082 2089 /* 2083 2090 * Check the UE bit of the NB status high register, if set generate some ··· 2093 2086 * 2094 2087 * FIXME: this should go somewhere else, if at all. 2095 2088 */ 2096 - if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors) 2089 + if (regs.nbsh & K8_NBSH_UC_ERR && !report_gart_errors) 2097 2090 edac_mc_handle_ue_no_info(mci, "UE bit is set"); 2098 2091 2099 2092 }
+2 -3
drivers/edac/amd64_edac.h
··· 72 72 #include <linux/edac.h> 73 73 #include <asm/msr.h> 74 74 #include "edac_core.h" 75 - #include "edac_mce_amd.h" 75 + #include "mce_amd.h" 76 76 77 77 #define amd64_printk(level, fmt, arg...) \ 78 78 edac_printk(level, "amd64", fmt, ##arg) ··· 482 482 extern const char *to_msgs[2]; 483 483 extern const char *pp_msgs[4]; 484 484 extern const char *ii_msgs[4]; 485 - extern const char *ext_msgs[32]; 486 485 extern const char *htlink_msgs[8]; 487 486 488 487 #ifdef CONFIG_EDAC_DEBUG 489 - #define NUM_DBG_ATTRS 9 488 + #define NUM_DBG_ATTRS 5 490 489 #else 491 490 #define NUM_DBG_ATTRS 0 492 491 #endif
+12 -195
drivers/edac/amd64_edac_dbg.c
··· 1 1 #include "amd64_edac.h" 2 2 3 - /* 4 - * accept a hex value and store it into the virtual error register file, field: 5 - * nbeal and nbeah. Assume virtual error values have already been set for: NBSL, 6 - * NBSH and NBCFG. Then proceed to map the error values to a MC, CSROW and 7 - * CHANNEL 8 - */ 9 - static ssize_t amd64_nbea_store(struct mem_ctl_info *mci, const char *data, 10 - size_t count) 11 - { 12 - struct amd64_pvt *pvt = mci->pvt_info; 13 - unsigned long long value; 14 - int ret = 0; 15 - 16 - ret = strict_strtoull(data, 16, &value); 17 - if (ret != -EINVAL) { 18 - debugf0("received NBEA= 0x%llx\n", value); 19 - 20 - /* place the value into the virtual error packet */ 21 - pvt->ctl_error_info.nbeal = (u32) value; 22 - value >>= 32; 23 - pvt->ctl_error_info.nbeah = (u32) value; 24 - 25 - /* Process the Mapping request */ 26 - /* TODO: Add race prevention */ 27 - amd_decode_nb_mce(pvt->mc_node_id, &pvt->ctl_error_info, 1); 28 - 29 - return count; 30 - } 31 - return ret; 3 + #define EDAC_DCT_ATTR_SHOW(reg) \ 4 + static ssize_t amd64_##reg##_show(struct mem_ctl_info *mci, char *data) \ 5 + { \ 6 + struct amd64_pvt *pvt = mci->pvt_info; \ 7 + return sprintf(data, "0x%016llx\n", (u64)pvt->reg); \ 32 8 } 33 9 34 - /* display back what the last NBEA (MCA NB Address (MC4_ADDR)) was written */ 35 - static ssize_t amd64_nbea_show(struct mem_ctl_info *mci, char *data) 36 - { 37 - struct amd64_pvt *pvt = mci->pvt_info; 38 - u64 value; 39 - 40 - value = pvt->ctl_error_info.nbeah; 41 - value <<= 32; 42 - value |= pvt->ctl_error_info.nbeal; 43 - 44 - return sprintf(data, "%llx\n", value); 45 - } 46 - 47 - /* store the NBSL (MCA NB Status Low (MC4_STATUS)) value user desires */ 48 - static ssize_t amd64_nbsl_store(struct mem_ctl_info *mci, const char *data, 49 - size_t count) 50 - { 51 - struct amd64_pvt *pvt = mci->pvt_info; 52 - unsigned long value; 53 - int ret = 0; 54 - 55 - ret = strict_strtoul(data, 16, &value); 56 - if (ret != -EINVAL) { 57 - debugf0("received NBSL= 0x%lx\n", value); 58 - 59 - pvt->ctl_error_info.nbsl = (u32) value; 60 - 61 - return count; 62 - } 63 - return ret; 64 - } 65 - 66 - /* display back what the last NBSL value written */ 67 - static ssize_t amd64_nbsl_show(struct mem_ctl_info *mci, char *data) 68 - { 69 - struct amd64_pvt *pvt = mci->pvt_info; 70 - u32 value; 71 - 72 - value = pvt->ctl_error_info.nbsl; 73 - 74 - return sprintf(data, "%x\n", value); 75 - } 76 - 77 - /* store the NBSH (MCA NB Status High) value user desires */ 78 - static ssize_t amd64_nbsh_store(struct mem_ctl_info *mci, const char *data, 79 - size_t count) 80 - { 81 - struct amd64_pvt *pvt = mci->pvt_info; 82 - unsigned long value; 83 - int ret = 0; 84 - 85 - ret = strict_strtoul(data, 16, &value); 86 - if (ret != -EINVAL) { 87 - debugf0("received NBSH= 0x%lx\n", value); 88 - 89 - pvt->ctl_error_info.nbsh = (u32) value; 90 - 91 - return count; 92 - } 93 - return ret; 94 - } 95 - 96 - /* display back what the last NBSH value written */ 97 - static ssize_t amd64_nbsh_show(struct mem_ctl_info *mci, char *data) 98 - { 99 - struct amd64_pvt *pvt = mci->pvt_info; 100 - u32 value; 101 - 102 - value = pvt->ctl_error_info.nbsh; 103 - 104 - return sprintf(data, "%x\n", value); 105 - } 106 - 107 - /* accept and store the NBCFG (MCA NB Configuration) value user desires */ 108 - static ssize_t amd64_nbcfg_store(struct mem_ctl_info *mci, 109 - const char *data, size_t count) 110 - { 111 - struct amd64_pvt *pvt = mci->pvt_info; 112 - unsigned long value; 113 - int ret = 0; 114 - 115 - ret = strict_strtoul(data, 16, &value); 116 - if (ret != -EINVAL) { 117 - debugf0("received NBCFG= 0x%lx\n", value); 118 - 119 - pvt->ctl_error_info.nbcfg = (u32) value; 120 - 121 - return count; 122 - } 123 - return ret; 124 - } 125 - 126 - /* various show routines for the controls of a MCI */ 127 - static ssize_t amd64_nbcfg_show(struct mem_ctl_info *mci, char *data) 128 - { 129 - struct amd64_pvt *pvt = mci->pvt_info; 130 - 131 - return sprintf(data, "%x\n", pvt->ctl_error_info.nbcfg); 132 - } 133 - 134 - 135 - static ssize_t amd64_dhar_show(struct mem_ctl_info *mci, char *data) 136 - { 137 - struct amd64_pvt *pvt = mci->pvt_info; 138 - 139 - return sprintf(data, "%x\n", pvt->dhar); 140 - } 141 - 142 - 143 - static ssize_t amd64_dbam_show(struct mem_ctl_info *mci, char *data) 144 - { 145 - struct amd64_pvt *pvt = mci->pvt_info; 146 - 147 - return sprintf(data, "%x\n", pvt->dbam0); 148 - } 149 - 150 - 151 - static ssize_t amd64_topmem_show(struct mem_ctl_info *mci, char *data) 152 - { 153 - struct amd64_pvt *pvt = mci->pvt_info; 154 - 155 - return sprintf(data, "%llx\n", pvt->top_mem); 156 - } 157 - 158 - 159 - static ssize_t amd64_topmem2_show(struct mem_ctl_info *mci, char *data) 160 - { 161 - struct amd64_pvt *pvt = mci->pvt_info; 162 - 163 - return sprintf(data, "%llx\n", pvt->top_mem2); 164 - } 10 + EDAC_DCT_ATTR_SHOW(dhar); 11 + EDAC_DCT_ATTR_SHOW(dbam0); 12 + EDAC_DCT_ATTR_SHOW(top_mem); 13 + EDAC_DCT_ATTR_SHOW(top_mem2); 165 14 166 15 static ssize_t amd64_hole_show(struct mem_ctl_info *mci, char *data) 167 16 { ··· 31 182 32 183 { 33 184 .attr = { 34 - .name = "nbea_ctl", 35 - .mode = (S_IRUGO | S_IWUSR) 36 - }, 37 - .show = amd64_nbea_show, 38 - .store = amd64_nbea_store, 39 - }, 40 - { 41 - .attr = { 42 - .name = "nbsl_ctl", 43 - .mode = (S_IRUGO | S_IWUSR) 44 - }, 45 - .show = amd64_nbsl_show, 46 - .store = amd64_nbsl_store, 47 - }, 48 - { 49 - .attr = { 50 - .name = "nbsh_ctl", 51 - .mode = (S_IRUGO | S_IWUSR) 52 - }, 53 - .show = amd64_nbsh_show, 54 - .store = amd64_nbsh_store, 55 - }, 56 - { 57 - .attr = { 58 - .name = "nbcfg_ctl", 59 - .mode = (S_IRUGO | S_IWUSR) 60 - }, 61 - .show = amd64_nbcfg_show, 62 - .store = amd64_nbcfg_store, 63 - }, 64 - { 65 - .attr = { 66 185 .name = "dhar", 67 186 .mode = (S_IRUGO) 68 187 }, ··· 42 225 .name = "dbam", 43 226 .mode = (S_IRUGO) 44 227 }, 45 - .show = amd64_dbam_show, 228 + .show = amd64_dbam0_show, 46 229 .store = NULL, 47 230 }, 48 231 { ··· 50 233 .name = "topmem", 51 234 .mode = (S_IRUGO) 52 235 }, 53 - .show = amd64_topmem_show, 236 + .show = amd64_top_mem_show, 54 237 .store = NULL, 55 238 }, 56 239 { ··· 58 241 .name = "topmem2", 59 242 .mode = (S_IRUGO) 60 243 }, 61 - .show = amd64_topmem2_show, 244 + .show = amd64_top_mem2_show, 62 245 .store = NULL, 63 246 }, 64 247 {
+10 -6
drivers/edac/edac_device_sysfs.c
··· 13 13 #include <linux/ctype.h> 14 14 #include <linux/module.h> 15 15 #include <linux/slab.h> 16 + #include <linux/edac.h> 16 17 17 18 #include "edac_core.h" 18 19 #include "edac_module.h" ··· 236 235 debugf1("%s()\n", __func__); 237 236 238 237 /* get the /sys/devices/system/edac reference */ 239 - edac_class = edac_get_edac_class(); 238 + edac_class = edac_get_sysfs_class(); 240 239 if (edac_class == NULL) { 241 240 debugf1("%s() no edac_class error\n", __func__); 242 241 err = -ENODEV; ··· 256 255 257 256 if (!try_module_get(edac_dev->owner)) { 258 257 err = -ENODEV; 259 - goto err_out; 258 + goto err_mod_get; 260 259 } 261 260 262 261 /* register */ ··· 283 282 err_kobj_reg: 284 283 module_put(edac_dev->owner); 285 284 285 + err_mod_get: 286 + edac_put_sysfs_class(); 287 + 286 288 err_out: 287 289 return err; 288 290 } ··· 294 290 * edac_device_unregister_sysfs_main_kobj: 295 291 * the '..../edac/<name>' kobject 296 292 */ 297 - void edac_device_unregister_sysfs_main_kobj( 298 - struct edac_device_ctl_info *edac_dev) 293 + void edac_device_unregister_sysfs_main_kobj(struct edac_device_ctl_info *dev) 299 294 { 300 295 debugf0("%s()\n", __func__); 301 296 debugf4("%s() name of kobject is: %s\n", 302 - __func__, kobject_name(&edac_dev->kobj)); 297 + __func__, kobject_name(&dev->kobj)); 303 298 304 299 /* 305 300 * Unregister the edac device's kobject and ··· 307 304 * a) module_put() this module 308 305 * b) 'kfree' the memory 309 306 */ 310 - kobject_put(&edac_dev->kobj); 307 + kobject_put(&dev->kobj); 308 + edac_put_sysfs_class(); 311 309 } 312 310 313 311 /* edac_dev -> instance information */
+7 -4
drivers/edac/edac_mc_sysfs.c
··· 11 11 12 12 #include <linux/ctype.h> 13 13 #include <linux/slab.h> 14 + #include <linux/edac.h> 14 15 #include <linux/bug.h> 15 16 16 17 #include "edac_core.h" ··· 1012 1011 */ 1013 1012 int edac_sysfs_setup_mc_kset(void) 1014 1013 { 1015 - int err = 0; 1014 + int err = -EINVAL; 1016 1015 struct sysdev_class *edac_class; 1017 1016 1018 1017 debugf1("%s()\n", __func__); 1019 1018 1020 1019 /* get the /sys/devices/system/edac class reference */ 1021 - edac_class = edac_get_edac_class(); 1020 + edac_class = edac_get_sysfs_class(); 1022 1021 if (edac_class == NULL) { 1023 1022 debugf1("%s() no edac_class error=%d\n", __func__, err); 1024 1023 goto fail_out; ··· 1029 1028 if (!mc_kset) { 1030 1029 err = -ENOMEM; 1031 1030 debugf1("%s() Failed to register '.../edac/mc'\n", __func__); 1032 - goto fail_out; 1031 + goto fail_kset; 1033 1032 } 1034 1033 1035 1034 debugf1("%s() Registered '.../edac/mc' kobject\n", __func__); 1036 1035 1037 1036 return 0; 1038 1037 1038 + fail_kset: 1039 + edac_put_sysfs_class(); 1039 1040 1040 - /* error unwind stack */ 1041 1041 fail_out: 1042 1042 return err; 1043 1043 } ··· 1051 1049 void edac_sysfs_teardown_mc_kset(void) 1052 1050 { 1053 1051 kset_unregister(mc_kset); 1052 + edac_put_sysfs_class(); 1054 1053 } 1055 1054
-452
drivers/edac/edac_mce_amd.c
··· 1 - #include <linux/module.h> 2 - #include "edac_mce_amd.h" 3 - 4 - static bool report_gart_errors; 5 - static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); 6 - 7 - void amd_report_gart_errors(bool v) 8 - { 9 - report_gart_errors = v; 10 - } 11 - EXPORT_SYMBOL_GPL(amd_report_gart_errors); 12 - 13 - void amd_register_ecc_decoder(void (*f)(int, struct err_regs *)) 14 - { 15 - nb_bus_decoder = f; 16 - } 17 - EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); 18 - 19 - void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *)) 20 - { 21 - if (nb_bus_decoder) { 22 - WARN_ON(nb_bus_decoder != f); 23 - 24 - nb_bus_decoder = NULL; 25 - } 26 - } 27 - EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); 28 - 29 - /* 30 - * string representation for the different MCA reported error types, see F3x48 31 - * or MSR0000_0411. 32 - */ 33 - const char *tt_msgs[] = { /* transaction type */ 34 - "instruction", 35 - "data", 36 - "generic", 37 - "reserved" 38 - }; 39 - EXPORT_SYMBOL_GPL(tt_msgs); 40 - 41 - const char *ll_msgs[] = { /* cache level */ 42 - "L0", 43 - "L1", 44 - "L2", 45 - "L3/generic" 46 - }; 47 - EXPORT_SYMBOL_GPL(ll_msgs); 48 - 49 - const char *rrrr_msgs[] = { 50 - "generic", 51 - "generic read", 52 - "generic write", 53 - "data read", 54 - "data write", 55 - "inst fetch", 56 - "prefetch", 57 - "evict", 58 - "snoop", 59 - "reserved RRRR= 9", 60 - "reserved RRRR= 10", 61 - "reserved RRRR= 11", 62 - "reserved RRRR= 12", 63 - "reserved RRRR= 13", 64 - "reserved RRRR= 14", 65 - "reserved RRRR= 15" 66 - }; 67 - EXPORT_SYMBOL_GPL(rrrr_msgs); 68 - 69 - const char *pp_msgs[] = { /* participating processor */ 70 - "local node originated (SRC)", 71 - "local node responded to request (RES)", 72 - "local node observed as 3rd party (OBS)", 73 - "generic" 74 - }; 75 - EXPORT_SYMBOL_GPL(pp_msgs); 76 - 77 - const char *to_msgs[] = { 78 - "no timeout", 79 - "timed out" 80 - }; 81 - EXPORT_SYMBOL_GPL(to_msgs); 82 - 83 - const char *ii_msgs[] = { /* memory or i/o */ 84 - "mem access", 85 - "reserved", 86 - "i/o access", 87 - "generic" 88 - }; 89 - EXPORT_SYMBOL_GPL(ii_msgs); 90 - 91 - /* 92 - * Map the 4 or 5 (family-specific) bits of Extended Error code to the 93 - * string table. 94 - */ 95 - const char *ext_msgs[] = { 96 - "K8 ECC error", /* 0_0000b */ 97 - "CRC error on link", /* 0_0001b */ 98 - "Sync error packets on link", /* 0_0010b */ 99 - "Master Abort during link operation", /* 0_0011b */ 100 - "Target Abort during link operation", /* 0_0100b */ 101 - "Invalid GART PTE entry during table walk", /* 0_0101b */ 102 - "Unsupported atomic RMW command received", /* 0_0110b */ 103 - "WDT error: NB transaction timeout", /* 0_0111b */ 104 - "ECC/ChipKill ECC error", /* 0_1000b */ 105 - "SVM DEV Error", /* 0_1001b */ 106 - "Link Data error", /* 0_1010b */ 107 - "Link/L3/Probe Filter Protocol error", /* 0_1011b */ 108 - "NB Internal Arrays Parity error", /* 0_1100b */ 109 - "DRAM Address/Control Parity error", /* 0_1101b */ 110 - "Link Transmission error", /* 0_1110b */ 111 - "GART/DEV Table Walk Data error" /* 0_1111b */ 112 - "Res 0x100 error", /* 1_0000b */ 113 - "Res 0x101 error", /* 1_0001b */ 114 - "Res 0x102 error", /* 1_0010b */ 115 - "Res 0x103 error", /* 1_0011b */ 116 - "Res 0x104 error", /* 1_0100b */ 117 - "Res 0x105 error", /* 1_0101b */ 118 - "Res 0x106 error", /* 1_0110b */ 119 - "Res 0x107 error", /* 1_0111b */ 120 - "Res 0x108 error", /* 1_1000b */ 121 - "Res 0x109 error", /* 1_1001b */ 122 - "Res 0x10A error", /* 1_1010b */ 123 - "Res 0x10B error", /* 1_1011b */ 124 - "ECC error in L3 Cache Data", /* 1_1100b */ 125 - "L3 Cache Tag error", /* 1_1101b */ 126 - "L3 Cache LRU Parity error", /* 1_1110b */ 127 - "Probe Filter error" /* 1_1111b */ 128 - }; 129 - EXPORT_SYMBOL_GPL(ext_msgs); 130 - 131 - static void amd_decode_dc_mce(u64 mc0_status) 132 - { 133 - u32 ec = mc0_status & 0xffff; 134 - u32 xec = (mc0_status >> 16) & 0xf; 135 - 136 - pr_emerg("Data Cache Error"); 137 - 138 - if (xec == 1 && TLB_ERROR(ec)) 139 - pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); 140 - else if (xec == 0) { 141 - if (mc0_status & (1ULL << 40)) 142 - pr_cont(" during Data Scrub.\n"); 143 - else if (TLB_ERROR(ec)) 144 - pr_cont(": %s TLB parity error.\n", LL_MSG(ec)); 145 - else if (MEM_ERROR(ec)) { 146 - u8 ll = ec & 0x3; 147 - u8 tt = (ec >> 2) & 0x3; 148 - u8 rrrr = (ec >> 4) & 0xf; 149 - 150 - /* see F10h BKDG (31116), Table 92. */ 151 - if (ll == 0x1) { 152 - if (tt != 0x1) 153 - goto wrong_dc_mce; 154 - 155 - pr_cont(": Data/Tag %s error.\n", RRRR_MSG(ec)); 156 - 157 - } else if (ll == 0x2 && rrrr == 0x3) 158 - pr_cont(" during L1 linefill from L2.\n"); 159 - else 160 - goto wrong_dc_mce; 161 - } else if (BUS_ERROR(ec) && boot_cpu_data.x86 == 0xf) 162 - pr_cont(" during system linefill.\n"); 163 - else 164 - goto wrong_dc_mce; 165 - } else 166 - goto wrong_dc_mce; 167 - 168 - return; 169 - 170 - wrong_dc_mce: 171 - pr_warning("Corrupted DC MCE info?\n"); 172 - } 173 - 174 - static void amd_decode_ic_mce(u64 mc1_status) 175 - { 176 - u32 ec = mc1_status & 0xffff; 177 - u32 xec = (mc1_status >> 16) & 0xf; 178 - 179 - pr_emerg("Instruction Cache Error"); 180 - 181 - if (xec == 1 && TLB_ERROR(ec)) 182 - pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); 183 - else if (xec == 0) { 184 - if (TLB_ERROR(ec)) 185 - pr_cont(": %s TLB Parity error.\n", LL_MSG(ec)); 186 - else if (BUS_ERROR(ec)) { 187 - if (boot_cpu_data.x86 == 0xf && 188 - (mc1_status & (1ULL << 58))) 189 - pr_cont(" during system linefill.\n"); 190 - else 191 - pr_cont(" during attempted NB data read.\n"); 192 - } else if (MEM_ERROR(ec)) { 193 - u8 ll = ec & 0x3; 194 - u8 rrrr = (ec >> 4) & 0xf; 195 - 196 - if (ll == 0x2) 197 - pr_cont(" during a linefill from L2.\n"); 198 - else if (ll == 0x1) { 199 - 200 - switch (rrrr) { 201 - case 0x5: 202 - pr_cont(": Parity error during " 203 - "data load.\n"); 204 - break; 205 - 206 - case 0x7: 207 - pr_cont(": Copyback Parity/Victim" 208 - " error.\n"); 209 - break; 210 - 211 - case 0x8: 212 - pr_cont(": Tag Snoop error.\n"); 213 - break; 214 - 215 - default: 216 - goto wrong_ic_mce; 217 - break; 218 - } 219 - } 220 - } else 221 - goto wrong_ic_mce; 222 - } else 223 - goto wrong_ic_mce; 224 - 225 - return; 226 - 227 - wrong_ic_mce: 228 - pr_warning("Corrupted IC MCE info?\n"); 229 - } 230 - 231 - static void amd_decode_bu_mce(u64 mc2_status) 232 - { 233 - u32 ec = mc2_status & 0xffff; 234 - u32 xec = (mc2_status >> 16) & 0xf; 235 - 236 - pr_emerg("Bus Unit Error"); 237 - 238 - if (xec == 0x1) 239 - pr_cont(" in the write data buffers.\n"); 240 - else if (xec == 0x3) 241 - pr_cont(" in the victim data buffers.\n"); 242 - else if (xec == 0x2 && MEM_ERROR(ec)) 243 - pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec)); 244 - else if (xec == 0x0) { 245 - if (TLB_ERROR(ec)) 246 - pr_cont(": %s error in a Page Descriptor Cache or " 247 - "Guest TLB.\n", TT_MSG(ec)); 248 - else if (BUS_ERROR(ec)) 249 - pr_cont(": %s/ECC error in data read from NB: %s.\n", 250 - RRRR_MSG(ec), PP_MSG(ec)); 251 - else if (MEM_ERROR(ec)) { 252 - u8 rrrr = (ec >> 4) & 0xf; 253 - 254 - if (rrrr >= 0x7) 255 - pr_cont(": %s error during data copyback.\n", 256 - RRRR_MSG(ec)); 257 - else if (rrrr <= 0x1) 258 - pr_cont(": %s parity/ECC error during data " 259 - "access from L2.\n", RRRR_MSG(ec)); 260 - else 261 - goto wrong_bu_mce; 262 - } else 263 - goto wrong_bu_mce; 264 - } else 265 - goto wrong_bu_mce; 266 - 267 - return; 268 - 269 - wrong_bu_mce: 270 - pr_warning("Corrupted BU MCE info?\n"); 271 - } 272 - 273 - static void amd_decode_ls_mce(u64 mc3_status) 274 - { 275 - u32 ec = mc3_status & 0xffff; 276 - u32 xec = (mc3_status >> 16) & 0xf; 277 - 278 - pr_emerg("Load Store Error"); 279 - 280 - if (xec == 0x0) { 281 - u8 rrrr = (ec >> 4) & 0xf; 282 - 283 - if (!BUS_ERROR(ec) || (rrrr != 0x3 && rrrr != 0x4)) 284 - goto wrong_ls_mce; 285 - 286 - pr_cont(" during %s.\n", RRRR_MSG(ec)); 287 - } 288 - return; 289 - 290 - wrong_ls_mce: 291 - pr_warning("Corrupted LS MCE info?\n"); 292 - } 293 - 294 - void amd_decode_nb_mce(int node_id, struct err_regs *regs, int handle_errors) 295 - { 296 - u32 ec = ERROR_CODE(regs->nbsl); 297 - 298 - if (!handle_errors) 299 - return; 300 - 301 - /* 302 - * GART TLB error reporting is disabled by default. Bail out early. 303 - */ 304 - if (TLB_ERROR(ec) && !report_gart_errors) 305 - return; 306 - 307 - pr_emerg("Northbridge Error, node %d", node_id); 308 - 309 - /* 310 - * F10h, revD can disable ErrCpu[3:0] so check that first and also the 311 - * value encoding has changed so interpret those differently 312 - */ 313 - if ((boot_cpu_data.x86 == 0x10) && 314 - (boot_cpu_data.x86_model > 7)) { 315 - if (regs->nbsh & K8_NBSH_ERR_CPU_VAL) 316 - pr_cont(", core: %u\n", (u8)(regs->nbsh & 0xf)); 317 - } else { 318 - u8 assoc_cpus = regs->nbsh & 0xf; 319 - 320 - if (assoc_cpus > 0) 321 - pr_cont(", core: %d", fls(assoc_cpus) - 1); 322 - 323 - pr_cont("\n"); 324 - } 325 - 326 - pr_emerg("%s.\n", EXT_ERR_MSG(regs->nbsl)); 327 - 328 - if (BUS_ERROR(ec) && nb_bus_decoder) 329 - nb_bus_decoder(node_id, regs); 330 - } 331 - EXPORT_SYMBOL_GPL(amd_decode_nb_mce); 332 - 333 - static void amd_decode_fr_mce(u64 mc5_status) 334 - { 335 - /* we have only one error signature so match all fields at once. */ 336 - if ((mc5_status & 0xffff) == 0x0f0f) 337 - pr_emerg(" FR Error: CPU Watchdog timer expire.\n"); 338 - else 339 - pr_warning("Corrupted FR MCE info?\n"); 340 - } 341 - 342 - static inline void amd_decode_err_code(unsigned int ec) 343 - { 344 - if (TLB_ERROR(ec)) { 345 - pr_emerg("Transaction: %s, Cache Level %s\n", 346 - TT_MSG(ec), LL_MSG(ec)); 347 - } else if (MEM_ERROR(ec)) { 348 - pr_emerg("Transaction: %s, Type: %s, Cache Level: %s", 349 - RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); 350 - } else if (BUS_ERROR(ec)) { 351 - pr_emerg("Transaction type: %s(%s), %s, Cache Level: %s, " 352 - "Participating Processor: %s\n", 353 - RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), 354 - PP_MSG(ec)); 355 - } else 356 - pr_warning("Huh? Unknown MCE error 0x%x\n", ec); 357 - } 358 - 359 - static int amd_decode_mce(struct notifier_block *nb, unsigned long val, 360 - void *data) 361 - { 362 - struct mce *m = (struct mce *)data; 363 - struct err_regs regs; 364 - int node, ecc; 365 - 366 - pr_emerg("MC%d_STATUS: ", m->bank); 367 - 368 - pr_cont("%sorrected error, other errors lost: %s, " 369 - "CPU context corrupt: %s", 370 - ((m->status & MCI_STATUS_UC) ? "Unc" : "C"), 371 - ((m->status & MCI_STATUS_OVER) ? "yes" : "no"), 372 - ((m->status & MCI_STATUS_PCC) ? "yes" : "no")); 373 - 374 - /* do the two bits[14:13] together */ 375 - ecc = (m->status >> 45) & 0x3; 376 - if (ecc) 377 - pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U")); 378 - 379 - pr_cont("\n"); 380 - 381 - switch (m->bank) { 382 - case 0: 383 - amd_decode_dc_mce(m->status); 384 - break; 385 - 386 - case 1: 387 - amd_decode_ic_mce(m->status); 388 - break; 389 - 390 - case 2: 391 - amd_decode_bu_mce(m->status); 392 - break; 393 - 394 - case 3: 395 - amd_decode_ls_mce(m->status); 396 - break; 397 - 398 - case 4: 399 - regs.nbsl = (u32) m->status; 400 - regs.nbsh = (u32)(m->status >> 32); 401 - regs.nbeal = (u32) m->addr; 402 - regs.nbeah = (u32)(m->addr >> 32); 403 - node = amd_get_nb_id(m->extcpu); 404 - 405 - amd_decode_nb_mce(node, &regs, 1); 406 - break; 407 - 408 - case 5: 409 - amd_decode_fr_mce(m->status); 410 - break; 411 - 412 - default: 413 - break; 414 - } 415 - 416 - amd_decode_err_code(m->status & 0xffff); 417 - 418 - return NOTIFY_STOP; 419 - } 420 - 421 - static struct notifier_block amd_mce_dec_nb = { 422 - .notifier_call = amd_decode_mce, 423 - }; 424 - 425 - static int __init mce_amd_init(void) 426 - { 427 - /* 428 - * We can decode MCEs for K8, F10h and F11h CPUs: 429 - */ 430 - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 431 - return 0; 432 - 433 - if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) 434 - return 0; 435 - 436 - atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); 437 - 438 - return 0; 439 - } 440 - early_initcall(mce_amd_init); 441 - 442 - #ifdef MODULE 443 - static void __exit mce_amd_exit(void) 444 - { 445 - atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); 446 - } 447 - 448 - MODULE_DESCRIPTION("AMD MCE decoder"); 449 - MODULE_ALIAS("edac-mce-amd"); 450 - MODULE_LICENSE("GPL"); 451 - module_exit(mce_amd_exit); 452 - #endif
+52 -7
drivers/edac/edac_mce_amd.h drivers/edac/mce_amd.h
··· 1 1 #ifndef _EDAC_MCE_AMD_H 2 2 #define _EDAC_MCE_AMD_H 3 3 4 + #include <linux/notifier.h> 5 + 4 6 #include <asm/mce.h> 7 + 8 + #define BIT_64(n) (U64_C(1) << (n)) 5 9 6 10 #define ERROR_CODE(x) ((x) & 0xffff) 7 11 #define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f) 8 - #define EXT_ERR_MSG(x) ext_msgs[EXT_ERROR_CODE(x)] 9 12 10 13 #define LOW_SYNDROME(x) (((x) >> 15) & 0xff) 11 14 #define HIGH_SYNDROME(x) (((x) >> 24) & 0xff) ··· 23 20 #define II_MSG(x) ii_msgs[II(x)] 24 21 #define LL(x) (((x) >> 0) & 0x3) 25 22 #define LL_MSG(x) ll_msgs[LL(x)] 26 - #define RRRR(x) (((x) >> 4) & 0xf) 27 - #define RRRR_MSG(x) rrrr_msgs[RRRR(x)] 28 23 #define TO(x) (((x) >> 8) & 0x1) 29 24 #define TO_MSG(x) to_msgs[TO(x)] 30 25 #define PP(x) (((x) >> 9) & 0x3) 31 26 #define PP_MSG(x) pp_msgs[PP(x)] 27 + 28 + #define RRRR(x) (((x) >> 4) & 0xf) 29 + #define RRRR_MSG(x) ((RRRR(x) < 9) ? rrrr_msgs[RRRR(x)] : "Wrong R4!") 32 30 33 31 #define K8_NBSH 0x4C 34 32 ··· 45 41 #define K8_NBSH_UECC BIT(13) 46 42 #define K8_NBSH_ERR_SCRUBER BIT(8) 47 43 44 + enum tt_ids { 45 + TT_INSTR = 0, 46 + TT_DATA, 47 + TT_GEN, 48 + TT_RESV, 49 + }; 50 + 51 + enum ll_ids { 52 + LL_RESV = 0, 53 + LL_L1, 54 + LL_L2, 55 + LL_LG, 56 + }; 57 + 58 + enum ii_ids { 59 + II_MEM = 0, 60 + II_RESV, 61 + II_IO, 62 + II_GEN, 63 + }; 64 + 65 + enum rrrr_ids { 66 + R4_GEN = 0, 67 + R4_RD, 68 + R4_WR, 69 + R4_DRD, 70 + R4_DWR, 71 + R4_IRD, 72 + R4_PREF, 73 + R4_EVICT, 74 + R4_SNOOP, 75 + }; 76 + 48 77 extern const char *tt_msgs[]; 49 78 extern const char *ll_msgs[]; 50 79 extern const char *rrrr_msgs[]; 51 80 extern const char *pp_msgs[]; 52 81 extern const char *to_msgs[]; 53 82 extern const char *ii_msgs[]; 54 - extern const char *ext_msgs[]; 55 83 56 84 /* 57 85 * relevant NB regs ··· 96 60 u32 nbeal; 97 61 }; 98 62 63 + /* 64 + * per-family decoder ops 65 + */ 66 + struct amd_decoder_ops { 67 + bool (*dc_mce)(u16); 68 + bool (*ic_mce)(u16); 69 + bool (*nb_mce)(u16, u8); 70 + }; 99 71 100 72 void amd_report_gart_errors(bool); 101 - void amd_register_ecc_decoder(void (*f)(int, struct err_regs *)); 102 - void amd_unregister_ecc_decoder(void (*f)(int, struct err_regs *)); 103 - void amd_decode_nb_mce(int, struct err_regs *, int); 73 + void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32)); 74 + void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32)); 75 + void amd_decode_nb_mce(int, struct mce *, u32); 76 + int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data); 104 77 105 78 #endif /* _EDAC_MCE_AMD_H */
+1 -78
drivers/edac/edac_module.c
··· 27 27 struct workqueue_struct *edac_workqueue; 28 28 29 29 /* 30 - * sysfs object: /sys/devices/system/edac 31 - * need to export to other files in this modules 32 - */ 33 - static struct sysdev_class edac_class = { 34 - .name = "edac", 35 - }; 36 - static int edac_class_valid; 37 - 38 - /* 39 30 * edac_op_state_to_string() 40 31 */ 41 32 char *edac_op_state_to_string(int opstate) ··· 43 52 return "OFFLINE"; 44 53 45 54 return "UNKNOWN"; 46 - } 47 - 48 - /* 49 - * edac_get_edac_class() 50 - * 51 - * return pointer to the edac class of 'edac' 52 - */ 53 - struct sysdev_class *edac_get_edac_class(void) 54 - { 55 - struct sysdev_class *classptr = NULL; 56 - 57 - if (edac_class_valid) 58 - classptr = &edac_class; 59 - 60 - return classptr; 61 - } 62 - 63 - /* 64 - * edac_register_sysfs_edac_name() 65 - * 66 - * register the 'edac' into /sys/devices/system 67 - * 68 - * return: 69 - * 0 success 70 - * !0 error 71 - */ 72 - static int edac_register_sysfs_edac_name(void) 73 - { 74 - int err; 75 - 76 - /* create the /sys/devices/system/edac directory */ 77 - err = sysdev_class_register(&edac_class); 78 - 79 - if (err) { 80 - debugf1("%s() error=%d\n", __func__, err); 81 - return err; 82 - } 83 - 84 - edac_class_valid = 1; 85 - return 0; 86 - } 87 - 88 - /* 89 - * sysdev_class_unregister() 90 - * 91 - * unregister the 'edac' from /sys/devices/system 92 - */ 93 - static void edac_unregister_sysfs_edac_name(void) 94 - { 95 - /* only if currently registered, then unregister it */ 96 - if (edac_class_valid) 97 - sysdev_class_unregister(&edac_class); 98 - 99 - edac_class_valid = 0; 100 55 } 101 56 102 57 /* ··· 91 154 edac_pci_clear_parity_errors(); 92 155 93 156 /* 94 - * perform the registration of the /sys/devices/system/edac class object 95 - */ 96 - if (edac_register_sysfs_edac_name()) { 97 - edac_printk(KERN_ERR, EDAC_MC, 98 - "Error initializing 'edac' kobject\n"); 99 - err = -ENODEV; 100 - goto error; 101 - } 102 - 103 - /* 104 157 * now set up the mc_kset under the edac class object 105 158 */ 106 159 err = edac_sysfs_setup_mc_kset(); 107 160 if (err) 108 - goto sysfs_setup_fail; 161 + goto error; 109 162 110 163 /* Setup/Initialize the workq for this core */ 111 164 err = edac_workqueue_setup(); ··· 109 182 /* Error teardown stack */ 110 183 workq_fail: 111 184 edac_sysfs_teardown_mc_kset(); 112 - 113 - sysfs_setup_fail: 114 - edac_unregister_sysfs_edac_name(); 115 185 116 186 error: 117 187 return err; ··· 125 201 /* tear down the various subsystems */ 126 202 edac_workqueue_teardown(); 127 203 edac_sysfs_teardown_mc_kset(); 128 - edac_unregister_sysfs_edac_name(); 129 204 } 130 205 131 206 /*
-1
drivers/edac/edac_module.h
··· 42 42 struct edac_device_ctl_info *edac_dev); 43 43 extern int edac_device_create_sysfs(struct edac_device_ctl_info *edac_dev); 44 44 extern void edac_device_remove_sysfs(struct edac_device_ctl_info *edac_dev); 45 - extern struct sysdev_class *edac_get_edac_class(void); 46 45 47 46 /* edac core workqueue: single CPU mode */ 48 47 extern struct workqueue_struct *edac_workqueue;
+7 -3
drivers/edac/edac_pci_sysfs.c
··· 7 7 * 8 8 */ 9 9 #include <linux/module.h> 10 - #include <linux/sysdev.h> 10 + #include <linux/edac.h> 11 11 #include <linux/slab.h> 12 12 #include <linux/ctype.h> 13 13 ··· 354 354 /* First time, so create the main kobject and its 355 355 * controls and atributes 356 356 */ 357 - edac_class = edac_get_edac_class(); 357 + edac_class = edac_get_sysfs_class(); 358 358 if (edac_class == NULL) { 359 359 debugf1("%s() no edac_class\n", __func__); 360 360 err = -ENODEV; ··· 368 368 if (!try_module_get(THIS_MODULE)) { 369 369 debugf1("%s() try_module_get() failed\n", __func__); 370 370 err = -ENODEV; 371 - goto decrement_count_fail; 371 + goto mod_get_fail; 372 372 } 373 373 374 374 edac_pci_top_main_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); ··· 403 403 kzalloc_fail: 404 404 module_put(THIS_MODULE); 405 405 406 + mod_get_fail: 407 + edac_put_sysfs_class(); 408 + 406 409 decrement_count_fail: 407 410 /* if are on this error exit, nothing to tear down */ 408 411 atomic_dec(&edac_pci_sysfs_refcount); ··· 432 429 __func__); 433 430 kobject_put(edac_pci_top_main_kobj); 434 431 } 432 + edac_put_sysfs_class(); 435 433 } 436 434 437 435 /*
+47 -4
drivers/edac/edac_stub.c
··· 3 3 * 4 4 * Author: Dave Jiang <djiang@mvista.com> 5 5 * 6 - * 2007 (c) MontaVista Software, Inc. This file is licensed under 7 - * the terms of the GNU General Public License version 2. This program 8 - * is licensed "as is" without any warranty of any kind, whether express 9 - * or implied. 6 + * 2007 (c) MontaVista Software, Inc. 7 + * 2010 (c) Advanced Micro Devices Inc. 8 + * Borislav Petkov <borislav.petkov@amd.com> 9 + * 10 + * This file is licensed under the terms of the GNU General Public 11 + * License version 2. This program is licensed "as is" without any 12 + * warranty of any kind, whether express or implied. 10 13 * 11 14 */ 12 15 #include <linux/module.h> ··· 25 22 26 23 int edac_err_assert = 0; 27 24 EXPORT_SYMBOL_GPL(edac_err_assert); 25 + 26 + static atomic_t edac_class_valid = ATOMIC_INIT(0); 28 27 29 28 /* 30 29 * called to determine if there is an EDAC driver interested in ··· 49 44 edac_err_assert++; 50 45 } 51 46 EXPORT_SYMBOL_GPL(edac_atomic_assert_error); 47 + 48 + /* 49 + * sysfs object: /sys/devices/system/edac 50 + * need to export to other files 51 + */ 52 + struct sysdev_class edac_class = { 53 + .name = "edac", 54 + }; 55 + EXPORT_SYMBOL_GPL(edac_class); 56 + 57 + /* return pointer to the 'edac' node in sysfs */ 58 + struct sysdev_class *edac_get_sysfs_class(void) 59 + { 60 + int err = 0; 61 + 62 + if (atomic_read(&edac_class_valid)) 63 + goto out; 64 + 65 + /* create the /sys/devices/system/edac directory */ 66 + err = sysdev_class_register(&edac_class); 67 + if (err) { 68 + printk(KERN_ERR "Error registering toplevel EDAC sysfs dir\n"); 69 + return NULL; 70 + } 71 + 72 + out: 73 + atomic_inc(&edac_class_valid); 74 + return &edac_class; 75 + } 76 + EXPORT_SYMBOL_GPL(edac_get_sysfs_class); 77 + 78 + void edac_put_sysfs_class(void) 79 + { 80 + /* last user unregisters it */ 81 + if (atomic_dec_and_test(&edac_class_valid)) 82 + sysdev_class_unregister(&edac_class); 83 + } 84 + EXPORT_SYMBOL_GPL(edac_put_sysfs_class);
+680
drivers/edac/mce_amd.c
··· 1 + #include <linux/module.h> 2 + #include <linux/slab.h> 3 + 4 + #include "mce_amd.h" 5 + 6 + static struct amd_decoder_ops *fam_ops; 7 + 8 + static u8 nb_err_cpumask = 0xf; 9 + 10 + static bool report_gart_errors; 11 + static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg); 12 + 13 + void amd_report_gart_errors(bool v) 14 + { 15 + report_gart_errors = v; 16 + } 17 + EXPORT_SYMBOL_GPL(amd_report_gart_errors); 18 + 19 + void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32)) 20 + { 21 + nb_bus_decoder = f; 22 + } 23 + EXPORT_SYMBOL_GPL(amd_register_ecc_decoder); 24 + 25 + void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32)) 26 + { 27 + if (nb_bus_decoder) { 28 + WARN_ON(nb_bus_decoder != f); 29 + 30 + nb_bus_decoder = NULL; 31 + } 32 + } 33 + EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder); 34 + 35 + /* 36 + * string representation for the different MCA reported error types, see F3x48 37 + * or MSR0000_0411. 38 + */ 39 + 40 + /* transaction type */ 41 + const char *tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" }; 42 + EXPORT_SYMBOL_GPL(tt_msgs); 43 + 44 + /* cache level */ 45 + const char *ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" }; 46 + EXPORT_SYMBOL_GPL(ll_msgs); 47 + 48 + /* memory transaction type */ 49 + const char *rrrr_msgs[] = { 50 + "GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP" 51 + }; 52 + EXPORT_SYMBOL_GPL(rrrr_msgs); 53 + 54 + /* participating processor */ 55 + const char *pp_msgs[] = { "SRC", "RES", "OBS", "GEN" }; 56 + EXPORT_SYMBOL_GPL(pp_msgs); 57 + 58 + /* request timeout */ 59 + const char *to_msgs[] = { "no timeout", "timed out" }; 60 + EXPORT_SYMBOL_GPL(to_msgs); 61 + 62 + /* memory or i/o */ 63 + const char *ii_msgs[] = { "MEM", "RESV", "IO", "GEN" }; 64 + EXPORT_SYMBOL_GPL(ii_msgs); 65 + 66 + static const char *f10h_nb_mce_desc[] = { 67 + "HT link data error", 68 + "Protocol error (link, L3, probe filter, etc.)", 69 + "Parity error in NB-internal arrays", 70 + "Link Retry due to IO link transmission error", 71 + "L3 ECC data cache error", 72 + "ECC error in L3 cache tag", 73 + "L3 LRU parity bits error", 74 + "ECC Error in the Probe Filter directory" 75 + }; 76 + 77 + static bool f12h_dc_mce(u16 ec) 78 + { 79 + bool ret = false; 80 + 81 + if (MEM_ERROR(ec)) { 82 + u8 ll = ec & 0x3; 83 + ret = true; 84 + 85 + if (ll == LL_L2) 86 + pr_cont("during L1 linefill from L2.\n"); 87 + else if (ll == LL_L1) 88 + pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec)); 89 + else 90 + ret = false; 91 + } 92 + return ret; 93 + } 94 + 95 + static bool f10h_dc_mce(u16 ec) 96 + { 97 + u8 r4 = (ec >> 4) & 0xf; 98 + u8 ll = ec & 0x3; 99 + 100 + if (r4 == R4_GEN && ll == LL_L1) { 101 + pr_cont("during data scrub.\n"); 102 + return true; 103 + } 104 + return f12h_dc_mce(ec); 105 + } 106 + 107 + static bool k8_dc_mce(u16 ec) 108 + { 109 + if (BUS_ERROR(ec)) { 110 + pr_cont("during system linefill.\n"); 111 + return true; 112 + } 113 + 114 + return f10h_dc_mce(ec); 115 + } 116 + 117 + static bool f14h_dc_mce(u16 ec) 118 + { 119 + u8 r4 = (ec >> 4) & 0xf; 120 + u8 ll = ec & 0x3; 121 + u8 tt = (ec >> 2) & 0x3; 122 + u8 ii = tt; 123 + bool ret = true; 124 + 125 + if (MEM_ERROR(ec)) { 126 + 127 + if (tt != TT_DATA || ll != LL_L1) 128 + return false; 129 + 130 + switch (r4) { 131 + case R4_DRD: 132 + case R4_DWR: 133 + pr_cont("Data/Tag parity error due to %s.\n", 134 + (r4 == R4_DRD ? "load/hw prf" : "store")); 135 + break; 136 + case R4_EVICT: 137 + pr_cont("Copyback parity error on a tag miss.\n"); 138 + break; 139 + case R4_SNOOP: 140 + pr_cont("Tag parity error during snoop.\n"); 141 + break; 142 + default: 143 + ret = false; 144 + } 145 + } else if (BUS_ERROR(ec)) { 146 + 147 + if ((ii != II_MEM && ii != II_IO) || ll != LL_LG) 148 + return false; 149 + 150 + pr_cont("System read data error on a "); 151 + 152 + switch (r4) { 153 + case R4_RD: 154 + pr_cont("TLB reload.\n"); 155 + break; 156 + case R4_DWR: 157 + pr_cont("store.\n"); 158 + break; 159 + case R4_DRD: 160 + pr_cont("load.\n"); 161 + break; 162 + default: 163 + ret = false; 164 + } 165 + } else { 166 + ret = false; 167 + } 168 + 169 + return ret; 170 + } 171 + 172 + static void amd_decode_dc_mce(struct mce *m) 173 + { 174 + u16 ec = m->status & 0xffff; 175 + u8 xec = (m->status >> 16) & 0xf; 176 + 177 + pr_emerg(HW_ERR "Data Cache Error: "); 178 + 179 + /* TLB error signatures are the same across families */ 180 + if (TLB_ERROR(ec)) { 181 + u8 tt = (ec >> 2) & 0x3; 182 + 183 + if (tt == TT_DATA) { 184 + pr_cont("%s TLB %s.\n", LL_MSG(ec), 185 + (xec ? "multimatch" : "parity error")); 186 + return; 187 + } 188 + else 189 + goto wrong_dc_mce; 190 + } 191 + 192 + if (!fam_ops->dc_mce(ec)) 193 + goto wrong_dc_mce; 194 + 195 + return; 196 + 197 + wrong_dc_mce: 198 + pr_emerg(HW_ERR "Corrupted DC MCE info?\n"); 199 + } 200 + 201 + static bool k8_ic_mce(u16 ec) 202 + { 203 + u8 ll = ec & 0x3; 204 + u8 r4 = (ec >> 4) & 0xf; 205 + bool ret = true; 206 + 207 + if (!MEM_ERROR(ec)) 208 + return false; 209 + 210 + if (ll == 0x2) 211 + pr_cont("during a linefill from L2.\n"); 212 + else if (ll == 0x1) { 213 + switch (r4) { 214 + case R4_IRD: 215 + pr_cont("Parity error during data load.\n"); 216 + break; 217 + 218 + case R4_EVICT: 219 + pr_cont("Copyback Parity/Victim error.\n"); 220 + break; 221 + 222 + case R4_SNOOP: 223 + pr_cont("Tag Snoop error.\n"); 224 + break; 225 + 226 + default: 227 + ret = false; 228 + break; 229 + } 230 + } else 231 + ret = false; 232 + 233 + return ret; 234 + } 235 + 236 + static bool f14h_ic_mce(u16 ec) 237 + { 238 + u8 ll = ec & 0x3; 239 + u8 tt = (ec >> 2) & 0x3; 240 + u8 r4 = (ec >> 4) & 0xf; 241 + bool ret = true; 242 + 243 + if (MEM_ERROR(ec)) { 244 + if (tt != 0 || ll != 1) 245 + ret = false; 246 + 247 + if (r4 == R4_IRD) 248 + pr_cont("Data/tag array parity error for a tag hit.\n"); 249 + else if (r4 == R4_SNOOP) 250 + pr_cont("Tag error during snoop/victimization.\n"); 251 + else 252 + ret = false; 253 + } 254 + return ret; 255 + } 256 + 257 + static void amd_decode_ic_mce(struct mce *m) 258 + { 259 + u16 ec = m->status & 0xffff; 260 + u8 xec = (m->status >> 16) & 0xf; 261 + 262 + pr_emerg(HW_ERR "Instruction Cache Error: "); 263 + 264 + if (TLB_ERROR(ec)) 265 + pr_cont("%s TLB %s.\n", LL_MSG(ec), 266 + (xec ? "multimatch" : "parity error")); 267 + else if (BUS_ERROR(ec)) { 268 + bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58))); 269 + 270 + pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read")); 271 + } else if (fam_ops->ic_mce(ec)) 272 + ; 273 + else 274 + pr_emerg(HW_ERR "Corrupted IC MCE info?\n"); 275 + } 276 + 277 + static void amd_decode_bu_mce(struct mce *m) 278 + { 279 + u32 ec = m->status & 0xffff; 280 + u32 xec = (m->status >> 16) & 0xf; 281 + 282 + pr_emerg(HW_ERR "Bus Unit Error"); 283 + 284 + if (xec == 0x1) 285 + pr_cont(" in the write data buffers.\n"); 286 + else if (xec == 0x3) 287 + pr_cont(" in the victim data buffers.\n"); 288 + else if (xec == 0x2 && MEM_ERROR(ec)) 289 + pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec)); 290 + else if (xec == 0x0) { 291 + if (TLB_ERROR(ec)) 292 + pr_cont(": %s error in a Page Descriptor Cache or " 293 + "Guest TLB.\n", TT_MSG(ec)); 294 + else if (BUS_ERROR(ec)) 295 + pr_cont(": %s/ECC error in data read from NB: %s.\n", 296 + RRRR_MSG(ec), PP_MSG(ec)); 297 + else if (MEM_ERROR(ec)) { 298 + u8 rrrr = (ec >> 4) & 0xf; 299 + 300 + if (rrrr >= 0x7) 301 + pr_cont(": %s error during data copyback.\n", 302 + RRRR_MSG(ec)); 303 + else if (rrrr <= 0x1) 304 + pr_cont(": %s parity/ECC error during data " 305 + "access from L2.\n", RRRR_MSG(ec)); 306 + else 307 + goto wrong_bu_mce; 308 + } else 309 + goto wrong_bu_mce; 310 + } else 311 + goto wrong_bu_mce; 312 + 313 + return; 314 + 315 + wrong_bu_mce: 316 + pr_emerg(HW_ERR "Corrupted BU MCE info?\n"); 317 + } 318 + 319 + static void amd_decode_ls_mce(struct mce *m) 320 + { 321 + u16 ec = m->status & 0xffff; 322 + u8 xec = (m->status >> 16) & 0xf; 323 + 324 + if (boot_cpu_data.x86 == 0x14) { 325 + pr_emerg("You shouldn't be seeing an LS MCE on this cpu family," 326 + " please report on LKML.\n"); 327 + return; 328 + } 329 + 330 + pr_emerg(HW_ERR "Load Store Error"); 331 + 332 + if (xec == 0x0) { 333 + u8 r4 = (ec >> 4) & 0xf; 334 + 335 + if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR)) 336 + goto wrong_ls_mce; 337 + 338 + pr_cont(" during %s.\n", RRRR_MSG(ec)); 339 + } else 340 + goto wrong_ls_mce; 341 + 342 + return; 343 + 344 + wrong_ls_mce: 345 + pr_emerg(HW_ERR "Corrupted LS MCE info?\n"); 346 + } 347 + 348 + static bool k8_nb_mce(u16 ec, u8 xec) 349 + { 350 + bool ret = true; 351 + 352 + switch (xec) { 353 + case 0x1: 354 + pr_cont("CRC error detected on HT link.\n"); 355 + break; 356 + 357 + case 0x5: 358 + pr_cont("Invalid GART PTE entry during GART table walk.\n"); 359 + break; 360 + 361 + case 0x6: 362 + pr_cont("Unsupported atomic RMW received from an IO link.\n"); 363 + break; 364 + 365 + case 0x0: 366 + case 0x8: 367 + if (boot_cpu_data.x86 == 0x11) 368 + return false; 369 + 370 + pr_cont("DRAM ECC error detected on the NB.\n"); 371 + break; 372 + 373 + case 0xd: 374 + pr_cont("Parity error on the DRAM addr/ctl signals.\n"); 375 + break; 376 + 377 + default: 378 + ret = false; 379 + break; 380 + } 381 + 382 + return ret; 383 + } 384 + 385 + static bool f10h_nb_mce(u16 ec, u8 xec) 386 + { 387 + bool ret = true; 388 + u8 offset = 0; 389 + 390 + if (k8_nb_mce(ec, xec)) 391 + return true; 392 + 393 + switch(xec) { 394 + case 0xa ... 0xc: 395 + offset = 10; 396 + break; 397 + 398 + case 0xe: 399 + offset = 11; 400 + break; 401 + 402 + case 0xf: 403 + if (TLB_ERROR(ec)) 404 + pr_cont("GART Table Walk data error.\n"); 405 + else if (BUS_ERROR(ec)) 406 + pr_cont("DMA Exclusion Vector Table Walk error.\n"); 407 + else 408 + ret = false; 409 + 410 + goto out; 411 + break; 412 + 413 + case 0x1c ... 0x1f: 414 + offset = 24; 415 + break; 416 + 417 + default: 418 + ret = false; 419 + 420 + goto out; 421 + break; 422 + } 423 + 424 + pr_cont("%s.\n", f10h_nb_mce_desc[xec - offset]); 425 + 426 + out: 427 + return ret; 428 + } 429 + 430 + static bool nb_noop_mce(u16 ec, u8 xec) 431 + { 432 + return false; 433 + } 434 + 435 + void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg) 436 + { 437 + u8 xec = (m->status >> 16) & 0x1f; 438 + u16 ec = m->status & 0xffff; 439 + u32 nbsh = (u32)(m->status >> 32); 440 + 441 + pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id); 442 + 443 + /* 444 + * F10h, revD can disable ErrCpu[3:0] so check that first and also the 445 + * value encoding has changed so interpret those differently 446 + */ 447 + if ((boot_cpu_data.x86 == 0x10) && 448 + (boot_cpu_data.x86_model > 7)) { 449 + if (nbsh & K8_NBSH_ERR_CPU_VAL) 450 + pr_cont(", core: %u", (u8)(nbsh & nb_err_cpumask)); 451 + } else { 452 + u8 assoc_cpus = nbsh & nb_err_cpumask; 453 + 454 + if (assoc_cpus > 0) 455 + pr_cont(", core: %d", fls(assoc_cpus) - 1); 456 + } 457 + 458 + switch (xec) { 459 + case 0x2: 460 + pr_cont("Sync error (sync packets on HT link detected).\n"); 461 + return; 462 + 463 + case 0x3: 464 + pr_cont("HT Master abort.\n"); 465 + return; 466 + 467 + case 0x4: 468 + pr_cont("HT Target abort.\n"); 469 + return; 470 + 471 + case 0x7: 472 + pr_cont("NB Watchdog timeout.\n"); 473 + return; 474 + 475 + case 0x9: 476 + pr_cont("SVM DMA Exclusion Vector error.\n"); 477 + return; 478 + 479 + default: 480 + break; 481 + } 482 + 483 + if (!fam_ops->nb_mce(ec, xec)) 484 + goto wrong_nb_mce; 485 + 486 + if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10) 487 + if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder) 488 + nb_bus_decoder(node_id, m, nbcfg); 489 + 490 + return; 491 + 492 + wrong_nb_mce: 493 + pr_emerg(HW_ERR "Corrupted NB MCE info?\n"); 494 + } 495 + EXPORT_SYMBOL_GPL(amd_decode_nb_mce); 496 + 497 + static void amd_decode_fr_mce(struct mce *m) 498 + { 499 + if (boot_cpu_data.x86 == 0xf || 500 + boot_cpu_data.x86 == 0x11) 501 + goto wrong_fr_mce; 502 + 503 + /* we have only one error signature so match all fields at once. */ 504 + if ((m->status & 0xffff) == 0x0f0f) { 505 + pr_emerg(HW_ERR "FR Error: CPU Watchdog timer expire.\n"); 506 + return; 507 + } 508 + 509 + wrong_fr_mce: 510 + pr_emerg(HW_ERR "Corrupted FR MCE info?\n"); 511 + } 512 + 513 + static inline void amd_decode_err_code(u16 ec) 514 + { 515 + if (TLB_ERROR(ec)) { 516 + pr_emerg(HW_ERR "Transaction: %s, Cache Level: %s\n", 517 + TT_MSG(ec), LL_MSG(ec)); 518 + } else if (MEM_ERROR(ec)) { 519 + pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n", 520 + RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); 521 + } else if (BUS_ERROR(ec)) { 522 + pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, " 523 + "Participating Processor: %s\n", 524 + RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), 525 + PP_MSG(ec)); 526 + } else 527 + pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec); 528 + } 529 + 530 + /* 531 + * Filter out unwanted MCE signatures here. 532 + */ 533 + static bool amd_filter_mce(struct mce *m) 534 + { 535 + u8 xec = (m->status >> 16) & 0x1f; 536 + 537 + /* 538 + * NB GART TLB error reporting is disabled by default. 539 + */ 540 + if (m->bank == 4 && xec == 0x5 && !report_gart_errors) 541 + return true; 542 + 543 + return false; 544 + } 545 + 546 + int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) 547 + { 548 + struct mce *m = (struct mce *)data; 549 + int node, ecc; 550 + 551 + if (amd_filter_mce(m)) 552 + return NOTIFY_STOP; 553 + 554 + pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank); 555 + 556 + pr_cont("%sorrected error, other errors lost: %s, " 557 + "CPU context corrupt: %s", 558 + ((m->status & MCI_STATUS_UC) ? "Unc" : "C"), 559 + ((m->status & MCI_STATUS_OVER) ? "yes" : "no"), 560 + ((m->status & MCI_STATUS_PCC) ? "yes" : "no")); 561 + 562 + /* do the two bits[14:13] together */ 563 + ecc = (m->status >> 45) & 0x3; 564 + if (ecc) 565 + pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U")); 566 + 567 + pr_cont("\n"); 568 + 569 + switch (m->bank) { 570 + case 0: 571 + amd_decode_dc_mce(m); 572 + break; 573 + 574 + case 1: 575 + amd_decode_ic_mce(m); 576 + break; 577 + 578 + case 2: 579 + amd_decode_bu_mce(m); 580 + break; 581 + 582 + case 3: 583 + amd_decode_ls_mce(m); 584 + break; 585 + 586 + case 4: 587 + node = amd_get_nb_id(m->extcpu); 588 + amd_decode_nb_mce(node, m, 0); 589 + break; 590 + 591 + case 5: 592 + amd_decode_fr_mce(m); 593 + break; 594 + 595 + default: 596 + break; 597 + } 598 + 599 + amd_decode_err_code(m->status & 0xffff); 600 + 601 + return NOTIFY_STOP; 602 + } 603 + EXPORT_SYMBOL_GPL(amd_decode_mce); 604 + 605 + static struct notifier_block amd_mce_dec_nb = { 606 + .notifier_call = amd_decode_mce, 607 + }; 608 + 609 + static int __init mce_amd_init(void) 610 + { 611 + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) 612 + return 0; 613 + 614 + if ((boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x12) && 615 + (boot_cpu_data.x86 != 0x14 || boot_cpu_data.x86_model > 0xf)) 616 + return 0; 617 + 618 + fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL); 619 + if (!fam_ops) 620 + return -ENOMEM; 621 + 622 + switch (boot_cpu_data.x86) { 623 + case 0xf: 624 + fam_ops->dc_mce = k8_dc_mce; 625 + fam_ops->ic_mce = k8_ic_mce; 626 + fam_ops->nb_mce = k8_nb_mce; 627 + break; 628 + 629 + case 0x10: 630 + fam_ops->dc_mce = f10h_dc_mce; 631 + fam_ops->ic_mce = k8_ic_mce; 632 + fam_ops->nb_mce = f10h_nb_mce; 633 + break; 634 + 635 + case 0x11: 636 + fam_ops->dc_mce = k8_dc_mce; 637 + fam_ops->ic_mce = k8_ic_mce; 638 + fam_ops->nb_mce = f10h_nb_mce; 639 + break; 640 + 641 + case 0x12: 642 + fam_ops->dc_mce = f12h_dc_mce; 643 + fam_ops->ic_mce = k8_ic_mce; 644 + fam_ops->nb_mce = nb_noop_mce; 645 + break; 646 + 647 + case 0x14: 648 + nb_err_cpumask = 0x3; 649 + fam_ops->dc_mce = f14h_dc_mce; 650 + fam_ops->ic_mce = f14h_ic_mce; 651 + fam_ops->nb_mce = nb_noop_mce; 652 + break; 653 + 654 + default: 655 + printk(KERN_WARNING "Huh? What family is that: %d?!\n", 656 + boot_cpu_data.x86); 657 + kfree(fam_ops); 658 + return -EINVAL; 659 + } 660 + 661 + pr_info("MCE: In-kernel MCE decoding enabled.\n"); 662 + 663 + atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); 664 + 665 + return 0; 666 + } 667 + early_initcall(mce_amd_init); 668 + 669 + #ifdef MODULE 670 + static void __exit mce_amd_exit(void) 671 + { 672 + atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); 673 + kfree(fam_ops); 674 + } 675 + 676 + MODULE_DESCRIPTION("AMD MCE decoder"); 677 + MODULE_ALIAS("edac-mce-amd"); 678 + MODULE_LICENSE("GPL"); 679 + module_exit(mce_amd_exit); 680 + #endif
+171
drivers/edac/mce_amd_inj.c
··· 1 + /* 2 + * A simple MCE injection facility for testing the MCE decoding code. This 3 + * driver should be built as module so that it can be loaded on production 4 + * kernels for testing purposes. 5 + * 6 + * This file may be distributed under the terms of the GNU General Public 7 + * License version 2. 8 + * 9 + * Copyright (c) 2010: Borislav Petkov <borislav.petkov@amd.com> 10 + * Advanced Micro Devices Inc. 11 + */ 12 + 13 + #include <linux/kobject.h> 14 + #include <linux/sysdev.h> 15 + #include <linux/edac.h> 16 + #include <asm/mce.h> 17 + 18 + #include "mce_amd.h" 19 + 20 + struct edac_mce_attr { 21 + struct attribute attr; 22 + ssize_t (*show) (struct kobject *kobj, struct edac_mce_attr *attr, char *buf); 23 + ssize_t (*store)(struct kobject *kobj, struct edac_mce_attr *attr, 24 + const char *buf, size_t count); 25 + }; 26 + 27 + #define EDAC_MCE_ATTR(_name, _mode, _show, _store) \ 28 + static struct edac_mce_attr mce_attr_##_name = __ATTR(_name, _mode, _show, _store) 29 + 30 + static struct kobject *mce_kobj; 31 + 32 + /* 33 + * Collect all the MCi_XXX settings 34 + */ 35 + static struct mce i_mce; 36 + 37 + #define MCE_INJECT_STORE(reg) \ 38 + static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \ 39 + struct edac_mce_attr *attr, \ 40 + const char *data, size_t count)\ 41 + { \ 42 + int ret = 0; \ 43 + unsigned long value; \ 44 + \ 45 + ret = strict_strtoul(data, 16, &value); \ 46 + if (ret < 0) \ 47 + printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \ 48 + \ 49 + i_mce.reg = value; \ 50 + \ 51 + return count; \ 52 + } 53 + 54 + MCE_INJECT_STORE(status); 55 + MCE_INJECT_STORE(misc); 56 + MCE_INJECT_STORE(addr); 57 + 58 + #define MCE_INJECT_SHOW(reg) \ 59 + static ssize_t edac_inject_##reg##_show(struct kobject *kobj, \ 60 + struct edac_mce_attr *attr, \ 61 + char *buf) \ 62 + { \ 63 + return sprintf(buf, "0x%016llx\n", i_mce.reg); \ 64 + } 65 + 66 + MCE_INJECT_SHOW(status); 67 + MCE_INJECT_SHOW(misc); 68 + MCE_INJECT_SHOW(addr); 69 + 70 + EDAC_MCE_ATTR(status, 0644, edac_inject_status_show, edac_inject_status_store); 71 + EDAC_MCE_ATTR(misc, 0644, edac_inject_misc_show, edac_inject_misc_store); 72 + EDAC_MCE_ATTR(addr, 0644, edac_inject_addr_show, edac_inject_addr_store); 73 + 74 + /* 75 + * This denotes into which bank we're injecting and triggers 76 + * the injection, at the same time. 77 + */ 78 + static ssize_t edac_inject_bank_store(struct kobject *kobj, 79 + struct edac_mce_attr *attr, 80 + const char *data, size_t count) 81 + { 82 + int ret = 0; 83 + unsigned long value; 84 + 85 + ret = strict_strtoul(data, 10, &value); 86 + if (ret < 0) { 87 + printk(KERN_ERR "Invalid bank value!\n"); 88 + return -EINVAL; 89 + } 90 + 91 + if (value > 5) { 92 + printk(KERN_ERR "Non-existant MCE bank: %lu\n", value); 93 + return -EINVAL; 94 + } 95 + 96 + i_mce.bank = value; 97 + 98 + amd_decode_mce(NULL, 0, &i_mce); 99 + 100 + return count; 101 + } 102 + 103 + static ssize_t edac_inject_bank_show(struct kobject *kobj, 104 + struct edac_mce_attr *attr, char *buf) 105 + { 106 + return sprintf(buf, "%d\n", i_mce.bank); 107 + } 108 + 109 + EDAC_MCE_ATTR(bank, 0644, edac_inject_bank_show, edac_inject_bank_store); 110 + 111 + static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc, 112 + &mce_attr_addr, &mce_attr_bank 113 + }; 114 + 115 + static int __init edac_init_mce_inject(void) 116 + { 117 + struct sysdev_class *edac_class = NULL; 118 + int i, err = 0; 119 + 120 + edac_class = edac_get_sysfs_class(); 121 + if (!edac_class) 122 + return -EINVAL; 123 + 124 + mce_kobj = kobject_create_and_add("mce", &edac_class->kset.kobj); 125 + if (!mce_kobj) { 126 + printk(KERN_ERR "Error creating a mce kset.\n"); 127 + err = -ENOMEM; 128 + goto err_mce_kobj; 129 + } 130 + 131 + for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) { 132 + err = sysfs_create_file(mce_kobj, &sysfs_attrs[i]->attr); 133 + if (err) { 134 + printk(KERN_ERR "Error creating %s in sysfs.\n", 135 + sysfs_attrs[i]->attr.name); 136 + goto err_sysfs_create; 137 + } 138 + } 139 + return 0; 140 + 141 + err_sysfs_create: 142 + while (i-- >= 0) 143 + sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); 144 + 145 + kobject_del(mce_kobj); 146 + 147 + err_mce_kobj: 148 + edac_put_sysfs_class(); 149 + 150 + return err; 151 + } 152 + 153 + static void __exit edac_exit_mce_inject(void) 154 + { 155 + int i; 156 + 157 + for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) 158 + sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); 159 + 160 + kobject_del(mce_kobj); 161 + 162 + edac_put_sysfs_class(); 163 + } 164 + 165 + module_init(edac_init_mce_inject); 166 + module_exit(edac_exit_mce_inject); 167 + 168 + MODULE_LICENSE("GPL"); 169 + MODULE_AUTHOR("Borislav Petkov <borislav.petkov@amd.com>"); 170 + MODULE_AUTHOR("AMD Inc."); 171 + MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding");
+4
include/linux/edac.h
··· 13 13 #define _LINUX_EDAC_H_ 14 14 15 15 #include <asm/atomic.h> 16 + #include <linux/sysdev.h> 16 17 17 18 #define EDAC_OPSTATE_INVAL -1 18 19 #define EDAC_OPSTATE_POLL 0 ··· 23 22 extern int edac_op_state; 24 23 extern int edac_err_assert; 25 24 extern atomic_t edac_handlers; 25 + extern struct sysdev_class edac_class; 26 26 27 27 extern int edac_handler_set(void); 28 28 extern void edac_atomic_assert_error(void); 29 + extern struct sysdev_class *edac_get_sysfs_class(void); 30 + extern void edac_put_sysfs_class(void); 29 31 30 32 static inline void opstate_init(void) 31 33 {