Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
amd64_edac: Minor formatting fix
amd64_edac: Fix operator precendence error
edac, mc: Improve scrub rate handling
amd64_edac: Correct scrub rate setting
amd64_edac: Fix DCT base address selector
amd64_edac: Remove polling mechanism
x86, mce: Notify about corrected events too
amd64_edac: Remove unneeded defines
edac: Remove EDAC_DEBUG_VERBOSE
amd64_edac: Sanitize syndrome extraction

+120 -278
+1
arch/x86/kernel/cpu/mcheck/mce.c
··· 600 */ 601 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { 602 mce_log(&m); 603 add_taint(TAINT_MACHINE_CHECK); 604 } 605
··· 600 */ 601 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { 602 mce_log(&m); 603 + atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); 604 add_taint(TAINT_MACHINE_CHECK); 605 } 606
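
The added atomic_notifier_call_chain() pushes corrected errors seen by the poller down the x86_mce_decoder_chain as well, which is what lets amd64_edac drop its private polling further down in this merge. A minimal sketch of a consumer hanging off that chain; the callback name and the message fields printed are illustrative, not taken from this diff.

#include <linux/kernel.h>
#include <linux/notifier.h>
#include <asm/mce.h>

/* hypothetical consumer of the corrected-error notifications */
static int example_mce_decode(struct notifier_block *nb, unsigned long val,
			      void *data)
{
	struct mce *m = data;	/* the &m handed over by mce.c above */

	pr_info("example: bank %d, status 0x%016llx\n", m->bank, m->status);

	return NOTIFY_OK;
}

static struct notifier_block example_mce_nb = {
	.notifier_call	= example_mce_decode,
};

/* registered from a module init path, e.g.:
 *	atomic_notifier_chain_register(&x86_mce_decoder_chain, &example_mce_nb);
 */
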
-8
drivers/edac/Kconfig
··· 39 there're four debug levels (x=0,1,2,3 from low to high). 40 Usually you should select 'N'. 41 42 - config EDAC_DEBUG_VERBOSE 43 - bool "More verbose debugging" 44 - depends on EDAC_DEBUG 45 - help 46 - This option makes debugging information more verbose. 47 - Source file name and line number where debugging message 48 - printed will be added to debugging message. 49 - 50 config EDAC_DECODE_MCE 51 tristate "Decode MCEs in human-readable form (only on AMD for now)" 52 depends on CPU_SUP_AMD && X86_MCE
··· 39 there're four debug levels (x=0,1,2,3 from low to high). 40 Usually you should select 'N'. 41 42 config EDAC_DECODE_MCE 43 tristate "Decode MCEs in human-readable form (only on AMD for now)" 44 depends on CPU_SUP_AMD && X86_MCE
+55 -156
drivers/edac/amd64_edac.c
··· 160 return 0; 161 } 162 163 - static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 *bandwidth) 164 { 165 struct amd64_pvt *pvt = mci->pvt_info; 166 u32 min_scrubrate = 0x0; ··· 178 179 default: 180 amd64_printk(KERN_ERR, "Unsupported family!\n"); 181 - break; 182 } 183 - return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, *bandwidth, 184 - min_scrubrate); 185 } 186 187 static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw) ··· 796 797 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); 798 799 static void amd64_cpu_display_info(struct amd64_pvt *pvt) 800 { 801 if (boot_cpu_data.x86 == 0x11) ··· 892 amd64_debug_display_dimm_sizes(0, pvt); 893 return; 894 } 895 896 /* Only if NOT ganged does dclr1 have valid info */ 897 if (!dct_ganging_enabled(pvt)) ··· 1109 } 1110 1111 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, 1112 - struct err_regs *info, 1113 - u64 sys_addr) 1114 { 1115 struct mem_ctl_info *src_mci; 1116 - unsigned short syndrome; 1117 int channel, csrow; 1118 u32 page, offset; 1119 1120 - /* Extract the syndrome parts and form a 16-bit syndrome */ 1121 - syndrome = HIGH_SYNDROME(info->nbsl) << 8; 1122 - syndrome |= LOW_SYNDROME(info->nbsh); 1123 1124 /* CHIPKILL enabled */ 1125 - if (info->nbcfg & K8_NBCFG_CHIPKILL) { 1126 channel = get_channel_from_ecc_syndrome(mci, syndrome); 1127 if (channel < 0) { 1128 /* ··· 1128 * as suspect. 1129 */ 1130 amd64_mc_printk(mci, KERN_WARNING, 1131 - "unknown syndrome 0x%x - possible error " 1132 - "reporting race\n", syndrome); 1133 edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); 1134 return; 1135 } ··· 1435 u64 chan_off; 1436 1437 if (hi_range_sel) { 1438 - if (!(dct_sel_base_addr & 0xFFFFF800) && 1439 hole_valid && (sys_addr >= 0x100000000ULL)) 1440 chan_off = hole_off << 16; 1441 else ··· 1659 * (MCX_ADDR). 1660 */ 1661 static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, 1662 - struct err_regs *info, 1663 u64 sys_addr) 1664 { 1665 struct amd64_pvt *pvt = mci->pvt_info; 1666 u32 page, offset; 1667 - unsigned short syndrome; 1668 int nid, csrow, chan = 0; 1669 1670 csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); 1671 ··· 1676 1677 error_address_to_page_and_offset(sys_addr, &page, &offset); 1678 1679 - syndrome = HIGH_SYNDROME(info->nbsl) << 8; 1680 - syndrome |= LOW_SYNDROME(info->nbsh); 1681 1682 /* 1683 * We need the syndromes for channel detection only when we're 1684 * ganged. Otherwise @chan should already contain the channel at 1685 * this point. 
1686 */ 1687 - if (dct_ganging_enabled(pvt) && pvt->nbcfg & K8_NBCFG_CHIPKILL) 1688 chan = get_channel_from_ecc_syndrome(mci, syndrome); 1689 1690 if (chan >= 0) ··· 1882 }; 1883 1884 static int decode_syndrome(u16 syndrome, u16 *vectors, int num_vecs, 1885 - int v_dim) 1886 { 1887 unsigned int i, err_sym; 1888 ··· 1959 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) 1960 { 1961 struct amd64_pvt *pvt = mci->pvt_info; 1962 - u32 value = 0; 1963 - int err_sym = 0; 1964 1965 - if (boot_cpu_data.x86 == 0x10) { 1966 - 1967 - amd64_read_pci_cfg(pvt->misc_f3_ctl, 0x180, &value); 1968 - 1969 - /* F3x180[EccSymbolSize]=1 => x8 symbols */ 1970 - if (boot_cpu_data.x86_model > 7 && 1971 - value & BIT(25)) { 1972 - err_sym = decode_syndrome(syndrome, x8_vectors, 1973 - ARRAY_SIZE(x8_vectors), 8); 1974 - return map_err_sym_to_channel(err_sym, 8); 1975 - } 1976 } 1977 - err_sym = decode_syndrome(syndrome, x4_vectors, ARRAY_SIZE(x4_vectors), 4); 1978 - return map_err_sym_to_channel(err_sym, 4); 1979 - } 1980 1981 - /* 1982 - * Check for valid error in the NB Status High register. If so, proceed to read 1983 - * NB Status Low, NB Address Low and NB Address High registers and store data 1984 - * into error structure. 1985 - * 1986 - * Returns: 1987 - * - 1: if hardware regs contains valid error info 1988 - * - 0: if no valid error is indicated 1989 - */ 1990 - static int amd64_get_error_info_regs(struct mem_ctl_info *mci, 1991 - struct err_regs *regs) 1992 - { 1993 - struct amd64_pvt *pvt; 1994 - struct pci_dev *misc_f3_ctl; 1995 - 1996 - pvt = mci->pvt_info; 1997 - misc_f3_ctl = pvt->misc_f3_ctl; 1998 - 1999 - if (amd64_read_pci_cfg(misc_f3_ctl, K8_NBSH, &regs->nbsh)) 2000 - return 0; 2001 - 2002 - if (!(regs->nbsh & K8_NBSH_VALID_BIT)) 2003 - return 0; 2004 - 2005 - /* valid error, read remaining error information registers */ 2006 - if (amd64_read_pci_cfg(misc_f3_ctl, K8_NBSL, &regs->nbsl) || 2007 - amd64_read_pci_cfg(misc_f3_ctl, K8_NBEAL, &regs->nbeal) || 2008 - amd64_read_pci_cfg(misc_f3_ctl, K8_NBEAH, &regs->nbeah) || 2009 - amd64_read_pci_cfg(misc_f3_ctl, K8_NBCFG, &regs->nbcfg)) 2010 - return 0; 2011 - 2012 - return 1; 2013 - } 2014 - 2015 - /* 2016 - * This function is called to retrieve the error data from hardware and store it 2017 - * in the info structure. 2018 - * 2019 - * Returns: 2020 - * - 1: if a valid error is found 2021 - * - 0: if no error is found 2022 - */ 2023 - static int amd64_get_error_info(struct mem_ctl_info *mci, 2024 - struct err_regs *info) 2025 - { 2026 - struct amd64_pvt *pvt; 2027 - struct err_regs regs; 2028 - 2029 - pvt = mci->pvt_info; 2030 - 2031 - if (!amd64_get_error_info_regs(mci, info)) 2032 - return 0; 2033 - 2034 - /* 2035 - * Here's the problem with the K8's EDAC reporting: There are four 2036 - * registers which report pieces of error information. They are shared 2037 - * between CEs and UEs. Furthermore, contrary to what is stated in the 2038 - * BKDG, the overflow bit is never used! Every error always updates the 2039 - * reporting registers. 2040 - * 2041 - * Can you see the race condition? All four error reporting registers 2042 - * must be read before a new error updates them! There is no way to read 2043 - * all four registers atomically. The best than can be done is to detect 2044 - * that a race has occured and then report the error without any kind of 2045 - * precision. 
2046 - * 2047 - * What is still positive is that errors are still reported and thus 2048 - * problems can still be detected - just not localized because the 2049 - * syndrome and address are spread out across registers. 2050 - * 2051 - * Grrrrr!!!!! Here's hoping that AMD fixes this in some future K8 rev. 2052 - * UEs and CEs should have separate register sets with proper overflow 2053 - * bits that are used! At very least the problem can be fixed by 2054 - * honoring the ErrValid bit in 'nbsh' and not updating registers - just 2055 - * set the overflow bit - unless the current error is CE and the new 2056 - * error is UE which would be the only situation for overwriting the 2057 - * current values. 2058 - */ 2059 - 2060 - regs = *info; 2061 - 2062 - /* Use info from the second read - most current */ 2063 - if (unlikely(!amd64_get_error_info_regs(mci, info))) 2064 - return 0; 2065 - 2066 - /* clear the error bits in hardware */ 2067 - pci_write_bits32(pvt->misc_f3_ctl, K8_NBSH, 0, K8_NBSH_VALID_BIT); 2068 - 2069 - /* Check for the possible race condition */ 2070 - if ((regs.nbsh != info->nbsh) || 2071 - (regs.nbsl != info->nbsl) || 2072 - (regs.nbeah != info->nbeah) || 2073 - (regs.nbeal != info->nbeal)) { 2074 - amd64_mc_printk(mci, KERN_WARNING, 2075 - "hardware STATUS read access race condition " 2076 - "detected!\n"); 2077 - return 0; 2078 - } 2079 - return 1; 2080 } 2081 2082 /* ··· 2080 * catastrophic. 2081 */ 2082 if (info->nbsh & K8_NBSH_OVERFLOW) 2083 - edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR "Error Overflow"); 2084 } 2085 2086 void amd64_decode_bus_error(int node_id, struct err_regs *regs) ··· 2099 if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors) 2100 edac_mc_handle_ue_no_info(mci, "UE bit is set"); 2101 2102 - } 2103 - 2104 - /* 2105 - * The main polling 'check' function, called FROM the edac core to perform the 2106 - * error checking and if an error is encountered, error processing. 2107 - */ 2108 - static void amd64_check(struct mem_ctl_info *mci) 2109 - { 2110 - struct err_regs regs; 2111 - 2112 - if (amd64_get_error_info(mci, &regs)) { 2113 - struct amd64_pvt *pvt = mci->pvt_info; 2114 - amd_decode_nb_mce(pvt->mc_node_id, &regs, 1); 2115 - } 2116 } 2117 2118 /* ··· 2173 static void amd64_read_mc_registers(struct amd64_pvt *pvt) 2174 { 2175 u64 msr_val; 2176 int dram; 2177 2178 /* ··· 2239 amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); 2240 amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0); 2241 2242 - if (!dct_ganging_enabled(pvt) && boot_cpu_data.x86 >= 0x10) { 2243 - amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1); 2244 - amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_1, &pvt->dchr1); 2245 } 2246 amd64_dump_misc_regs(pvt); 2247 } 2248 ··· 2640 mci->ctl_name = get_amd_family_name(pvt->mc_type_index); 2641 mci->dev_name = pci_name(pvt->dram_f2_ctl); 2642 mci->ctl_page_to_phys = NULL; 2643 - 2644 - /* IMPORTANT: Set the polling 'check' function in this module */ 2645 - mci->edac_check = amd64_check; 2646 2647 /* memory scrubber interface */ 2648 mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
··· 160 return 0; 161 } 162 163 + static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bandwidth) 164 { 165 struct amd64_pvt *pvt = mci->pvt_info; 166 u32 min_scrubrate = 0x0; ··· 178 179 default: 180 amd64_printk(KERN_ERR, "Unsupported family!\n"); 181 + return -EINVAL; 182 } 183 + return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, bandwidth, 184 + min_scrubrate); 185 } 186 187 static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw) ··· 796 797 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); 798 799 + static u16 extract_syndrome(struct err_regs *err) 800 + { 801 + return ((err->nbsh >> 15) & 0xff) | ((err->nbsl >> 16) & 0xff00); 802 + } 803 + 804 static void amd64_cpu_display_info(struct amd64_pvt *pvt) 805 { 806 if (boot_cpu_data.x86 == 0x11) ··· 887 amd64_debug_display_dimm_sizes(0, pvt); 888 return; 889 } 890 + 891 + amd64_printk(KERN_INFO, "using %s syndromes.\n", 892 + ((pvt->syn_type == 8) ? "x8" : "x4")); 893 894 /* Only if NOT ganged does dclr1 have valid info */ 895 if (!dct_ganging_enabled(pvt)) ··· 1101 } 1102 1103 static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, 1104 + struct err_regs *err_info, u64 sys_addr) 1105 { 1106 struct mem_ctl_info *src_mci; 1107 int channel, csrow; 1108 u32 page, offset; 1109 + u16 syndrome; 1110 1111 + syndrome = extract_syndrome(err_info); 1112 1113 /* CHIPKILL enabled */ 1114 + if (err_info->nbcfg & K8_NBCFG_CHIPKILL) { 1115 channel = get_channel_from_ecc_syndrome(mci, syndrome); 1116 if (channel < 0) { 1117 /* ··· 1123 * as suspect. 1124 */ 1125 amd64_mc_printk(mci, KERN_WARNING, 1126 + "unknown syndrome 0x%04x - possible " 1127 + "error reporting race\n", syndrome); 1128 edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); 1129 return; 1130 } ··· 1430 u64 chan_off; 1431 1432 if (hi_range_sel) { 1433 + if (!(dct_sel_base_addr & 0xFFFF0000) && 1434 hole_valid && (sys_addr >= 0x100000000ULL)) 1435 chan_off = hole_off << 16; 1436 else ··· 1654 * (MCX_ADDR). 1655 */ 1656 static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, 1657 + struct err_regs *err_info, 1658 u64 sys_addr) 1659 { 1660 struct amd64_pvt *pvt = mci->pvt_info; 1661 u32 page, offset; 1662 int nid, csrow, chan = 0; 1663 + u16 syndrome; 1664 1665 csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); 1666 ··· 1671 1672 error_address_to_page_and_offset(sys_addr, &page, &offset); 1673 1674 + syndrome = extract_syndrome(err_info); 1675 1676 /* 1677 * We need the syndromes for channel detection only when we're 1678 * ganged. Otherwise @chan should already contain the channel at 1679 * this point. 
1680 */ 1681 + if (dct_ganging_enabled(pvt) && (pvt->nbcfg & K8_NBCFG_CHIPKILL)) 1682 chan = get_channel_from_ecc_syndrome(mci, syndrome); 1683 1684 if (chan >= 0) ··· 1878 }; 1879 1880 static int decode_syndrome(u16 syndrome, u16 *vectors, int num_vecs, 1881 + int v_dim) 1882 { 1883 unsigned int i, err_sym; 1884 ··· 1955 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome) 1956 { 1957 struct amd64_pvt *pvt = mci->pvt_info; 1958 + int err_sym = -1; 1959 1960 + if (pvt->syn_type == 8) 1961 + err_sym = decode_syndrome(syndrome, x8_vectors, 1962 + ARRAY_SIZE(x8_vectors), 1963 + pvt->syn_type); 1964 + else if (pvt->syn_type == 4) 1965 + err_sym = decode_syndrome(syndrome, x4_vectors, 1966 + ARRAY_SIZE(x4_vectors), 1967 + pvt->syn_type); 1968 + else { 1969 + amd64_printk(KERN_WARNING, "%s: Illegal syndrome type: %u\n", 1970 + __func__, pvt->syn_type); 1971 + return err_sym; 1972 } 1973 1974 + return map_err_sym_to_channel(err_sym, pvt->syn_type); 1975 } 1976 1977 /* ··· 2177 * catastrophic. 2178 */ 2179 if (info->nbsh & K8_NBSH_OVERFLOW) 2180 + edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR " Error Overflow"); 2181 } 2182 2183 void amd64_decode_bus_error(int node_id, struct err_regs *regs) ··· 2196 if (regs->nbsh & K8_NBSH_UC_ERR && !report_gart_errors) 2197 edac_mc_handle_ue_no_info(mci, "UE bit is set"); 2198 2199 } 2200 2201 /* ··· 2284 static void amd64_read_mc_registers(struct amd64_pvt *pvt) 2285 { 2286 u64 msr_val; 2287 + u32 tmp; 2288 int dram; 2289 2290 /* ··· 2349 amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0); 2350 amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0); 2351 2352 + if (boot_cpu_data.x86 >= 0x10) { 2353 + if (!dct_ganging_enabled(pvt)) { 2354 + amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1); 2355 + amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_1, &pvt->dchr1); 2356 + } 2357 + amd64_read_pci_cfg(pvt->misc_f3_ctl, EXT_NB_MCA_CFG, &tmp); 2358 } 2359 + 2360 + if (boot_cpu_data.x86 == 0x10 && 2361 + boot_cpu_data.x86_model > 7 && 2362 + /* F3x180[EccSymbolSize]=1 => x8 symbols */ 2363 + tmp & BIT(25)) 2364 + pvt->syn_type = 8; 2365 + else 2366 + pvt->syn_type = 4; 2367 + 2368 amd64_dump_misc_regs(pvt); 2369 } 2370 ··· 2738 mci->ctl_name = get_amd_family_name(pvt->mc_type_index); 2739 mci->dev_name = pci_name(pvt->dram_f2_ctl); 2740 mci->ctl_page_to_phys = NULL; 2741 2742 /* memory scrubber interface */ 2743 mci->set_sdram_scrub_rate = amd64_set_scrub_rate;
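
The syndrome handling is the heart of this file's diff: extract_syndrome() packs the two syndrome halves from NBSL/NBSH into one 16-bit value, and get_channel_from_ecc_syndrome() then picks the x4 or x8 vector table from the cached pvt->syn_type instead of re-reading F3x180 on every error. A standalone user-space sketch of just the bit packing, with made-up register values to show where each byte lands:

#include <stdint.h>
#include <stdio.h>

/* Mirrors extract_syndrome() from the diff:
 * syndrome[7:0]  come from NBSH bits [22:15],
 * syndrome[15:8] come from NBSL bits [31:24].
 */
static uint16_t extract_syndrome(uint32_t nbsl, uint32_t nbsh)
{
	return ((nbsh >> 15) & 0xff) | ((nbsl >> 16) & 0xff00);
}

int main(void)
{
	/* made-up register contents: 0xAB in NBSL[31:24], 0xCD in NBSH[22:15] */
	uint32_t nbsl = 0xABu << 24;
	uint32_t nbsh = 0xCDu << 15;

	printf("syndrome = 0x%04x\n", extract_syndrome(nbsl, nbsh)); /* 0xabcd */
	return 0;
}
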
+13 -35
drivers/edac/amd64_edac.h
··· 244 245 246 #define F10_DCTL_SEL_LOW 0x110 247 - 248 - #define dct_sel_baseaddr(pvt) \ 249 - ((pvt->dram_ctl_select_low) & 0xFFFFF800) 250 - 251 - #define dct_sel_interleave_addr(pvt) \ 252 - (((pvt->dram_ctl_select_low) >> 6) & 0x3) 253 - 254 - enum { 255 - F10_DCTL_SEL_LOW_DctSelHiRngEn = BIT(0), 256 - F10_DCTL_SEL_LOW_DctSelIntLvEn = BIT(2), 257 - F10_DCTL_SEL_LOW_DctGangEn = BIT(4), 258 - F10_DCTL_SEL_LOW_DctDatIntLv = BIT(5), 259 - F10_DCTL_SEL_LOW_DramEnable = BIT(8), 260 - F10_DCTL_SEL_LOW_MemCleared = BIT(10), 261 - }; 262 - 263 - #define dct_high_range_enabled(pvt) \ 264 - (pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctSelHiRngEn) 265 - 266 - #define dct_interleave_enabled(pvt) \ 267 - (pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctSelIntLvEn) 268 - 269 - #define dct_ganging_enabled(pvt) \ 270 - (pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctGangEn) 271 - 272 - #define dct_data_intlv_enabled(pvt) \ 273 - (pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DctDatIntLv) 274 - 275 - #define dct_dram_enabled(pvt) \ 276 - (pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_DramEnable) 277 - 278 - #define dct_memory_cleared(pvt) \ 279 - (pvt->dram_ctl_select_low & F10_DCTL_SEL_LOW_MemCleared) 280 - 281 282 #define F10_DCTL_SEL_HIGH 0x114 283 - 284 285 /* 286 * Function 3 - Misc Control ··· 355 #define K8_NBCAP_SECDED BIT(3) 356 #define K8_NBCAP_DCT_DUAL BIT(0) 357 358 /* MSRs */ 359 #define K8_MSR_MCGCTL_NBE BIT(4) 360 ··· 445 u32 dram_ctl_select_low; /* DRAM Controller Select Low Reg */ 446 u32 dram_ctl_select_high; /* DRAM Controller Select High Reg */ 447 u32 online_spare; /* On-Line spare Reg */ 448 449 /* temp storage for when input is received from sysfs */ 450 struct err_regs ctl_error_info;
··· 244 245 246 #define F10_DCTL_SEL_LOW 0x110 247 + #define dct_sel_baseaddr(pvt) ((pvt->dram_ctl_select_low) & 0xFFFFF800) 248 + #define dct_sel_interleave_addr(pvt) (((pvt->dram_ctl_select_low) >> 6) & 0x3) 249 + #define dct_high_range_enabled(pvt) (pvt->dram_ctl_select_low & BIT(0)) 250 + #define dct_interleave_enabled(pvt) (pvt->dram_ctl_select_low & BIT(2)) 251 + #define dct_ganging_enabled(pvt) (pvt->dram_ctl_select_low & BIT(4)) 252 + #define dct_data_intlv_enabled(pvt) (pvt->dram_ctl_select_low & BIT(5)) 253 + #define dct_dram_enabled(pvt) (pvt->dram_ctl_select_low & BIT(8)) 254 + #define dct_memory_cleared(pvt) (pvt->dram_ctl_select_low & BIT(10)) 255 256 #define F10_DCTL_SEL_HIGH 0x114 257 258 /* 259 * Function 3 - Misc Control ··· 382 #define K8_NBCAP_SECDED BIT(3) 383 #define K8_NBCAP_DCT_DUAL BIT(0) 384 385 + #define EXT_NB_MCA_CFG 0x180 386 + 387 /* MSRs */ 388 #define K8_MSR_MCGCTL_NBE BIT(4) 389 ··· 470 u32 dram_ctl_select_low; /* DRAM Controller Select Low Reg */ 471 u32 dram_ctl_select_high; /* DRAM Controller Select High Reg */ 472 u32 online_spare; /* On-Line spare Reg */ 473 + 474 + /* x4 or x8 syndromes in use */ 475 + u8 syn_type; 476 477 /* temp storage for when input is received from sysfs */ 478 struct err_regs ctl_error_info;
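
The header change collapses the F10_DCTL_SEL_LOW enum plus per-flag macros into direct BIT() tests, and adds the EXT_NB_MCA_CFG (F3x180) offset together with the cached syn_type field that amd64_read_mc_registers() now fills in. A tiny user-space check of the same bit tests, against an assumed register value, just to show which bits the new macros look at (the kernel macros take pvt and read pvt->dram_ctl_select_low; this sketch takes the raw register value):

#include <stdint.h>
#include <stdio.h>

#define BIT(n)	(1U << (n))

/* same bit positions as the new dct_* macros in amd64_edac.h */
#define dct_high_range_enabled(reg)	((reg) & BIT(0))
#define dct_interleave_enabled(reg)	((reg) & BIT(2))
#define dct_ganging_enabled(reg)	((reg) & BIT(4))
#define dct_memory_cleared(reg)		((reg) & BIT(10))

int main(void)
{
	/* assumed F2x110 (DRAM Controller Select Low) contents */
	uint32_t dram_ctl_select_low = BIT(4) | BIT(10);

	printf("ganged: %s, mem cleared: %s, hi range: %s\n",
	       dct_ganging_enabled(dram_ctl_select_low) ? "yes" : "no",
	       dct_memory_cleared(dram_ctl_select_low) ? "yes" : "no",
	       dct_high_range_enabled(dram_ctl_select_low) ? "yes" : "no");
	return 0;
}
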
+2 -2
drivers/edac/e752x_edac.c
··· 958 } 959 960 /* Program byte/sec bandwidth scrub rate to hardware */ 961 - static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *new_bw) 962 { 963 const struct scrubrate *scrubrates; 964 struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; ··· 975 * desired rate and program the cooresponding register value. 976 */ 977 for (i = 0; scrubrates[i].bandwidth != SDRATE_EOT; i++) 978 - if (scrubrates[i].bandwidth >= *new_bw) 979 break; 980 981 if (scrubrates[i].bandwidth == SDRATE_EOT)
··· 958 } 959 960 /* Program byte/sec bandwidth scrub rate to hardware */ 961 + static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw) 962 { 963 const struct scrubrate *scrubrates; 964 struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info; ··· 975 * desired rate and program the cooresponding register value. 976 */ 977 for (i = 0; scrubrates[i].bandwidth != SDRATE_EOT; i++) 978 + if (scrubrates[i].bandwidth >= new_bw) 979 break; 980 981 if (scrubrates[i].bandwidth == SDRATE_EOT)
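
e752x only switches its callback from a u32 pointer to a plain u32, now that the core no longer expects the driver to write the applied rate back through the argument. For reference, the selection rule it implements, pick the first table entry whose bandwidth is at least the requested one, shown as a standalone sketch with an invented rate table:

#include <stdint.h>
#include <stdio.h>

#define SDRATE_EOT	0xffffffffu	/* end-of-table marker (assumed value) */

struct scrubrate {
	uint32_t bandwidth;	/* bytes/sec */
	uint8_t  scrub_code;	/* register encoding */
};

/* invented example table, ascending bandwidth, EOT-terminated */
static const struct scrubrate rates[] = {
	{  5000, 0x1 },
	{ 10000, 0x2 },
	{ 20000, 0x3 },
	{ SDRATE_EOT, 0x0 },
};

/* same rule as set_sdram_scrub_rate(): first entry >= requested rate */
static int pick_scrub_code(uint32_t new_bw)
{
	int i;

	for (i = 0; rates[i].bandwidth != SDRATE_EOT; i++)
		if (rates[i].bandwidth >= new_bw)
			return rates[i].scrub_code;

	return -1;	/* requested rate higher than anything supported */
}

int main(void)
{
	printf("code for 8000 B/s: %d\n", pick_scrub_code(8000)); /* 0x2 */
	return 0;
}
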
+1 -16
drivers/edac/edac_core.h
··· 49 #define edac_printk(level, prefix, fmt, arg...) \ 50 printk(level "EDAC " prefix ": " fmt, ##arg) 51 52 - #define edac_printk_verbose(level, prefix, fmt, arg...) \ 53 - printk(level "EDAC " prefix ": " "in %s, line at %d: " fmt, \ 54 - __FILE__, __LINE__, ##arg) 55 - 56 #define edac_mc_printk(mci, level, fmt, arg...) \ 57 printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg) 58 59 #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \ 60 printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg) 61 62 - /* edac_device printk */ 63 #define edac_device_printk(ctl, level, fmt, arg...) \ 64 printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg) 65 66 - /* edac_pci printk */ 67 #define edac_pci_printk(ctl, level, fmt, arg...) \ 68 printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg) 69 ··· 70 extern int edac_debug_level; 71 extern const char *edac_mem_types[]; 72 73 - #ifndef CONFIG_EDAC_DEBUG_VERBOSE 74 #define edac_debug_printk(level, fmt, arg...) \ 75 do { \ 76 if (level <= edac_debug_level) \ 77 edac_printk(KERN_DEBUG, EDAC_DEBUG, \ 78 "%s: " fmt, __func__, ##arg); \ 79 } while (0) 80 - #else /* CONFIG_EDAC_DEBUG_VERBOSE */ 81 - #define edac_debug_printk(level, fmt, arg...) \ 82 - do { \ 83 - if (level <= edac_debug_level) \ 84 - edac_printk_verbose(KERN_DEBUG, EDAC_DEBUG, fmt, \ 85 - ##arg); \ 86 - } while (0) 87 - #endif 88 89 #define debugf0( ... ) edac_debug_printk(0, __VA_ARGS__ ) 90 #define debugf1( ... ) edac_debug_printk(1, __VA_ARGS__ ) ··· 378 internal representation and configures whatever else needs 379 to be configured. 380 */ 381 - int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 * bw); 382 383 /* Get the current sdram memory scrub rate from the internal 384 representation and converts it to the closest matching
··· 49 #define edac_printk(level, prefix, fmt, arg...) \ 50 printk(level "EDAC " prefix ": " fmt, ##arg) 51 52 #define edac_mc_printk(mci, level, fmt, arg...) \ 53 printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg) 54 55 #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \ 56 printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg) 57 58 #define edac_device_printk(ctl, level, fmt, arg...) \ 59 printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg) 60 61 #define edac_pci_printk(ctl, level, fmt, arg...) \ 62 printk(level "EDAC PCI%d: " fmt, ctl->pci_idx, ##arg) 63 ··· 76 extern int edac_debug_level; 77 extern const char *edac_mem_types[]; 78 79 #define edac_debug_printk(level, fmt, arg...) \ 80 do { \ 81 if (level <= edac_debug_level) \ 82 edac_printk(KERN_DEBUG, EDAC_DEBUG, \ 83 "%s: " fmt, __func__, ##arg); \ 84 } while (0) 85 86 #define debugf0( ... ) edac_debug_printk(0, __VA_ARGS__ ) 87 #define debugf1( ... ) edac_debug_printk(1, __VA_ARGS__ ) ··· 393 internal representation and configures whatever else needs 394 to be configured. 395 */ 396 + int (*set_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 bw); 397 398 /* Get the current sdram memory scrub rate from the internal 399 representation and converts it to the closest matching
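
With this header change, set_sdram_scrub_rate takes the requested bandwidth by value and reports success purely through its return code; get_sdram_scrub_rate (untouched here) still returns the applied rate via its pointer argument. A hedged sketch of what a driver-side pair now looks like; "foo" and its limit are invented for illustration:

#include "edac_core.h"		/* struct mem_ctl_info, for drivers in drivers/edac/ */
#include <linux/errno.h>

#define FOO_MAX_SCRUB_BW	32768	/* assumed per-driver limit, bytes/sec */

/* invented driver, used only to show the new callback shape */
static int foo_set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
{
	if (bw > FOO_MAX_SCRUB_BW)
		return -EINVAL;

	/* ... program the hardware scrub rate register here ... */
	return 0;
}

static int foo_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
{
	/* ... read the programmed rate back from the hardware ... */
	*bw = 0;
	return 0;
}

/* wired up at probe time:
 *	mci->set_sdram_scrub_rate = foo_set_scrub_rate;
 *	mci->get_sdram_scrub_rate = foo_get_scrub_rate;
 */
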
+37 -49
drivers/edac/edac_mc_sysfs.c
··· 124 [EDAC_S16ECD16ED] = "S16ECD16ED" 125 }; 126 127 - 128 - 129 - static ssize_t memctrl_int_store(void *ptr, const char *buffer, size_t count) 130 - { 131 - int *value = (int *)ptr; 132 - 133 - if (isdigit(*buffer)) 134 - *value = simple_strtoul(buffer, NULL, 0); 135 - 136 - return count; 137 - } 138 - 139 - 140 /* EDAC sysfs CSROW data structures and methods 141 */ 142 ··· 437 438 /* memory scrubbing */ 439 static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci, 440 - const char *data, size_t count) 441 { 442 - u32 bandwidth = -1; 443 444 - if (mci->set_sdram_scrub_rate) { 445 - 446 - memctrl_int_store(&bandwidth, data, count); 447 - 448 - if (!(*mci->set_sdram_scrub_rate) (mci, &bandwidth)) { 449 - edac_printk(KERN_DEBUG, EDAC_MC, 450 - "Scrub rate set successfully, applied: %d\n", 451 - bandwidth); 452 - } else { 453 - /* FIXME: error codes maybe? */ 454 - edac_printk(KERN_DEBUG, EDAC_MC, 455 - "Scrub rate set FAILED, could not apply: %d\n", 456 - bandwidth); 457 - } 458 - } else { 459 - /* FIXME: produce "not implemented" ERROR for user-side. */ 460 edac_printk(KERN_WARNING, EDAC_MC, 461 - "Memory scrubbing 'set'control is not implemented!\n"); 462 } 463 - return count; 464 } 465 466 static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data) 467 { 468 - u32 bandwidth = -1; 469 470 - if (mci->get_sdram_scrub_rate) { 471 - if (!(*mci->get_sdram_scrub_rate) (mci, &bandwidth)) { 472 - edac_printk(KERN_DEBUG, EDAC_MC, 473 - "Scrub rate successfully, fetched: %d\n", 474 - bandwidth); 475 - } else { 476 - /* FIXME: error codes maybe? */ 477 - edac_printk(KERN_DEBUG, EDAC_MC, 478 - "Scrub rate fetch FAILED, got: %d\n", 479 - bandwidth); 480 - } 481 - } else { 482 - /* FIXME: produce "not implemented" ERROR for user-side. */ 483 edac_printk(KERN_WARNING, EDAC_MC, 484 - "Memory scrubbing 'get' control is not implemented\n"); 485 } 486 - return sprintf(data, "%d\n", bandwidth); 487 } 488 489 /* default attribute files for the MCI object */
··· 124 [EDAC_S16ECD16ED] = "S16ECD16ED" 125 }; 126 127 /* EDAC sysfs CSROW data structures and methods 128 */ 129 ··· 450 451 /* memory scrubbing */ 452 static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci, 453 + const char *data, size_t count) 454 { 455 + unsigned long bandwidth = 0; 456 + int err; 457 458 + if (!mci->set_sdram_scrub_rate) { 459 edac_printk(KERN_WARNING, EDAC_MC, 460 + "Memory scrub rate setting not implemented!\n"); 461 + return -EINVAL; 462 } 463 + 464 + if (strict_strtoul(data, 10, &bandwidth) < 0) 465 + return -EINVAL; 466 + 467 + err = mci->set_sdram_scrub_rate(mci, (u32)bandwidth); 468 + if (err) { 469 + edac_printk(KERN_DEBUG, EDAC_MC, 470 + "Failed setting scrub rate to %lu\n", bandwidth); 471 + return -EINVAL; 472 + } 473 + else { 474 + edac_printk(KERN_DEBUG, EDAC_MC, 475 + "Scrub rate set to: %lu\n", bandwidth); 476 + return count; 477 + } 478 } 479 480 static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data) 481 { 482 + u32 bandwidth = 0; 483 + int err; 484 485 + if (!mci->get_sdram_scrub_rate) { 486 edac_printk(KERN_WARNING, EDAC_MC, 487 + "Memory scrub rate reading not implemented\n"); 488 + return -EINVAL; 489 } 490 + 491 + err = mci->get_sdram_scrub_rate(mci, &bandwidth); 492 + if (err) { 493 + edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); 494 + return err; 495 + } 496 + else { 497 + edac_printk(KERN_DEBUG, EDAC_MC, 498 + "Read scrub rate: %d\n", bandwidth); 499 + return sprintf(data, "%d\n", bandwidth); 500 + } 501 } 502 503 /* default attribute files for the MCI object */
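
The sysfs store path now parses with strict_strtoul() and propagates a real error instead of silently accepting garbage: strict_strtoul() returns 0 only if the whole string (an optional trailing newline aside) is a valid number, so "1000" is accepted while "fast" or "1000k" makes the store fail with -EINVAL. From user space this surfaces as a plain write/read of the attribute; a minimal sketch, assuming the usual mc0 sysfs path:

#include <stdio.h>

/* assumed sysfs location for the first memory controller */
#define SCRUB_ATTR "/sys/devices/system/edac/mc/mc0/sdram_scrub_rate"

int main(void)
{
	FILE *f = fopen(SCRUB_ATTR, "w");
	unsigned long bw;

	if (!f) {
		perror("open for write");
		return 1;
	}
	/* request ~100 KB/s; the driver may round to a supported rate */
	if (fprintf(f, "102400\n") < 0 || fclose(f) != 0) {
		perror("write");	/* a driver -EINVAL shows up here */
		return 1;
	}

	f = fopen(SCRUB_ATTR, "r");
	if (!f || fscanf(f, "%lu", &bw) != 1) {
		perror("read back");
		return 1;
	}
	fclose(f);
	printf("applied scrub rate: %lu bytes/sec\n", bw);
	return 0;
}
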
+8 -8
drivers/edac/edac_mce_amd.c
··· 133 u32 ec = mc0_status & 0xffff; 134 u32 xec = (mc0_status >> 16) & 0xf; 135 136 - pr_emerg(" Data Cache Error"); 137 138 if (xec == 1 && TLB_ERROR(ec)) 139 pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); ··· 176 u32 ec = mc1_status & 0xffff; 177 u32 xec = (mc1_status >> 16) & 0xf; 178 179 - pr_emerg(" Instruction Cache Error"); 180 181 if (xec == 1 && TLB_ERROR(ec)) 182 pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); ··· 233 u32 ec = mc2_status & 0xffff; 234 u32 xec = (mc2_status >> 16) & 0xf; 235 236 - pr_emerg(" Bus Unit Error"); 237 238 if (xec == 0x1) 239 pr_cont(" in the write data buffers.\n"); ··· 275 u32 ec = mc3_status & 0xffff; 276 u32 xec = (mc3_status >> 16) & 0xf; 277 278 - pr_emerg(" Load Store Error"); 279 280 if (xec == 0x0) { 281 u8 rrrr = (ec >> 4) & 0xf; ··· 304 if (TLB_ERROR(ec) && !report_gart_errors) 305 return; 306 307 - pr_emerg(" Northbridge Error, node %d", node_id); 308 309 /* 310 * F10h, revD can disable ErrCpu[3:0] so check that first and also the ··· 342 static inline void amd_decode_err_code(unsigned int ec) 343 { 344 if (TLB_ERROR(ec)) { 345 - pr_emerg(" Transaction: %s, Cache Level %s\n", 346 TT_MSG(ec), LL_MSG(ec)); 347 } else if (MEM_ERROR(ec)) { 348 - pr_emerg(" Transaction: %s, Type: %s, Cache Level: %s", 349 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); 350 } else if (BUS_ERROR(ec)) { 351 - pr_emerg(" Transaction type: %s(%s), %s, Cache Level: %s, " 352 "Participating Processor: %s\n", 353 RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), 354 PP_MSG(ec));
··· 133 u32 ec = mc0_status & 0xffff; 134 u32 xec = (mc0_status >> 16) & 0xf; 135 136 + pr_emerg("Data Cache Error"); 137 138 if (xec == 1 && TLB_ERROR(ec)) 139 pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); ··· 176 u32 ec = mc1_status & 0xffff; 177 u32 xec = (mc1_status >> 16) & 0xf; 178 179 + pr_emerg("Instruction Cache Error"); 180 181 if (xec == 1 && TLB_ERROR(ec)) 182 pr_cont(": %s TLB multimatch.\n", LL_MSG(ec)); ··· 233 u32 ec = mc2_status & 0xffff; 234 u32 xec = (mc2_status >> 16) & 0xf; 235 236 + pr_emerg("Bus Unit Error"); 237 238 if (xec == 0x1) 239 pr_cont(" in the write data buffers.\n"); ··· 275 u32 ec = mc3_status & 0xffff; 276 u32 xec = (mc3_status >> 16) & 0xf; 277 278 + pr_emerg("Load Store Error"); 279 280 if (xec == 0x0) { 281 u8 rrrr = (ec >> 4) & 0xf; ··· 304 if (TLB_ERROR(ec) && !report_gart_errors) 305 return; 306 307 + pr_emerg("Northbridge Error, node %d", node_id); 308 309 /* 310 * F10h, revD can disable ErrCpu[3:0] so check that first and also the ··· 342 static inline void amd_decode_err_code(unsigned int ec) 343 { 344 if (TLB_ERROR(ec)) { 345 + pr_emerg("Transaction: %s, Cache Level %s\n", 346 TT_MSG(ec), LL_MSG(ec)); 347 } else if (MEM_ERROR(ec)) { 348 + pr_emerg("Transaction: %s, Type: %s, Cache Level: %s", 349 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec)); 350 } else if (BUS_ERROR(ec)) { 351 + pr_emerg("Transaction type: %s(%s), %s, Cache Level: %s, " 352 "Participating Processor: %s\n", 353 RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec), 354 PP_MSG(ec));
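
The leading space is dropped from these pr_emerg() strings because each of them is continued by pr_cont() calls on the same output line, so the space only produced stray indentation in the log. A minimal illustration of the pr_emerg()/pr_cont() pairing; the message text is invented:

#include <linux/kernel.h>

static void example_report(unsigned int xec)
{
	/* pr_emerg() starts the line, pr_cont() appends to it, so the
	 * whole report lands in the log as a single line such as:
	 *   "Example Error: during data read."
	 */
	pr_emerg("Example Error");

	if (xec == 0x1)
		pr_cont(": during data read.\n");
	else
		pr_cont(": unknown extended error code.\n");
}
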
+3 -4
drivers/edac/i5100_edac.c
··· 589 /* 590 * The bandwidth is based on experimentation, feel free to refine it. 591 */ 592 - static int i5100_set_scrub_rate(struct mem_ctl_info *mci, 593 - u32 *bandwidth) 594 { 595 struct i5100_priv *priv = mci->pvt_info; 596 u32 dw; 597 598 pci_read_config_dword(priv->mc, I5100_MC, &dw); 599 - if (*bandwidth) { 600 priv->scrub_enable = 1; 601 dw |= I5100_MC_SCRBEN_MASK; 602 schedule_delayed_work(&(priv->i5100_scrubbing), ··· 609 610 pci_read_config_dword(priv->mc, I5100_MC, &dw); 611 612 - *bandwidth = 5900000 * i5100_mc_scrben(dw); 613 614 return 0; 615 }
··· 589 /* 590 * The bandwidth is based on experimentation, feel free to refine it. 591 */ 592 + static int i5100_set_scrub_rate(struct mem_ctl_info *mci, u32 bandwidth) 593 { 594 struct i5100_priv *priv = mci->pvt_info; 595 u32 dw; 596 597 pci_read_config_dword(priv->mc, I5100_MC, &dw); 598 + if (bandwidth) { 599 priv->scrub_enable = 1; 600 dw |= I5100_MC_SCRBEN_MASK; 601 schedule_delayed_work(&(priv->i5100_scrubbing), ··· 610 611 pci_read_config_dword(priv->mc, I5100_MC, &dw); 612 613 + bandwidth = 5900000 * i5100_mc_scrben(dw); 614 615 return 0; 616 }