Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

amd64_edac: Simplify decoding path

Use the struct mce directly instead of copying from it into a custom
struct err_regs.

No functional change.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>

+35 -65
+27 -49
drivers/edac/amd64_edac.c
··· 788 788 789 789 static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); 790 790 791 - static u16 extract_syndrome(struct err_regs *err) 792 - { 793 - return ((err->nbsh >> 15) & 0xff) | ((err->nbsl >> 16) & 0xff00); 794 - } 795 - 796 791 /* 797 792 * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs 798 793 * are ECC capable. ··· 970 975 return (flag) ? 2 : 1; 971 976 } 972 977 973 - /* Extract the ERROR ADDRESS for the K8 CPUs */ 974 - static u64 k8_get_error_address(struct mem_ctl_info *mci, 975 - struct err_regs *info) 978 + static u64 k8_get_error_address(struct mem_ctl_info *mci, struct mce *m) 976 979 { 977 - return (((u64) (info->nbeah & 0xff)) << 32) + 978 - (info->nbeal & ~0x03); 980 + /* ErrAddr[39:3] */ 981 + return m->addr & GENMASK(3, 39); 979 982 } 980 983 981 984 static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) ··· 993 1000 amd64_read_pci_cfg(pvt->F1, DRAM_LIMIT_HI + off, &pvt->ranges[range].lim.hi); 994 1001 } 995 1002 996 - static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, 997 - struct err_regs *err_info, u64 sys_addr) 1003 + static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, 1004 + u16 syndrome) 998 1005 { 999 1006 struct mem_ctl_info *src_mci; 1007 + struct amd64_pvt *pvt = mci->pvt_info; 1000 1008 int channel, csrow; 1001 1009 u32 page, offset; 1002 - u16 syndrome; 1003 - 1004 - syndrome = extract_syndrome(err_info); 1005 1010 1006 1011 /* CHIPKILL enabled */ 1007 - if (err_info->nbcfg & NBCFG_CHIPKILL) { 1012 + if (pvt->nbcfg & NBCFG_CHIPKILL) { 1008 1013 channel = get_channel_from_ecc_syndrome(mci, syndrome); 1009 1014 if (channel < 0) { 1010 1015 /* ··· 1127 1136 return dbam_map[cs_mode]; 1128 1137 } 1129 1138 1130 - static u64 f10_get_error_address(struct mem_ctl_info *mci, 1131 - struct err_regs *info) 1139 + static u64 f10_get_error_address(struct mem_ctl_info *mci, struct mce *m) 1132 1140 { 1133 - return (((u64) (info->nbeah & 
0xffff)) << 32) + 1134 - (info->nbeal & ~0x01); 1141 + return m->addr & GENMASK(1, 47); 1135 1142 } 1136 1143 1137 1144 static void f10_read_dram_ctl_register(struct amd64_pvt *pvt) ··· 1423 1434 * The @sys_addr is usually an error address received from the hardware 1424 1435 * (MCX_ADDR). 1425 1436 */ 1426 - static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, 1427 - struct err_regs *err_info, 1428 - u64 sys_addr) 1437 + static void f10_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, 1438 + u16 syndrome) 1429 1439 { 1430 1440 struct amd64_pvt *pvt = mci->pvt_info; 1431 1441 u32 page, offset; 1432 1442 int nid, csrow, chan = 0; 1433 - u16 syndrome; 1434 1443 1435 1444 csrow = f10_translate_sysaddr_to_cs(pvt, sys_addr, &nid, &chan); 1436 1445 ··· 1438 1451 } 1439 1452 1440 1453 error_address_to_page_and_offset(sys_addr, &page, &offset); 1441 - 1442 - syndrome = extract_syndrome(err_info); 1443 1454 1444 1455 /* 1445 1456 * We need the syndromes for channel detection only when we're ··· 1725 1740 * Handle any Correctable Errors (CEs) that have occurred. Check for valid ERROR 1726 1741 * ADDRESS and process. 
1727 1742 */ 1728 - static void amd64_handle_ce(struct mem_ctl_info *mci, 1729 - struct err_regs *info) 1743 + static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m) 1730 1744 { 1731 1745 struct amd64_pvt *pvt = mci->pvt_info; 1732 1746 u64 sys_addr; 1747 + u16 syndrome; 1733 1748 1734 1749 /* Ensure that the Error Address is VALID */ 1735 - if (!(info->nbsh & NBSH_VALID_ERROR_ADDR)) { 1750 + if (!(m->status & MCI_STATUS_ADDRV)) { 1736 1751 amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); 1737 1752 edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR); 1738 1753 return; 1739 1754 } 1740 1755 1741 - sys_addr = pvt->ops->get_error_address(mci, info); 1756 + sys_addr = pvt->ops->get_error_address(mci, m); 1757 + syndrome = extract_syndrome(m->status); 1742 1758 1743 1759 amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr); 1744 1760 1745 - pvt->ops->map_sysaddr_to_csrow(mci, info, sys_addr); 1761 + pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, syndrome); 1746 1762 } 1747 1763 1748 1764 /* Handle any Un-correctable Errors (UEs) */ 1749 - static void amd64_handle_ue(struct mem_ctl_info *mci, 1750 - struct err_regs *info) 1765 + static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) 1751 1766 { 1752 1767 struct amd64_pvt *pvt = mci->pvt_info; 1753 1768 struct mem_ctl_info *log_mci, *src_mci = NULL; ··· 1757 1772 1758 1773 log_mci = mci; 1759 1774 1760 - if (!(info->nbsh & NBSH_VALID_ERROR_ADDR)) { 1775 + if (!(m->status & MCI_STATUS_ADDRV)) { 1761 1776 amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n"); 1762 1777 edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR); 1763 1778 return; 1764 1779 } 1765 1780 1766 - sys_addr = pvt->ops->get_error_address(mci, info); 1781 + sys_addr = pvt->ops->get_error_address(mci, m); 1767 1782 1768 1783 /* 1769 1784 * Find out which node the error address belongs to. 
This may be ··· 1791 1806 } 1792 1807 1793 1808 static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, 1794 - struct err_regs *info) 1809 + struct mce *m) 1795 1810 { 1796 - u16 ec = EC(info->nbsl); 1797 - u8 xec = XEC(info->nbsl, 0x1f); 1798 - int ecc_type = (info->nbsh >> 13) & 0x3; 1811 + u16 ec = EC(m->status); 1812 + u8 xec = XEC(m->status, 0x1f); 1813 + u8 ecc_type = (m->status >> 45) & 0x3; 1799 1814 1800 1815 /* Bail early out if this was an 'observed' error */ 1801 1816 if (PP(ec) == NBSL_PP_OBS) ··· 1806 1821 return; 1807 1822 1808 1823 if (ecc_type == 2) 1809 - amd64_handle_ce(mci, info); 1824 + amd64_handle_ce(mci, m); 1810 1825 else if (ecc_type == 1) 1811 - amd64_handle_ue(mci, info); 1826 + amd64_handle_ue(mci, m); 1812 1827 } 1813 1828 1814 1829 void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg) 1815 1830 { 1816 1831 struct mem_ctl_info *mci = mcis[node_id]; 1817 - struct err_regs regs; 1818 1832 1819 - regs.nbsl = (u32) m->status; 1820 - regs.nbsh = (u32)(m->status >> 32); 1821 - regs.nbeal = (u32) m->addr; 1822 - regs.nbeah = (u32)(m->addr >> 32); 1823 - regs.nbcfg = nbcfg; 1824 - 1825 - __amd64_decode_bus_error(mci, &regs); 1833 + __amd64_decode_bus_error(mci, m); 1826 1834 } 1827 1835 1828 1836 /*
+8 -4
drivers/edac/amd64_edac.h
··· 397 397 return (((u64)pvt->ranges[i].lim.hi & 0x000000ff) << 40) | lim; 398 398 } 399 399 400 + static inline u16 extract_syndrome(u64 status) 401 + { 402 + return ((status >> 47) & 0xff) | ((status >> 16) & 0xff00); 403 + } 404 + 400 405 /* 401 406 * per-node ECC settings descriptor 402 407 */ ··· 445 440 struct low_ops { 446 441 int (*early_channel_count) (struct amd64_pvt *pvt); 447 442 448 - u64 (*get_error_address) (struct mem_ctl_info *mci, 449 - struct err_regs *info); 443 + u64 (*get_error_address) (struct mem_ctl_info *mci, struct mce *m); 450 444 void (*read_dram_ctl_register) (struct amd64_pvt *pvt); 451 - void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, 452 - struct err_regs *info, u64 SystemAddr); 445 + void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr, 446 + u16 syndrome); 453 447 int (*dbam_to_cs) (struct amd64_pvt *pvt, int cs_mode); 454 448 int (*read_dct_pci_cfg) (struct amd64_pvt *pvt, int offset, 455 449 u32 *val, const char *func);
-12
drivers/edac/mce_amd.h
··· 34 34 /* 35 35 * F3x4C bits (MCi_STATUS' high half) 36 36 */ 37 - #define NBSH_VALID_ERROR_ADDR BIT(26) 38 37 #define NBSH_ERR_CPU_VAL BIT(24) 39 38 40 39 enum tt_ids { ··· 75 76 extern const char *pp_msgs[]; 76 77 extern const char *to_msgs[]; 77 78 extern const char *ii_msgs[]; 78 - 79 - /* 80 - * relevant NB regs 81 - */ 82 - struct err_regs { 83 - u32 nbcfg; 84 - u32 nbsh; 85 - u32 nbsl; 86 - u32 nbeah; 87 - u32 nbeal; 88 - }; 89 79 90 80 /* 91 81 * per-family decoder ops