Merge tag 'edac_updates_for_v6.16' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras

Pull EDAC updates from Borislav Petkov:

- ie31200: Add support for Raptor Lake-S and Alder Lake-S compute dies

- Rework per-channel tracking of RRL (Retry Read error Log) registers so
that newer hardware with different RRL configurations can be supported,
and refactor that code. Add support for Granite Rapids servers (a sketch
of the new register-set descriptor follows this list)

- i10nm: Explicitly set RRL modes to fix any wrong BIOS programming

- Properly save and restore the Retry Read error Log channel
configuration info in the Intel drivers

- igen6: Correctly handle fused-off memory controllers on Arizona Beach
and Amston Lake SoCs before adding support for those SoCs

- the usual set of fixes and cleanups
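
As context for the RRL rework above: the per-platform offset arrays are replaced
by a table-driven descriptor (struct reg_rrl, see the skx_common.h and
i10nm_base.c diffs below) that records, per register set, what the set logs and
where its registers live. The snippet below is a minimal, self-contained sketch
of that idea with an invented two-set layout and made-up offsets -- it is not
the kernel structure itself, only an illustration of how one descriptor can
drive both the scrub and demand paths:

  #include <stdio.h>
  #include <stdint.h>
  #include <stdbool.h>

  /* Simplified stand-in for the kernel's enum rrl_mode / struct reg_rrl. */
  enum mode { SCRUB, DEMAND };

  struct rrl_desc {
      int set_num;            /* register sets per channel */
      int reg_num;            /* registers per set */
      enum mode modes[4];     /* what each set logs */
      uint32_t offsets[4][6]; /* MMIO offsets, [set][register] */
      uint8_t widths[6];      /* register width in bytes */
  };

  /* Invented layout: one "scrub" set and one "demand" set of three registers. */
  static const struct rrl_desc demo = {
      .set_num = 2,
      .reg_num = 3,
      .modes   = { SCRUB, DEMAND },
      .offsets = {
          { 0x100, 0x104, 0x108 },
          { 0x200, 0x204, 0x208 },
      },
      .widths  = { 4, 4, 8 },
  };

  /* Walk only the sets matching the error type, as show_retry_rd_err_log() now does. */
  static void dump_sets(const struct rrl_desc *d, bool scrub_err)
  {
      for (int i = 0; i < d->set_num; i++) {
          if ((d->modes[i] == SCRUB) != scrub_err)
              continue;
          for (int j = 0; j < d->reg_num; j++)
              printf("set %d reg %d: offset 0x%x, %u bytes\n",
                     i, j, (unsigned)d->offsets[i][j], (unsigned)d->widths[j]);
      }
  }

  int main(void)
  {
      dump_sets(&demo, true);  /* registers consulted for a patrol scrub error */
      dump_sets(&demo, false); /* registers consulted for a demand read error */
      return 0;
  }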

* tag 'edac_updates_for_v6.16' of git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras:
EDAC/bluefield: Don't use bluefield_edac_readl() result on error
EDAC/i10nm: Fix the bitwise operation between variables of different sizes
EDAC/ie31200: Add two Intel SoCs for EDAC support
EDAC/{skx_common,i10nm}: Add RRL support for Intel Granite Rapids server
EDAC/{skx_common,i10nm}: Refactor show_retry_rd_err_log()
EDAC/{skx_common,i10nm}: Refactor enable_retry_rd_err_log()
EDAC/{skx_common,i10nm}: Structure the per-channel RRL registers
EDAC/i10nm: Explicitly set the modes of the RRL register sets
EDAC/{skx_common,i10nm}: Fix the loss of saved RRL for HBM pseudo channel 0
EDAC/skx_common: Fix general protection fault
EDAC/igen6: Add Intel Amston Lake SoCs support
EDAC/igen6: Add Intel Arizona Beach SoCs support
EDAC/igen6: Skip absent memory controllers

6 files changed, 418 insertions(+), 227 deletions(-)

drivers/edac/bluefield_edac.c (+15 -5)
···
     * error without the detailed information.
     */
    err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom);
-   if (err)
+   if (err) {
        dev_err(priv->dev, "DRAM syndrom read failed.\n");
+       return;
+   }

    serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom);
    derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom);
···
    }

    err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info);
-   if (err)
+   if (err) {
        dev_err(priv->dev, "DRAM additional info read failed.\n");
+       return;
+   }

    err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info);

    ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0;

    err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0);
-   if (err)
+   if (err) {
        dev_err(priv->dev, "Error addr 0 read failed.\n");
+       return;
+   }

    err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1);
-   if (err)
+   if (err) {
        dev_err(priv->dev, "Error addr 1 read failed.\n");
+       return;
+   }

    ecc_dimm_addr = ((u64)edea1 << 32) | edea0;

···
        return;

    err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count);
-   if (err)
+   if (err) {
        dev_err(priv->dev, "ECC count read failed.\n");
+       return;
+   }

    single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count);
    double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count);
drivers/edac/i10nm_base.c (+276 -195)
···
 #define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
 #define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5)

-#define RETRY_RD_ERR_LOG_UC BIT(1)
-#define RETRY_RD_ERR_LOG_NOOVER BIT(14)
-#define RETRY_RD_ERR_LOG_EN BIT(15)
-#define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1))
-#define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0))
-
 static struct list_head *i10nm_edac_list;

 static struct res_config *res_cfg;
···
 static int decoding_via_mca;
 static bool mem_cfg_2lm;

-static u32 offsets_scrub_icx[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
-static u32 offsets_scrub_spr[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
-static u32 offsets_scrub_spr_hbm0[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8};
-static u32 offsets_scrub_spr_hbm1[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8};
-static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
-static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
-static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10};
-static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
-static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
+static struct reg_rrl icx_reg_rrl_ddr = {
+    .set_num = 2,
+    .reg_num = 6,
+    .modes = {LRE_SCRUB, LRE_DEMAND},
+    .offsets = {
+        {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8},
+        {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0},
+    },
+    .widths = {4, 4, 4, 4, 4, 8},
+    .v_mask = BIT(0),
+    .uc_mask = BIT(1),
+    .over_mask = BIT(2),
+    .en_patspr_mask = BIT(13),
+    .noover_mask = BIT(14),
+    .en_mask = BIT(15),

-static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable,
-                                      u32 *offsets_scrub, u32 *offsets_demand,
-                                      u32 *offsets_demand2)
+    .cecnt_num = 4,
+    .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24},
+    .cecnt_widths = {4, 4, 4, 4},
+};
+
+static struct reg_rrl spr_reg_rrl_ddr = {
+    .set_num = 3,
+    .reg_num = 6,
+    .modes = {LRE_SCRUB, LRE_DEMAND, FRE_DEMAND},
+    .offsets = {
+        {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8},
+        {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0},
+        {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10},
+    },
+    .widths = {4, 4, 8, 4, 4, 8},
+    .v_mask = BIT(0),
+    .uc_mask = BIT(1),
+    .over_mask = BIT(2),
+    .en_patspr_mask = BIT(13),
+    .noover_mask = BIT(14),
+    .en_mask = BIT(15),
+
+    .cecnt_num = 4,
+    .cecnt_offsets = {0x22c18, 0x22c1c, 0x22c20, 0x22c24},
+    .cecnt_widths = {4, 4, 4, 4},
+};
+
+static struct reg_rrl spr_reg_rrl_hbm_pch0 = {
+    .set_num = 2,
+    .reg_num = 6,
+    .modes = {LRE_SCRUB, LRE_DEMAND},
+    .offsets = {
+        {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8},
+        {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0},
+    },
+    .widths = {4, 4, 8, 4, 4, 8},
+    .v_mask = BIT(0),
+    .uc_mask = BIT(1),
+    .over_mask = BIT(2),
+    .en_patspr_mask = BIT(13),
+    .noover_mask = BIT(14),
+    .en_mask = BIT(15),
+
+    .cecnt_num = 4,
+    .cecnt_offsets = {0x2818, 0x281c, 0x2820, 0x2824},
+    .cecnt_widths = {4, 4, 4, 4},
+};
+
+static struct reg_rrl spr_reg_rrl_hbm_pch1 = {
+    .set_num = 2,
+    .reg_num = 6,
+    .modes = {LRE_SCRUB, LRE_DEMAND},
+    .offsets = {
+        {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8},
+        {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0},
+    },
+    .widths = {4, 4, 8, 4, 4, 8},
+    .v_mask = BIT(0),
+    .uc_mask = BIT(1),
+    .over_mask = BIT(2),
+    .en_patspr_mask = BIT(13),
+    .noover_mask = BIT(14),
+    .en_mask = BIT(15),
+
+    .cecnt_num = 4,
+    .cecnt_offsets = {0x2c18, 0x2c1c, 0x2c20, 0x2c24},
+    .cecnt_widths = {4, 4, 4, 4},
+};
+
+static struct reg_rrl gnr_reg_rrl_ddr = {
+    .set_num = 4,
+    .reg_num = 6,
+    .modes = {FRE_SCRUB, FRE_DEMAND, LRE_SCRUB, LRE_DEMAND},
+    .offsets = {
+        {0x2f10, 0x2f20, 0x2f30, 0x2f50, 0x2f60, 0xba0},
+        {0x2f14, 0x2f24, 0x2f38, 0x2f54, 0x2f64, 0xba8},
+        {0x2f18, 0x2f28, 0x2f40, 0x2f58, 0x2f68, 0xbb0},
+        {0x2f1c, 0x2f2c, 0x2f48, 0x2f5c, 0x2f6c, 0xbb8},
+    },
+    .widths = {4, 4, 8, 4, 4, 8},
+    .v_mask = BIT(0),
+    .uc_mask = BIT(1),
+    .over_mask = BIT(2),
+    .en_patspr_mask = BIT(14),
+    .noover_mask = BIT(15),
+    .en_mask = BIT(12),
+
+    .cecnt_num = 8,
+    .cecnt_offsets = {0x2c10, 0x2c14, 0x2c18, 0x2c1c, 0x2c20, 0x2c24, 0x2c28, 0x2c2c},
+    .cecnt_widths = {4, 4, 4, 4, 4, 4, 4, 4},
+};
+
+static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width)
 {
-    u32 s, d, d2;
+    switch (width) {
+    case 4:
+        return I10NM_GET_REG32(imc, chan, offset);
+    case 8:
+        return I10NM_GET_REG64(imc, chan, offset);
+    default:
+        i10nm_printk(KERN_ERR, "Invalid readd RRL 0x%x width %d\n", offset, width);
+        return 0;
+    }
+}

-    s = I10NM_GET_REG32(imc, chan, offsets_scrub[0]);
-    d = I10NM_GET_REG32(imc, chan, offsets_demand[0]);
-    if (offsets_demand2)
-        d2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]);
+static void write_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width, u64 val)
+{
+    switch (width) {
+    case 4:
+        return I10NM_SET_REG32(imc, chan, offset, (u32)val);
+    default:
+        i10nm_printk(KERN_ERR, "Invalid write RRL 0x%x width %d\n", offset, width);
+    }
+}
+
+static void enable_rrl(struct skx_imc *imc, int chan, struct reg_rrl *rrl,
+                       int rrl_set, bool enable, u32 *rrl_ctl)
+{
+    enum rrl_mode mode = rrl->modes[rrl_set];
+    u32 offset = rrl->offsets[rrl_set][0], v;
+    u8 width = rrl->widths[0];
+    bool first, scrub;
+
+    /* First or last read error. */
+    first = (mode == FRE_SCRUB || mode == FRE_DEMAND);
+    /* Patrol scrub or on-demand read error. */
+    scrub = (mode == FRE_SCRUB || mode == LRE_SCRUB);
+
+    v = read_imc_reg(imc, chan, offset, width);

     if (enable) {
-        /* Save default configurations */
-        imc->chan[chan].retry_rd_err_log_s = s;
-        imc->chan[chan].retry_rd_err_log_d = d;
-        if (offsets_demand2)
-            imc->chan[chan].retry_rd_err_log_d2 = d2;
+        /* Save default configurations. */
+        *rrl_ctl = v;
+        v &= ~rrl->uc_mask;

-        s &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
-        s |= RETRY_RD_ERR_LOG_EN;
-        d &= ~RETRY_RD_ERR_LOG_NOOVER_UC;
-        d |= RETRY_RD_ERR_LOG_EN;
+        if (first)
+            v |= rrl->noover_mask;
+        else
+            v &= ~rrl->noover_mask;

-        if (offsets_demand2) {
-            d2 &= ~RETRY_RD_ERR_LOG_UC;
-            d2 |= RETRY_RD_ERR_LOG_NOOVER;
-            d2 |= RETRY_RD_ERR_LOG_EN;
-        }
+        if (scrub)
+            v |= rrl->en_patspr_mask;
+        else
+            v &= ~rrl->en_patspr_mask;
+
+        v |= rrl->en_mask;
     } else {
-        /* Restore default configurations */
-        if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_UC)
-            s |= RETRY_RD_ERR_LOG_UC;
-        if (imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_NOOVER)
-            s |= RETRY_RD_ERR_LOG_NOOVER;
-        if (!(imc->chan[chan].retry_rd_err_log_s & RETRY_RD_ERR_LOG_EN))
-            s &= ~RETRY_RD_ERR_LOG_EN;
-        if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_UC)
-            d |= RETRY_RD_ERR_LOG_UC;
-        if (imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_NOOVER)
-            d |= RETRY_RD_ERR_LOG_NOOVER;
-        if (!(imc->chan[chan].retry_rd_err_log_d & RETRY_RD_ERR_LOG_EN))
-            d &= ~RETRY_RD_ERR_LOG_EN;
+        /* Restore default configurations. */
+        if (*rrl_ctl & rrl->uc_mask)
+            v |= rrl->uc_mask;

-        if (offsets_demand2) {
-            if (imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_UC)
-                d2 |= RETRY_RD_ERR_LOG_UC;
-            if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_NOOVER))
-                d2 &= ~RETRY_RD_ERR_LOG_NOOVER;
-            if (!(imc->chan[chan].retry_rd_err_log_d2 & RETRY_RD_ERR_LOG_EN))
-                d2 &= ~RETRY_RD_ERR_LOG_EN;
+        if (first) {
+            if (!(*rrl_ctl & rrl->noover_mask))
+                v &= ~rrl->noover_mask;
+        } else {
+            if (*rrl_ctl & rrl->noover_mask)
+                v |= rrl->noover_mask;
         }
+
+        if (scrub) {
+            if (!(*rrl_ctl & rrl->en_patspr_mask))
+                v &= ~rrl->en_patspr_mask;
+        } else {
+            if (*rrl_ctl & rrl->en_patspr_mask)
+                v |= rrl->en_patspr_mask;
+        }
+
+        if (!(*rrl_ctl & rrl->en_mask))
+            v &= ~rrl->en_mask;
     }

-    I10NM_SET_REG32(imc, chan, offsets_scrub[0], s);
-    I10NM_SET_REG32(imc, chan, offsets_demand[0], d);
-    if (offsets_demand2)
-        I10NM_SET_REG32(imc, chan, offsets_demand2[0], d2);
+    write_imc_reg(imc, chan, offset, width, v);
+}
+
+static void enable_rrls(struct skx_imc *imc, int chan, struct reg_rrl *rrl,
+                        bool enable, u32 *rrl_ctl)
+{
+    for (int i = 0; i < rrl->set_num; i++)
+        enable_rrl(imc, chan, rrl, i, enable, rrl_ctl + i);
+}
+
+static void enable_rrls_ddr(struct skx_imc *imc, bool enable)
+{
+    struct reg_rrl *rrl_ddr = res_cfg->reg_rrl_ddr;
+    int i, chan_num = res_cfg->ddr_chan_num;
+    struct skx_channel *chan = imc->chan;
+
+    if (!imc->mbase)
+        return;
+
+    for (i = 0; i < chan_num; i++)
+        enable_rrls(imc, i, rrl_ddr, enable, chan[i].rrl_ctl[0]);
+}
+
+static void enable_rrls_hbm(struct skx_imc *imc, bool enable)
+{
+    struct reg_rrl **rrl_hbm = res_cfg->reg_rrl_hbm;
+    int i, chan_num = res_cfg->hbm_chan_num;
+    struct skx_channel *chan = imc->chan;
+
+    if (!imc->mbase || !imc->hbm_mc || !rrl_hbm[0] || !rrl_hbm[1])
+        return;
+
+    for (i = 0; i < chan_num; i++) {
+        enable_rrls(imc, i, rrl_hbm[0], enable, chan[i].rrl_ctl[0]);
+        enable_rrls(imc, i, rrl_hbm[1], enable, chan[i].rrl_ctl[1]);
+    }
 }

 static void enable_retry_rd_err_log(bool enable)
 {
-    int i, j, imc_num, chan_num;
-    struct skx_imc *imc;
     struct skx_dev *d;
+    int i, imc_num;

     edac_dbg(2, "\n");

     list_for_each_entry(d, i10nm_edac_list, list) {
         imc_num = res_cfg->ddr_imc_num;
-        chan_num = res_cfg->ddr_chan_num;
-
-        for (i = 0; i < imc_num; i++) {
-            imc = &d->imc[i];
-            if (!imc->mbase)
-                continue;
-
-            for (j = 0; j < chan_num; j++)
-                __enable_retry_rd_err_log(imc, j, enable,
-                                          res_cfg->offsets_scrub,
-                                          res_cfg->offsets_demand,
-                                          res_cfg->offsets_demand2);
-        }
+        for (i = 0; i < imc_num; i++)
+            enable_rrls_ddr(&d->imc[i], enable);

         imc_num += res_cfg->hbm_imc_num;
-        chan_num = res_cfg->hbm_chan_num;
-
-        for (; i < imc_num; i++) {
-            imc = &d->imc[i];
-            if (!imc->mbase || !imc->hbm_mc)
-                continue;
-
-            for (j = 0; j < chan_num; j++) {
-                __enable_retry_rd_err_log(imc, j, enable,
-                                          res_cfg->offsets_scrub_hbm0,
-                                          res_cfg->offsets_demand_hbm0,
-                                          NULL);
-                __enable_retry_rd_err_log(imc, j, enable,
-                                          res_cfg->offsets_scrub_hbm1,
-                                          res_cfg->offsets_demand_hbm1,
-                                          NULL);
-            }
-        }
+        for (; i < imc_num; i++)
+            enable_rrls_hbm(&d->imc[i], enable);
     }
 }

 static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
                                   int len, bool scrub_err)
 {
+    int i, j, n, ch = res->channel, pch = res->cs & 1;
     struct skx_imc *imc = &res->dev->imc[res->imc];
-    u32 log0, log1, log2, log3, log4;
-    u32 corr0, corr1, corr2, corr3;
-    u32 lxg0, lxg1, lxg3, lxg4;
-    u32 *xffsets = NULL;
-    u64 log2a, log5;
-    u64 lxg2a, lxg5;
-    u32 *offsets;
-    int n, pch;
+    u64 log, corr, status_mask;
+    struct reg_rrl *rrl;
+    bool scrub;
+    u32 offset;
+    u8 width;

     if (!imc->mbase)
         return;

-    if (imc->hbm_mc) {
-        pch = res->cs & 1;
+    rrl = imc->hbm_mc ? res_cfg->reg_rrl_hbm[pch] : res_cfg->reg_rrl_ddr;

-        if (pch)
-            offsets = scrub_err ? res_cfg->offsets_scrub_hbm1 :
-                                  res_cfg->offsets_demand_hbm1;
-        else
-            offsets = scrub_err ? res_cfg->offsets_scrub_hbm0 :
-                                  res_cfg->offsets_demand_hbm0;
-    } else {
-        if (scrub_err) {
-            offsets = res_cfg->offsets_scrub;
-        } else {
-            offsets = res_cfg->offsets_demand;
-            xffsets = res_cfg->offsets_demand2;
+    if (!rrl)
+        return;
+
+    status_mask = rrl->over_mask | rrl->uc_mask | rrl->v_mask;
+
+    n = snprintf(msg, len, " retry_rd_err_log[");
+    for (i = 0; i < rrl->set_num; i++) {
+        scrub = (rrl->modes[i] == FRE_SCRUB || rrl->modes[i] == LRE_SCRUB);
+        if (scrub_err != scrub)
+            continue;
+
+        for (j = 0; j < rrl->reg_num && len - n > 0; j++) {
+            offset = rrl->offsets[i][j];
+            width = rrl->widths[j];
+            log = read_imc_reg(imc, ch, offset, width);
+
+            if (width == 4)
+                n += snprintf(msg + n, len - n, "%.8llx ", log);
+            else
+                n += snprintf(msg + n, len - n, "%.16llx ", log);
+
+            /* Clear RRL status if RRL in Linux control mode. */
+            if (retry_rd_err_log == 2 && !j && (log & status_mask))
+                write_imc_reg(imc, ch, offset, width, log & ~status_mask);
         }
     }

-    log0 = I10NM_GET_REG32(imc, res->channel, offsets[0]);
-    log1 = I10NM_GET_REG32(imc, res->channel, offsets[1]);
-    log3 = I10NM_GET_REG32(imc, res->channel, offsets[3]);
-    log4 = I10NM_GET_REG32(imc, res->channel, offsets[4]);
-    log5 = I10NM_GET_REG64(imc, res->channel, offsets[5]);
+    /* Move back one space. */
+    n--;
+    n += snprintf(msg + n, len - n, "]");

-    if (xffsets) {
-        lxg0 = I10NM_GET_REG32(imc, res->channel, xffsets[0]);
-        lxg1 = I10NM_GET_REG32(imc, res->channel, xffsets[1]);
-        lxg3 = I10NM_GET_REG32(imc, res->channel, xffsets[3]);
-        lxg4 = I10NM_GET_REG32(imc, res->channel, xffsets[4]);
-        lxg5 = I10NM_GET_REG64(imc, res->channel, xffsets[5]);
-    }
+    if (len - n > 0) {
+        n += snprintf(msg + n, len - n, " correrrcnt[");
+        for (i = 0; i < rrl->cecnt_num && len - n > 0; i++) {
+            offset = rrl->cecnt_offsets[i];
+            width = rrl->cecnt_widths[i];
+            corr = read_imc_reg(imc, ch, offset, width);

-    if (res_cfg->type == SPR) {
-        log2a = I10NM_GET_REG64(imc, res->channel, offsets[2]);
-        n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx",
-                     log0, log1, log2a, log3, log4, log5);
-
-        if (len - n > 0) {
-            if (xffsets) {
-                lxg2a = I10NM_GET_REG64(imc, res->channel, xffsets[2]);
-                n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]",
-                              lxg0, lxg1, lxg2a, lxg3, lxg4, lxg5);
+            /* CPUs {ICX,SPR} encode two counters per 4-byte CORRERRCNT register. */
+            if (res_cfg->type <= SPR) {
+                n += snprintf(msg + n, len - n, "%.4llx %.4llx ",
+                              corr & 0xffff, corr >> 16);
             } else {
-                n += snprintf(msg + n, len - n, "]");
+                /* CPUs {GNR} encode one counter per CORRERRCNT register. */
+                if (width == 4)
+                    n += snprintf(msg + n, len - n, "%.8llx ", corr);
+                else
+                    n += snprintf(msg + n, len - n, "%.16llx ", corr);
             }
         }
-    } else {
-        log2 = I10NM_GET_REG32(imc, res->channel, offsets[2]);
-        n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
-                     log0, log1, log2, log3, log4, log5);
-    }

-    if (imc->hbm_mc) {
-        if (pch) {
-            corr0 = I10NM_GET_REG32(imc, res->channel, 0x2c18);
-            corr1 = I10NM_GET_REG32(imc, res->channel, 0x2c1c);
-            corr2 = I10NM_GET_REG32(imc, res->channel, 0x2c20);
-            corr3 = I10NM_GET_REG32(imc, res->channel, 0x2c24);
-        } else {
-            corr0 = I10NM_GET_REG32(imc, res->channel, 0x2818);
-            corr1 = I10NM_GET_REG32(imc, res->channel, 0x281c);
-            corr2 = I10NM_GET_REG32(imc, res->channel, 0x2820);
-            corr3 = I10NM_GET_REG32(imc, res->channel, 0x2824);
-        }
-    } else {
-        corr0 = I10NM_GET_REG32(imc, res->channel, 0x22c18);
-        corr1 = I10NM_GET_REG32(imc, res->channel, 0x22c1c);
-        corr2 = I10NM_GET_REG32(imc, res->channel, 0x22c20);
-        corr3 = I10NM_GET_REG32(imc, res->channel, 0x22c24);
-    }
-
-    if (len - n > 0)
-        snprintf(msg + n, len - n,
-                 " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
-                 corr0 & 0xffff, corr0 >> 16,
-                 corr1 & 0xffff, corr1 >> 16,
-                 corr2 & 0xffff, corr2 >> 16,
-                 corr3 & 0xffff, corr3 >> 16);
-
-    /* Clear status bits */
-    if (retry_rd_err_log == 2) {
-        if (log0 & RETRY_RD_ERR_LOG_OVER_UC_V) {
-            log0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
-            I10NM_SET_REG32(imc, res->channel, offsets[0], log0);
-        }
-
-        if (xffsets && (lxg0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
-            lxg0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
-            I10NM_SET_REG32(imc, res->channel, xffsets[0], lxg0);
-        }
+        /* Move back one space. */
+        n--;
+        n += snprintf(msg + n, len - n, "]");
     }
 }

···
     .ddr_mdev_bdf = {0, 12, 0},
     .hbm_mdev_bdf = {0, 12, 1},
     .sad_all_offset = 0x108,
-    .offsets_scrub = offsets_scrub_icx,
-    .offsets_demand = offsets_demand_icx,
+    .reg_rrl_ddr = &icx_reg_rrl_ddr,
 };

 static struct res_config i10nm_cfg1 = {
···
     .ddr_mdev_bdf = {0, 12, 0},
     .hbm_mdev_bdf = {0, 12, 1},
     .sad_all_offset = 0x108,
-    .offsets_scrub = offsets_scrub_icx,
-    .offsets_demand = offsets_demand_icx,
+    .reg_rrl_ddr = &icx_reg_rrl_ddr,
 };

 static struct res_config spr_cfg = {
···
     .ddr_mdev_bdf = {0, 12, 0},
     .hbm_mdev_bdf = {0, 12, 1},
     .sad_all_offset = 0x300,
-    .offsets_scrub = offsets_scrub_spr,
-    .offsets_scrub_hbm0 = offsets_scrub_spr_hbm0,
-    .offsets_scrub_hbm1 = offsets_scrub_spr_hbm1,
-    .offsets_demand = offsets_demand_spr,
-    .offsets_demand2 = offsets_demand2_spr,
-    .offsets_demand_hbm0 = offsets_demand_spr_hbm0,
-    .offsets_demand_hbm1 = offsets_demand_spr_hbm1,
+    .reg_rrl_ddr = &spr_reg_rrl_ddr,
+    .reg_rrl_hbm[0] = &spr_reg_rrl_hbm_pch0,
+    .reg_rrl_hbm[1] = &spr_reg_rrl_hbm_pch1,
 };

 static struct res_config gnr_cfg = {
···
     .uracu_bdf = {0, 0, 1},
     .ddr_mdev_bdf = {0, 5, 1},
     .sad_all_offset = 0x300,
+    .reg_rrl_ddr = &gnr_reg_rrl_ddr,
 };

 static const struct x86_cpu_id i10nm_cpuids[] = {
···
     mce_register_decode_chain(&i10nm_mce_dec);
     skx_setup_debug("i10nm_test");

-    if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
+    if (retry_rd_err_log && res_cfg->reg_rrl_ddr) {
         skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log);
         if (retry_rd_err_log == 2)
             enable_retry_rd_err_log(true);
···
 {
     edac_dbg(2, "\n");

-    if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
+    if (retry_rd_err_log && res_cfg->reg_rrl_ddr) {
         skx_set_decode(NULL, NULL);
         if (retry_rd_err_log == 2)
             enable_retry_rd_err_log(false);
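
One detail worth calling out from the diff above: "Linux control mode"
(retry_rd_err_log=2) saves the firmware's RRL control word per register set in
rrl_ctl[] before imposing its own policy, and puts the saved bits back on
disable. Below is a minimal standalone sketch of that save/impose/restore
pattern, reduced to a single "last read error from patrol scrub" set; the bit
positions mirror the ICX/SPR masks above, but the helper names and the mask-based
restore are invented for illustration:

  #include <stdio.h>
  #include <stdint.h>

  /* Control bits of the first register in a set (ICX/SPR layout shown above). */
  #define RRL_UC        (1u << 1)  /* log uncorrectable errors */
  #define RRL_EN_PATSPR (1u << 13) /* log patrol scrub errors */
  #define RRL_NOOVER    (1u << 14) /* keep first error, do not overwrite */
  #define RRL_EN        (1u << 15) /* RRL enable */

  /* Enable: remember the BIOS value, then impose the Linux policy for a
   * "last read error from patrol scrub" set (overwrite allowed, scrub on). */
  static uint32_t rrl_enable(uint32_t reg, uint32_t *saved)
  {
      *saved = reg;
      reg &= ~RRL_UC;
      reg &= ~RRL_NOOVER;
      reg |= RRL_EN_PATSPR | RRL_EN;
      return reg;
  }

  /* Disable: restore exactly the bits the BIOS had programmed. */
  static uint32_t rrl_restore(uint32_t reg, uint32_t saved)
  {
      uint32_t mask = RRL_UC | RRL_NOOVER | RRL_EN_PATSPR | RRL_EN;

      return (reg & ~mask) | (saved & mask);
  }

  int main(void)
  {
      uint32_t bios = RRL_UC | RRL_EN; /* pretend BIOS programming */
      uint32_t saved, v;

      v = rrl_enable(bios, &saved);
      printf("bios=%#x linux=%#x restored=%#x\n",
             (unsigned)bios, (unsigned)v, (unsigned)rrl_restore(v, saved));
      return 0;
  }
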
drivers/edac/ie31200_edac.c (+6 -0)
···
 #define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1 0xa703
 #define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2 0x4640
 #define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3 0x4630
+#define PCI_DEVICE_ID_INTEL_IE31200_RPL_S_4 0xa700
+
+/* Alder Lake-S */
+#define PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1 0x4660

 #define IE31200_RANKS_PER_CHANNEL 8
 #define IE31200_DIMMS_PER_CHANNEL 2
···
     { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_1), (kernel_ulong_t)&rpl_s_cfg},
     { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_2), (kernel_ulong_t)&rpl_s_cfg},
     { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_3), (kernel_ulong_t)&rpl_s_cfg},
+    { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_RPL_S_4), (kernel_ulong_t)&rpl_s_cfg},
+    { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IE31200_ADL_S_1), (kernel_ulong_t)&rpl_s_cfg},
     { 0, } /* 0 terminated list. */
 };
 MODULE_DEVICE_TABLE(pci, ie31200_pci_tbl);
drivers/edac/igen6_edac.c (+70 -16)
···

 static const struct res_config {
     bool machine_check;
+    /* The number of present memory controllers. */
     int num_imc;
     u32 imc_base;
     u32 cmf_base;
···
 #define DID_ADL_N_SKU10 0x4679
 #define DID_ADL_N_SKU11 0x467c
 #define DID_ADL_N_SKU12 0x4632
+
+/* Compute die IDs for Arizona Beach with IBECC */
+#define DID_AZB_SKU1 0x4676
+
+/* Compute did IDs for Amston Lake with IBECC */
+#define DID_ASL_SKU1 0x464a

 /* Compute die IDs for Raptor Lake-P with IBECC */
 #define DID_RPL_P_SKU1 0xa706
···
     { PCI_VDEVICE(INTEL, DID_ADL_N_SKU10), (kernel_ulong_t)&adl_n_cfg },
     { PCI_VDEVICE(INTEL, DID_ADL_N_SKU11), (kernel_ulong_t)&adl_n_cfg },
     { PCI_VDEVICE(INTEL, DID_ADL_N_SKU12), (kernel_ulong_t)&adl_n_cfg },
+    { PCI_VDEVICE(INTEL, DID_AZB_SKU1), (kernel_ulong_t)&adl_n_cfg },
+    { PCI_VDEVICE(INTEL, DID_ASL_SKU1), (kernel_ulong_t)&adl_n_cfg },
     { PCI_VDEVICE(INTEL, DID_RPL_P_SKU1), (kernel_ulong_t)&rpl_p_cfg },
     { PCI_VDEVICE(INTEL, DID_RPL_P_SKU2), (kernel_ulong_t)&rpl_p_cfg },
     { PCI_VDEVICE(INTEL, DID_RPL_P_SKU3), (kernel_ulong_t)&rpl_p_cfg },
···
     irq_work_queue(&ecclog_irq_work);
 }

-static int igen6_register_mci(int mc, u64 mchbar, struct pci_dev *pdev)
+/* Check whether the memory controller is absent. */
+static bool igen6_imc_absent(void __iomem *window)
+{
+    return readl(window + MAD_INTER_CHANNEL_OFFSET) == ~0;
+}
+
+static int igen6_register_mci(int mc, void __iomem *window, struct pci_dev *pdev)
 {
     struct edac_mc_layer layers[2];
     struct mem_ctl_info *mci;
     struct igen6_imc *imc;
-    void __iomem *window;
     int rc;

     edac_dbg(2, "\n");
-
-    mchbar += mc * MCHBAR_SIZE;
-    window = ioremap(mchbar, MCHBAR_SIZE);
-    if (!window) {
-        igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", mchbar);
-        return -ENODEV;
-    }

     layers[0].type = EDAC_MC_LAYER_CHANNEL;
     layers[0].size = NUM_CHANNELS;
···
 fail2:
     edac_mc_free(mci);
 fail:
-    iounmap(window);
     return rc;
 }

···
         edac_mc_free(mci);
         iounmap(imc->window);
     }
+}
+
+static int igen6_register_mcis(struct pci_dev *pdev, u64 mchbar)
+{
+    void __iomem *window;
+    int lmc, pmc, rc;
+    u64 base;
+
+    for (lmc = 0, pmc = 0; pmc < NUM_IMC; pmc++) {
+        base = mchbar + pmc * MCHBAR_SIZE;
+        window = ioremap(base, MCHBAR_SIZE);
+        if (!window) {
+            igen6_printk(KERN_ERR, "Failed to ioremap 0x%llx for mc%d\n", base, pmc);
+            rc = -ENOMEM;
+            goto out_unregister_mcis;
+        }
+
+        if (igen6_imc_absent(window)) {
+            iounmap(window);
+            edac_dbg(2, "Skip absent mc%d\n", pmc);
+            continue;
+        }
+
+        rc = igen6_register_mci(lmc, window, pdev);
+        if (rc)
+            goto out_iounmap;
+
+        /* Done, if all present MCs are detected and registered. */
+        if (++lmc >= res_cfg->num_imc)
+            break;
+    }
+
+    if (!lmc) {
+        igen6_printk(KERN_ERR, "No mc found.\n");
+        return -ENODEV;
+    }
+
+    if (lmc < res_cfg->num_imc)
+        igen6_printk(KERN_WARNING, "Expected %d mcs, but only %d detected.",
+                     res_cfg->num_imc, lmc);
+
+    return 0;
+
+out_iounmap:
+    iounmap(window);
+
+out_unregister_mcis:
+    igen6_unregister_mcis();
+
+    return rc;
 }

 static int igen6_mem_slice_setup(u64 mchbar)
···
 static int igen6_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
     u64 mchbar;
-    int i, rc;
+    int rc;

     edac_dbg(2, "\n");
···

     opstate_set(res_cfg, ent);

-    for (i = 0; i < res_cfg->num_imc; i++) {
-        rc = igen6_register_mci(i, mchbar, pdev);
-        if (rc)
-            goto fail2;
-    }
+    rc = igen6_register_mcis(pdev, mchbar);
+    if (rc)
+        goto fail;

     if (res_cfg->num_imc > 1) {
         rc = igen6_mem_slice_setup(mchbar);
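
The igen6 change above splits physical from logical controller numbering: every
physical MC slot is probed, a fused-off controller is recognized because its
first MAD register reads back as all-ones (igen6_imc_absent()), and only present
controllers get logical indices. A small standalone sketch of that probe loop,
with made-up constants and a fake register read standing in for the MMIO access:

  #include <stdio.h>
  #include <stdint.h>
  #include <stdbool.h>

  #define NUM_IMC      2   /* physical memory controller slots */
  #define PRESENT_MASK 0x1 /* pretend only slot 0 is populated */

  /* Stand-in for reading the first MAD register of a controller's MMIO window. */
  static uint32_t fake_readl(int pmc)
  {
      return (PRESENT_MASK & (1u << pmc)) ? 0x00010203u : ~0u;
  }

  /* A fused-off controller's registers read back as all-ones. */
  static bool imc_absent(int pmc)
  {
      return fake_readl(pmc) == ~0u;
  }

  int main(void)
  {
      int lmc = 0; /* logical index handed to the EDAC core */

      for (int pmc = 0; pmc < NUM_IMC; pmc++) {
          if (imc_absent(pmc)) {
              printf("mc%d: fused off, skipped\n", pmc);
              continue;
          }
          printf("mc%d: registered as logical mc%d\n", pmc, lmc++);
      }

      if (!lmc)
          printf("no memory controller found\n");

      return 0;
  }
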
drivers/edac/skx_common.c (+1 -0)
···

 void skx_adxl_put(void)
 {
+    adxl_component_count = 0;
     kfree(adxl_values);
     kfree(adxl_msg);
 }
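
The one-line skx_common.c change resets adxl_component_count when the ADXL
resources are released. Leaving a stale element count behind after freeing the
backing array is the classic setup for touching freed memory on a later use,
which is what the "general protection fault" commit addresses. A minimal
standalone illustration of the pattern (hypothetical names, not the driver's
actual code path):

  #include <stdio.h>
  #include <stdlib.h>

  static size_t component_count;
  static long *values;

  static int setup(size_t n)
  {
      values = calloc(n, sizeof(*values));
      if (!values)
          return -1;
      component_count = n;
      return 0;
  }

  static void teardown(void)
  {
      component_count = 0; /* the fix: never leave a count describing freed memory */
      free(values);
      values = NULL;
  }

  /* Analogous to iterating adxl_component_count entries during decode. */
  static void decode(void)
  {
      for (size_t i = 0; i < component_count; i++)
          printf("component %zu = %ld\n", i, values[i]);
  }

  int main(void)
  {
      if (setup(3))
          return 1;
      decode();
      teardown();
      decode(); /* safe only because teardown() cleared component_count */
      return 0;
  }
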
drivers/edac/skx_common.h (+50 -11)
···
  */
 #define MCACOD_EXT_MEM_ERR 0x280

+/* Max RRL register sets per {,sub-,pseudo-}channel. */
+#define NUM_RRL_SET 4
+/* Max RRL registers per set. */
+#define NUM_RRL_REG 6
+/* Max correctable error count registers. */
+#define NUM_CECNT_REG 8
+
+/* Modes of RRL register set. */
+enum rrl_mode {
+    /* Last read error from patrol scrub. */
+    LRE_SCRUB,
+    /* Last read error from demand. */
+    LRE_DEMAND,
+    /* First read error from patrol scrub. */
+    FRE_SCRUB,
+    /* First read error from demand. */
+    FRE_DEMAND,
+};
+
+/* RRL registers per {,sub-,pseudo-}channel. */
+struct reg_rrl {
+    /* RRL register parts. */
+    int set_num, reg_num;
+    enum rrl_mode modes[NUM_RRL_SET];
+    u32 offsets[NUM_RRL_SET][NUM_RRL_REG];
+    /* RRL register widths in byte per set. */
+    u8 widths[NUM_RRL_REG];
+    /* RRL control bits of the first register per set. */
+    u32 v_mask;
+    u32 uc_mask;
+    u32 over_mask;
+    u32 en_patspr_mask;
+    u32 noover_mask;
+    u32 en_mask;
+
+    /* CORRERRCNT register parts. */
+    int cecnt_num;
+    u32 cecnt_offsets[NUM_CECNT_REG];
+    u8 cecnt_widths[NUM_CECNT_REG];
+};
+
 /*
  * Each cpu socket contains some pci devices that provide global
  * information, and also some that are local to each of the two
···
 struct skx_channel {
     struct pci_dev *cdev;
     struct pci_dev *edev;
-    u32 retry_rd_err_log_s;
-    u32 retry_rd_err_log_d;
-    u32 retry_rd_err_log_d2;
+    /*
+     * Two groups of RRL control registers per channel to save default RRL
+     * settings of two {sub-,pseudo-}channels in Linux RRL control mode.
+     */
+    u32 rrl_ctl[2][NUM_RRL_SET];
     struct skx_dimm {
         u8 close_pg;
         u8 bank_xor_enable;
···
     /* HBM mdev device BDF */
     struct pci_bdf hbm_mdev_bdf;
     int sad_all_offset;
-    /* Offsets of retry_rd_err_log registers */
-    u32 *offsets_scrub;
-    u32 *offsets_scrub_hbm0;
-    u32 *offsets_scrub_hbm1;
-    u32 *offsets_demand;
-    u32 *offsets_demand2;
-    u32 *offsets_demand_hbm0;
-    u32 *offsets_demand_hbm1;
+    /* RRL register sets per DDR channel */
+    struct reg_rrl *reg_rrl_ddr;
+    /* RRL register sets per HBM channel */
+    struct reg_rrl *reg_rrl_hbm[2];
 };

 typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,