at master 13 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Bluefield-specific EDAC driver. 4 * 5 * Copyright (c) 2019 Mellanox Technologies. 6 */ 7 8#include <linux/acpi.h> 9#include <linux/arm-smccc.h> 10#include <linux/bitfield.h> 11#include <linux/edac.h> 12#include <linux/io.h> 13#include <linux/module.h> 14#include <linux/platform_device.h> 15 16#include "edac_module.h" 17 18#define DRIVER_NAME "bluefield-edac" 19 20/* 21 * Mellanox BlueField EMI (External Memory Interface) register definitions. 22 */ 23 24#define MLXBF_ECC_CNT 0x340 25#define MLXBF_ECC_CNT__SERR_CNT GENMASK(15, 0) 26#define MLXBF_ECC_CNT__DERR_CNT GENMASK(31, 16) 27 28#define MLXBF_ECC_ERR 0x348 29#define MLXBF_ECC_ERR__SECC BIT(0) 30#define MLXBF_ECC_ERR__DECC BIT(16) 31 32#define MLXBF_ECC_LATCH_SEL 0x354 33#define MLXBF_ECC_LATCH_SEL__START BIT(24) 34 35#define MLXBF_ERR_ADDR_0 0x358 36 37#define MLXBF_ERR_ADDR_1 0x37c 38 39#define MLXBF_SYNDROM 0x35c 40#define MLXBF_SYNDROM__DERR BIT(0) 41#define MLXBF_SYNDROM__SERR BIT(1) 42#define MLXBF_SYNDROM__SYN GENMASK(25, 16) 43 44#define MLXBF_ADD_INFO 0x364 45#define MLXBF_ADD_INFO__ERR_PRANK GENMASK(9, 8) 46 47#define MLXBF_EDAC_MAX_DIMM_PER_MC 2 48#define MLXBF_EDAC_ERROR_GRAIN 8 49 50#define MLXBF_WRITE_REG_32 (0x82000009) 51#define MLXBF_READ_REG_32 (0x8200000A) 52#define MLXBF_SIP_SVC_VERSION (0x8200ff03) 53 54#define MLXBF_SMCCC_ACCESS_VIOLATION (-4) 55 56#define MLXBF_SVC_REQ_MAJOR 0 57#define MLXBF_SVC_REQ_MINOR 3 58 59/* 60 * Request MLXBF_SIP_GET_DIMM_INFO 61 * 62 * Retrieve information about DIMM on a certain slot. 63 * 64 * Call register usage: 65 * a0: MLXBF_SIP_GET_DIMM_INFO 66 * a1: (Memory controller index) << 16 | (Dimm index in memory controller) 67 * a2-7: not used. 68 * 69 * Return status: 70 * a0: MLXBF_DIMM_INFO defined below describing the DIMM. 71 * a1-3: not used. 72 */ 73#define MLXBF_SIP_GET_DIMM_INFO 0x82000008 74 75/* Format for the SMC response about the memory information */ 76#define MLXBF_DIMM_INFO__SIZE_GB GENMASK_ULL(15, 0) 77#define MLXBF_DIMM_INFO__IS_RDIMM BIT(16) 78#define MLXBF_DIMM_INFO__IS_LRDIMM BIT(17) 79#define MLXBF_DIMM_INFO__IS_NVDIMM BIT(18) 80#define MLXBF_DIMM_INFO__RANKS GENMASK_ULL(23, 21) 81#define MLXBF_DIMM_INFO__PACKAGE_X GENMASK_ULL(31, 24) 82 83struct bluefield_edac_priv { 84 /* pointer to device structure */ 85 struct device *dev; 86 int dimm_ranks[MLXBF_EDAC_MAX_DIMM_PER_MC]; 87 void __iomem *emi_base; 88 int dimm_per_mc; 89 /* access to secure regs supported */ 90 bool svc_sreg_support; 91 /* SMC table# for secure regs access */ 92 u32 sreg_tbl; 93}; 94 95static u64 smc_call1(u64 smc_op, u64 smc_arg) 96{ 97 struct arm_smccc_res res; 98 99 arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res); 100 101 return res.a0; 102} 103 104static int secure_readl(void __iomem *addr, u32 *result, u32 sreg_tbl) 105{ 106 struct arm_smccc_res res; 107 int status; 108 109 arm_smccc_smc(MLXBF_READ_REG_32, sreg_tbl, (uintptr_t)addr, 110 0, 0, 0, 0, 0, &res); 111 112 status = res.a0; 113 114 if (status == SMCCC_RET_NOT_SUPPORTED || 115 status == MLXBF_SMCCC_ACCESS_VIOLATION) 116 return -1; 117 118 *result = (u32)res.a1; 119 return 0; 120} 121 122static int secure_writel(void __iomem *addr, u32 data, u32 sreg_tbl) 123{ 124 struct arm_smccc_res res; 125 int status; 126 127 arm_smccc_smc(MLXBF_WRITE_REG_32, sreg_tbl, data, (uintptr_t)addr, 128 0, 0, 0, 0, &res); 129 130 status = res.a0; 131 132 if (status == SMCCC_RET_NOT_SUPPORTED || 133 status == MLXBF_SMCCC_ACCESS_VIOLATION) 134 return -1; 135 else 136 return 0; 137} 138 139static int bluefield_edac_readl(struct bluefield_edac_priv *priv, u32 offset, u32 *result) 140{ 141 void __iomem *addr; 142 int err = 0; 143 144 addr = priv->emi_base + offset; 145 146 if (priv->svc_sreg_support) 147 err = secure_readl(addr, result, priv->sreg_tbl); 148 else 149 *result = readl(addr); 150 151 return err; 152} 153 154static int bluefield_edac_writel(struct bluefield_edac_priv *priv, u32 offset, u32 data) 155{ 156 void __iomem *addr; 157 int err = 0; 158 159 addr = priv->emi_base + offset; 160 161 if (priv->svc_sreg_support) 162 err = secure_writel(addr, data, priv->sreg_tbl); 163 else 164 writel(data, addr); 165 166 return err; 167} 168 169/* 170 * Gather the ECC information from the External Memory Interface registers 171 * and report it to the edac handler. 172 */ 173static void bluefield_gather_report_ecc(struct mem_ctl_info *mci, 174 int error_cnt, 175 int is_single_ecc) 176{ 177 struct bluefield_edac_priv *priv = mci->pvt_info; 178 u32 dram_additional_info, err_prank, edea0, edea1; 179 u32 ecc_latch_select, dram_syndrom, serr, derr, syndrom; 180 enum hw_event_mc_err_type ecc_type; 181 u64 ecc_dimm_addr; 182 int ecc_dimm, err; 183 184 ecc_type = is_single_ecc ? HW_EVENT_ERR_CORRECTED : 185 HW_EVENT_ERR_UNCORRECTED; 186 187 /* 188 * Tell the External Memory Interface to populate the relevant 189 * registers with information about the last ECC error occurrence. 190 */ 191 ecc_latch_select = MLXBF_ECC_LATCH_SEL__START; 192 err = bluefield_edac_writel(priv, MLXBF_ECC_LATCH_SEL, ecc_latch_select); 193 if (err) 194 dev_err(priv->dev, "ECC latch select write failed.\n"); 195 196 /* 197 * Verify that the ECC reported info in the registers is of the 198 * same type as the one asked to report. If not, just report the 199 * error without the detailed information. 200 */ 201 err = bluefield_edac_readl(priv, MLXBF_SYNDROM, &dram_syndrom); 202 if (err) { 203 dev_err(priv->dev, "DRAM syndrom read failed.\n"); 204 return; 205 } 206 207 serr = FIELD_GET(MLXBF_SYNDROM__SERR, dram_syndrom); 208 derr = FIELD_GET(MLXBF_SYNDROM__DERR, dram_syndrom); 209 syndrom = FIELD_GET(MLXBF_SYNDROM__SYN, dram_syndrom); 210 211 if ((is_single_ecc && !serr) || (!is_single_ecc && !derr)) { 212 edac_mc_handle_error(ecc_type, mci, error_cnt, 0, 0, 0, 213 0, 0, -1, mci->ctl_name, ""); 214 return; 215 } 216 217 err = bluefield_edac_readl(priv, MLXBF_ADD_INFO, &dram_additional_info); 218 if (err) { 219 dev_err(priv->dev, "DRAM additional info read failed.\n"); 220 return; 221 } 222 223 err_prank = FIELD_GET(MLXBF_ADD_INFO__ERR_PRANK, dram_additional_info); 224 225 ecc_dimm = (err_prank >= 2 && priv->dimm_ranks[0] <= 2) ? 1 : 0; 226 227 err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_0, &edea0); 228 if (err) { 229 dev_err(priv->dev, "Error addr 0 read failed.\n"); 230 return; 231 } 232 233 err = bluefield_edac_readl(priv, MLXBF_ERR_ADDR_1, &edea1); 234 if (err) { 235 dev_err(priv->dev, "Error addr 1 read failed.\n"); 236 return; 237 } 238 239 ecc_dimm_addr = ((u64)edea1 << 32) | edea0; 240 241 edac_mc_handle_error(ecc_type, mci, error_cnt, 242 PFN_DOWN(ecc_dimm_addr), 243 offset_in_page(ecc_dimm_addr), 244 syndrom, ecc_dimm, 0, 0, mci->ctl_name, ""); 245} 246 247static void bluefield_edac_check(struct mem_ctl_info *mci) 248{ 249 struct bluefield_edac_priv *priv = mci->pvt_info; 250 u32 ecc_count, single_error_count, double_error_count, ecc_error = 0; 251 int err; 252 253 /* 254 * The memory controller might not be initialized by the firmware 255 * when there isn't memory, which may lead to bad register readings. 256 */ 257 if (mci->edac_cap == EDAC_FLAG_NONE) 258 return; 259 260 err = bluefield_edac_readl(priv, MLXBF_ECC_CNT, &ecc_count); 261 if (err) { 262 dev_err(priv->dev, "ECC count read failed.\n"); 263 return; 264 } 265 266 single_error_count = FIELD_GET(MLXBF_ECC_CNT__SERR_CNT, ecc_count); 267 double_error_count = FIELD_GET(MLXBF_ECC_CNT__DERR_CNT, ecc_count); 268 269 if (single_error_count) { 270 ecc_error |= MLXBF_ECC_ERR__SECC; 271 272 bluefield_gather_report_ecc(mci, single_error_count, 1); 273 } 274 275 if (double_error_count) { 276 ecc_error |= MLXBF_ECC_ERR__DECC; 277 278 bluefield_gather_report_ecc(mci, double_error_count, 0); 279 } 280 281 /* Write to clear reported errors. */ 282 if (ecc_count) { 283 err = bluefield_edac_writel(priv, MLXBF_ECC_ERR, ecc_error); 284 if (err) 285 dev_err(priv->dev, "ECC Error write failed.\n"); 286 } 287} 288 289/* Initialize the DIMMs information for the given memory controller. */ 290static void bluefield_edac_init_dimms(struct mem_ctl_info *mci) 291{ 292 struct bluefield_edac_priv *priv = mci->pvt_info; 293 u64 mem_ctrl_idx = mci->mc_idx; 294 struct dimm_info *dimm; 295 u64 smc_info, smc_arg; 296 int is_empty = 1, i; 297 298 for (i = 0; i < priv->dimm_per_mc; i++) { 299 dimm = mci->dimms[i]; 300 301 smc_arg = mem_ctrl_idx << 16 | i; 302 smc_info = smc_call1(MLXBF_SIP_GET_DIMM_INFO, smc_arg); 303 304 if (!FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info)) { 305 dimm->mtype = MEM_EMPTY; 306 continue; 307 } 308 309 is_empty = 0; 310 311 dimm->edac_mode = EDAC_SECDED; 312 313 if (FIELD_GET(MLXBF_DIMM_INFO__IS_NVDIMM, smc_info)) 314 dimm->mtype = MEM_NVDIMM; 315 else if (FIELD_GET(MLXBF_DIMM_INFO__IS_LRDIMM, smc_info)) 316 dimm->mtype = MEM_LRDDR4; 317 else if (FIELD_GET(MLXBF_DIMM_INFO__IS_RDIMM, smc_info)) 318 dimm->mtype = MEM_RDDR4; 319 else 320 dimm->mtype = MEM_DDR4; 321 322 dimm->nr_pages = 323 FIELD_GET(MLXBF_DIMM_INFO__SIZE_GB, smc_info) * 324 (SZ_1G / PAGE_SIZE); 325 dimm->grain = MLXBF_EDAC_ERROR_GRAIN; 326 327 /* Mem controller for BlueField only supports x4, x8 and x16 */ 328 switch (FIELD_GET(MLXBF_DIMM_INFO__PACKAGE_X, smc_info)) { 329 case 4: 330 dimm->dtype = DEV_X4; 331 break; 332 case 8: 333 dimm->dtype = DEV_X8; 334 break; 335 case 16: 336 dimm->dtype = DEV_X16; 337 break; 338 default: 339 dimm->dtype = DEV_UNKNOWN; 340 } 341 342 priv->dimm_ranks[i] = 343 FIELD_GET(MLXBF_DIMM_INFO__RANKS, smc_info); 344 } 345 346 if (is_empty) 347 mci->edac_cap = EDAC_FLAG_NONE; 348 else 349 mci->edac_cap = EDAC_FLAG_SECDED; 350} 351 352static int bluefield_edac_mc_probe(struct platform_device *pdev) 353{ 354 struct bluefield_edac_priv *priv; 355 struct device *dev = &pdev->dev; 356 struct edac_mc_layer layers[1]; 357 struct arm_smccc_res res; 358 struct mem_ctl_info *mci; 359 struct resource *emi_res; 360 unsigned int mc_idx, dimm_count; 361 int rc, ret; 362 363 /* Read the MSS (Memory SubSystem) index from ACPI table. */ 364 if (device_property_read_u32(dev, "mss_number", &mc_idx)) { 365 dev_warn(dev, "bf_edac: MSS number unknown\n"); 366 return -EINVAL; 367 } 368 369 /* Read the DIMMs per MC from ACPI table. */ 370 if (device_property_read_u32(dev, "dimm_per_mc", &dimm_count)) { 371 dev_warn(dev, "bf_edac: DIMMs per MC unknown\n"); 372 return -EINVAL; 373 } 374 375 if (dimm_count > MLXBF_EDAC_MAX_DIMM_PER_MC) { 376 dev_warn(dev, "bf_edac: DIMMs per MC not valid\n"); 377 return -EINVAL; 378 } 379 380 emi_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 381 if (!emi_res) 382 return -EINVAL; 383 384 layers[0].type = EDAC_MC_LAYER_SLOT; 385 layers[0].size = dimm_count; 386 layers[0].is_virt_csrow = true; 387 388 mci = edac_mc_alloc(mc_idx, ARRAY_SIZE(layers), layers, sizeof(*priv)); 389 if (!mci) 390 return -ENOMEM; 391 392 priv = mci->pvt_info; 393 priv->dev = dev; 394 395 /* 396 * The "sec_reg_block" property in the ACPI table determines the method 397 * the driver uses to access the EMI registers: 398 * a) property is not present - directly access registers via readl/writel 399 * b) property is present - indirectly access registers via SMC calls 400 * (assuming required Silicon Provider service version found) 401 */ 402 if (device_property_read_u32(dev, "sec_reg_block", &priv->sreg_tbl)) { 403 priv->svc_sreg_support = false; 404 } else { 405 /* 406 * Check for minimum required Arm Silicon Provider (SiP) service 407 * version, ensuring support of required SMC function IDs. 408 */ 409 arm_smccc_smc(MLXBF_SIP_SVC_VERSION, 0, 0, 0, 0, 0, 0, 0, &res); 410 if (res.a0 == MLXBF_SVC_REQ_MAJOR && 411 res.a1 >= MLXBF_SVC_REQ_MINOR) { 412 priv->svc_sreg_support = true; 413 } else { 414 dev_err(dev, "Required SMCs are not supported.\n"); 415 ret = -EINVAL; 416 goto err; 417 } 418 } 419 420 priv->dimm_per_mc = dimm_count; 421 if (!priv->svc_sreg_support) { 422 priv->emi_base = devm_ioremap_resource(dev, emi_res); 423 if (IS_ERR(priv->emi_base)) { 424 dev_err(dev, "failed to map EMI IO resource\n"); 425 ret = PTR_ERR(priv->emi_base); 426 goto err; 427 } 428 } else { 429 priv->emi_base = (void __iomem *)emi_res->start; 430 } 431 432 mci->pdev = dev; 433 mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4 | 434 MEM_FLAG_LRDDR4 | MEM_FLAG_NVDIMM; 435 mci->edac_ctl_cap = EDAC_FLAG_SECDED; 436 437 mci->mod_name = DRIVER_NAME; 438 mci->ctl_name = "BlueField_Memory_Controller"; 439 mci->dev_name = dev_name(dev); 440 mci->edac_check = bluefield_edac_check; 441 442 /* Initialize mci with the actual populated DIMM information. */ 443 bluefield_edac_init_dimms(mci); 444 445 platform_set_drvdata(pdev, mci); 446 447 /* Register with EDAC core */ 448 rc = edac_mc_add_mc(mci); 449 if (rc) { 450 dev_err(dev, "failed to register with EDAC core\n"); 451 ret = rc; 452 goto err; 453 } 454 455 /* Only POLL mode supported so far. */ 456 edac_op_state = EDAC_OPSTATE_POLL; 457 458 return 0; 459 460err: 461 edac_mc_free(mci); 462 463 return ret; 464} 465 466static void bluefield_edac_mc_remove(struct platform_device *pdev) 467{ 468 struct mem_ctl_info *mci = platform_get_drvdata(pdev); 469 470 edac_mc_del_mc(&pdev->dev); 471 edac_mc_free(mci); 472} 473 474static const struct acpi_device_id bluefield_mc_acpi_ids[] = { 475 {"MLNXBF08", 0}, 476 {} 477}; 478 479MODULE_DEVICE_TABLE(acpi, bluefield_mc_acpi_ids); 480 481static struct platform_driver bluefield_edac_mc_driver = { 482 .driver = { 483 .name = DRIVER_NAME, 484 .acpi_match_table = bluefield_mc_acpi_ids, 485 }, 486 .probe = bluefield_edac_mc_probe, 487 .remove = bluefield_edac_mc_remove, 488}; 489 490module_platform_driver(bluefield_edac_mc_driver); 491 492MODULE_DESCRIPTION("Mellanox BlueField memory edac driver"); 493MODULE_AUTHOR("Mellanox Technologies"); 494MODULE_LICENSE("GPL v2");