Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

misc: smpro-errmon: Add dimm training failure syndrome

Add the event_dimm[0-15]_syndrome sysfs attributes to report the failure
syndrome to the BMC when DIMM training fails.

Signed-off-by: Quan Nguyen <quan@os.amperecomputing.com>
Link: https://lore.kernel.org/r/20230310083416.3670980-3-quan@os.amperecomputing.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

Quan Nguyen and committed by
Greg Kroah-Hartman
c2c99326 b0f64c80

+87
+10
Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
··· 265 265 For more details, see section `5.7 GPI Status Registers and 5.9 Memory Error Register Definitions, 266 266 Altra Family Soc BMC Interface Specification`. 267 267 268 + What: /sys/bus/platform/devices/smpro-errmon.*/event_dimm[0-15]_syndrome 269 + KernelVersion: 6.4 270 + Contact: Quan Nguyen <quan@os.amperecomputing.com> 271 + Description: 272 + (RO) Returns the 2-byte DIMM failure syndrome data for slot 273 + 0-15 if the DIMM in that slot failed to initialize. 274 + 275 + For more details, see section `5.11 Boot Stage Register Definitions, 276 + Altra Family Soc BMC Interface Specification`. 277 + 268 278 What: /sys/bus/platform/devices/smpro-misc.*/boot_progress 269 279 KernelVersion: 6.1 270 280 Contact: Quan Nguyen <quan@os.amperecomputing.com>
+77
drivers/misc/smpro-errmon.c
/*
 * Reader notes for the drivers/misc/smpro-errmon.c hunks below (the diff
 * text itself is unchanged; it is rendered on long lines with the old/new
 * line numbers and '+' markers inline).
 *
 * New register defines: BOOTSTAGE (0xB0), DIMM_SYNDROME_SEL (0xB4),
 * DIMM_SYNDROME_ERR (0xB5) and DIMM_SYNDROME_STAGE (4), the value that
 * bits 15:8 of BOOTSTAGE are compared against.
 *
 * smpro_dimm_syndrome_read(dev, da, buf, slot):
 *   1. Reads BOOTSTAGE through the regmap stored in the driver data; a
 *      nonzero regmap_read() result is returned as-is.
 *   2. Keeps only bits 15:8 of the value, (data >> 8) & 0xff. If that is
 *      not DIMM_SYNDROME_STAGE the function returns ret, which is 0 at
 *      that point, so nothing is written to buf.
 *   3. Writes slot to DIMM_SYNDROME_SEL, then reads DIMM_SYNDROME_ERR.
 *      A nonzero regmap result is returned as-is; a syndrome value of 0
 *      also returns ret (0) without writing to buf.
 *   4. Otherwise returns sysfs_emit() of the value formatted with %04x
 *      followed by a newline.
 *   The da argument is accepted but not used inside the helper.
 *   NOTE(review): a 0 return from a show callback presumably yields an
 *   empty read in user space - confirm this is the intended ABI.
 *
 * EVENT_DIMM_SYNDROME(_slot) defines event_dimm<_slot>_syndrome_show(),
 * which forwards to smpro_dimm_syndrome_read() with the fixed slot number,
 * and declares the attribute with DEVICE_ATTR_RO(). It is instantiated for
 * slots 0 through 15, and each resulting dev_attr_event_dimm<N>_syndrome.attr
 * is appended to smpro_errmon_attrs[] ahead of the terminating NULL.
 */
··· 47 47 #define WARN_PMPRO_INFO_LO 0xAC 48 48 #define WARN_PMPRO_INFO_HI 0xAD 49 49 50 + /* Boot Stage Register */ 51 + #define BOOTSTAGE 0xB0 52 + #define DIMM_SYNDROME_SEL 0xB4 53 + #define DIMM_SYNDROME_ERR 0xB5 54 + #define DIMM_SYNDROME_STAGE 4 55 + 50 56 /* PCIE Error Registers */ 51 57 #define PCIE_CE_ERR_CNT 0xC0 52 58 #define PCIE_CE_ERR_LEN 0xC1 ··· 474 468 EVENT_RO(dimm_hot, DIMM_HOT_EVENT); 475 469 EVENT_RO(dimm_2x_refresh, DIMM_2X_REFRESH_EVENT); 476 470 471 + static ssize_t smpro_dimm_syndrome_read(struct device *dev, struct device_attribute *da, 472 + char *buf, unsigned int slot) 473 + { 474 + struct smpro_errmon *errmon = dev_get_drvdata(dev); 475 + unsigned int data; 476 + int ret; 477 + 478 + ret = regmap_read(errmon->regmap, BOOTSTAGE, &data); 479 + if (ret) 480 + return ret; 481 + 482 + /* check for valid stage */ 483 + data = (data >> 8) & 0xff; 484 + if (data != DIMM_SYNDROME_STAGE) 485 + return ret; 486 + 487 + /* Write the slot ID to retrieve Error Syndrome */ 488 + ret = regmap_write(errmon->regmap, DIMM_SYNDROME_SEL, slot); 489 + if (ret) 490 + return ret; 491 + 492 + /* Read the Syndrome error */ 493 + ret = regmap_read(errmon->regmap, DIMM_SYNDROME_ERR, &data); 494 + if (ret || !data) 495 + return ret; 496 + 497 + return sysfs_emit(buf, "%04x\n", data); 498 + } 499 + 500 + #define EVENT_DIMM_SYNDROME(_slot) \ 501 + static ssize_t event_dimm##_slot##_syndrome_show(struct device *dev, \ 502 + struct device_attribute *da, \ 503 + char *buf) \ 504 + { \ 505 + return smpro_dimm_syndrome_read(dev, da, buf, _slot); \ 506 + } \ 507 + static DEVICE_ATTR_RO(event_dimm##_slot##_syndrome) 508 + 509 + EVENT_DIMM_SYNDROME(0); 510 + EVENT_DIMM_SYNDROME(1); 511 + EVENT_DIMM_SYNDROME(2); 512 + EVENT_DIMM_SYNDROME(3); 513 + EVENT_DIMM_SYNDROME(4); 514 + EVENT_DIMM_SYNDROME(5); 515 + EVENT_DIMM_SYNDROME(6); 516 + EVENT_DIMM_SYNDROME(7); 517 + EVENT_DIMM_SYNDROME(8); 518 + EVENT_DIMM_SYNDROME(9); 519 + EVENT_DIMM_SYNDROME(10); 520 + 
EVENT_DIMM_SYNDROME(11); 521 + EVENT_DIMM_SYNDROME(12); 522 + EVENT_DIMM_SYNDROME(13); 523 + EVENT_DIMM_SYNDROME(14); 524 + EVENT_DIMM_SYNDROME(15); 525 + 477 526 static struct attribute *smpro_errmon_attrs[] = { 478 527 &dev_attr_overflow_core_ce.attr, 479 528 &dev_attr_overflow_core_ue.attr, ··· 554 493 &dev_attr_event_vrd_hot.attr, 555 494 &dev_attr_event_dimm_hot.attr, 556 495 &dev_attr_event_dimm_2x_refresh.attr, 496 + &dev_attr_event_dimm0_syndrome.attr, 497 + &dev_attr_event_dimm1_syndrome.attr, 498 + &dev_attr_event_dimm2_syndrome.attr, 499 + &dev_attr_event_dimm3_syndrome.attr, 500 + &dev_attr_event_dimm4_syndrome.attr, 501 + &dev_attr_event_dimm5_syndrome.attr, 502 + &dev_attr_event_dimm6_syndrome.attr, 503 + &dev_attr_event_dimm7_syndrome.attr, 504 + &dev_attr_event_dimm8_syndrome.attr, 505 + &dev_attr_event_dimm9_syndrome.attr, 506 + &dev_attr_event_dimm10_syndrome.attr, 507 + &dev_attr_event_dimm11_syndrome.attr, 508 + &dev_attr_event_dimm12_syndrome.attr, 509 + &dev_attr_event_dimm13_syndrome.attr, 510 + &dev_attr_event_dimm14_syndrome.attr, 511 + &dev_attr_event_dimm15_syndrome.attr, 512 NULL 558 513 }; 559 514