Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/MCE/AMD, EDAC/mce_amd: Add new SMCA bank types

Add the (HWID, MCATYPE) tuples and names for new SMCA bank types.

Also, add their respective error descriptions to the MCE decoding module
edac_mce_amd. While at it, merge the duplicated name entries of versioned
SMCA banks (e.g. LS/LS_V2) into single range-designated initializers.

[ bp: Drop repeated comments, explain why UMC_V2 is a separate entry. ]

Signed-off-by: Muralidhara M K <muralimk@amd.com>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Link: https://lkml.kernel.org/r/20210526164601.66228-1-nchatrad@amd.com

authored by

Muralidhara M K and committed by
Borislav Petkov
94a311ce c4681547

+113 -25
+9 -4
arch/x86/include/asm/mce.h
··· 305 305 /* These may be used by multiple smca_hwid_mcatypes */ 306 306 enum smca_bank_types { 307 307 SMCA_LS = 0, /* Load Store */ 308 - SMCA_LS_V2, /* Load Store */ 308 + SMCA_LS_V2, 309 309 SMCA_IF, /* Instruction Fetch */ 310 310 SMCA_L2_CACHE, /* L2 Cache */ 311 311 SMCA_DE, /* Decoder Unit */ ··· 314 314 SMCA_FP, /* Floating Point */ 315 315 SMCA_L3_CACHE, /* L3 Cache */ 316 316 SMCA_CS, /* Coherent Slave */ 317 - SMCA_CS_V2, /* Coherent Slave */ 317 + SMCA_CS_V2, 318 318 SMCA_PIE, /* Power, Interrupts, etc. */ 319 319 SMCA_UMC, /* Unified Memory Controller */ 320 + SMCA_UMC_V2, 320 321 SMCA_PB, /* Parameter Block */ 321 322 SMCA_PSP, /* Platform Security Processor */ 322 - SMCA_PSP_V2, /* Platform Security Processor */ 323 + SMCA_PSP_V2, 323 324 SMCA_SMU, /* System Management Unit */ 324 - SMCA_SMU_V2, /* System Management Unit */ 325 + SMCA_SMU_V2, 325 326 SMCA_MP5, /* Microprocessor 5 Unit */ 326 327 SMCA_NBIO, /* Northbridge IO Unit */ 327 328 SMCA_PCIE, /* PCI Express Unit */ 329 + SMCA_PCIE_V2, 330 + SMCA_XGMI_PCS, /* xGMI PCS Unit */ 331 + SMCA_XGMI_PHY, /* xGMI PHY Unit */ 332 + SMCA_WAFL_PHY, /* WAFL PHY Unit */ 328 333 N_SMCA_BANK_TYPES 329 334 }; 330 335
+34 -21
arch/x86/kernel/cpu/mce/amd.c
··· 77 77 }; 78 78 79 79 static struct smca_bank_name smca_names[] = { 80 - [SMCA_LS] = { "load_store", "Load Store Unit" }, 81 - [SMCA_LS_V2] = { "load_store", "Load Store Unit" }, 82 - [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, 83 - [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, 84 - [SMCA_DE] = { "decode_unit", "Decode Unit" }, 85 - [SMCA_RESERVED] = { "reserved", "Reserved" }, 86 - [SMCA_EX] = { "execution_unit", "Execution Unit" }, 87 - [SMCA_FP] = { "floating_point", "Floating Point Unit" }, 88 - [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, 89 - [SMCA_CS] = { "coherent_slave", "Coherent Slave" }, 90 - [SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" }, 91 - [SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, 92 - [SMCA_UMC] = { "umc", "Unified Memory Controller" }, 93 - [SMCA_PB] = { "param_block", "Parameter Block" }, 94 - [SMCA_PSP] = { "psp", "Platform Security Processor" }, 95 - [SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, 96 - [SMCA_SMU] = { "smu", "System Management Unit" }, 97 - [SMCA_SMU_V2] = { "smu", "System Management Unit" }, 98 - [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, 99 - [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, 100 - [SMCA_PCIE] = { "pcie", "PCI Express Unit" }, 80 + [SMCA_LS ... SMCA_LS_V2] = { "load_store", "Load Store Unit" }, 81 + [SMCA_IF] = { "insn_fetch", "Instruction Fetch Unit" }, 82 + [SMCA_L2_CACHE] = { "l2_cache", "L2 Cache" }, 83 + [SMCA_DE] = { "decode_unit", "Decode Unit" }, 84 + [SMCA_RESERVED] = { "reserved", "Reserved" }, 85 + [SMCA_EX] = { "execution_unit", "Execution Unit" }, 86 + [SMCA_FP] = { "floating_point", "Floating Point Unit" }, 87 + [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, 88 + [SMCA_CS ... SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" }, 89 + [SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, 90 + 91 + /* UMC v2 is separate because both of them can exist in a single system. 
*/ 92 + [SMCA_UMC] = { "umc", "Unified Memory Controller" }, 93 + [SMCA_UMC_V2] = { "umc_v2", "Unified Memory Controller v2" }, 94 + [SMCA_PB] = { "param_block", "Parameter Block" }, 95 + [SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, 96 + [SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" }, 97 + [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, 98 + [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, 99 + [SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" }, 100 + [SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" }, 101 + [SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" }, 102 + [SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" }, 101 103 }; 102 104 103 105 static const char *smca_get_name(enum smca_bank_types t) ··· 157 155 158 156 /* Unified Memory Controller MCA type */ 159 157 { SMCA_UMC, HWID_MCATYPE(0x96, 0x0) }, 158 + { SMCA_UMC_V2, HWID_MCATYPE(0x96, 0x1) }, 160 159 161 160 /* Parameter Block MCA type */ 162 161 { SMCA_PB, HWID_MCATYPE(0x05, 0x0) }, ··· 178 175 179 176 /* PCI Express Unit MCA type */ 180 177 { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) }, 178 + { SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) }, 179 + 180 + /* xGMI PCS MCA type */ 181 + { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) }, 182 + 183 + /* xGMI PHY MCA type */ 184 + { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) }, 185 + 186 + /* WAFL PHY MCA type */ 187 + { SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) }, 181 188 }; 182 189 183 190 struct smca_bank smca_banks[MAX_NR_BANKS];
+70
drivers/edac/mce_amd.c
··· 323 323 "AES SRAM ECC error", 324 324 }; 325 325 326 + static const char * const smca_umc2_mce_desc[] = { 327 + "DRAM ECC error", 328 + "Data poison error", 329 + "SDP parity error", 330 + "Reserved", 331 + "Address/Command parity error", 332 + "Write data parity error", 333 + "DCQ SRAM ECC error", 334 + "Reserved", 335 + "Read data parity error", 336 + "Rdb SRAM ECC error", 337 + "RdRsp SRAM ECC error", 338 + "LM32 MP errors", 339 + }; 340 + 326 341 static const char * const smca_pb_mce_desc[] = { 327 342 "An ECC error in the Parameter Block RAM array", 328 343 }; ··· 415 400 "CCIX Non-okay write response with data error", 416 401 }; 417 402 403 + static const char * const smca_pcie2_mce_desc[] = { 404 + "SDP Parity Error logging", 405 + }; 406 + 407 + static const char * const smca_xgmipcs_mce_desc[] = { 408 + "Data Loss Error", 409 + "Training Error", 410 + "Flow Control Acknowledge Error", 411 + "Rx Fifo Underflow Error", 412 + "Rx Fifo Overflow Error", 413 + "CRC Error", 414 + "BER Exceeded Error", 415 + "Tx Vcid Data Error", 416 + "Replay Buffer Parity Error", 417 + "Data Parity Error", 418 + "Replay Fifo Overflow Error", 419 + "Replay FIfo Underflow Error", 420 + "Elastic Fifo Overflow Error", 421 + "Deskew Error", 422 + "Flow Control CRC Error", 423 + "Data Startup Limit Error", 424 + "FC Init Timeout Error", 425 + "Recovery Timeout Error", 426 + "Ready Serial Timeout Error", 427 + "Ready Serial Attempt Error", 428 + "Recovery Attempt Error", 429 + "Recovery Relock Attempt Error", 430 + "Replay Attempt Error", 431 + "Sync Header Error", 432 + "Tx Replay Timeout Error", 433 + "Rx Replay Timeout Error", 434 + "LinkSub Tx Timeout Error", 435 + "LinkSub Rx Timeout Error", 436 + "Rx CMD Pocket Error", 437 + }; 438 + 439 + static const char * const smca_xgmiphy_mce_desc[] = { 440 + "RAM ECC Error", 441 + "ARC instruction buffer parity error", 442 + "ARC data buffer parity error", 443 + "PHY APB error", 444 + }; 445 + 446 + static const char * const 
smca_waflphy_mce_desc[] = { 447 + "RAM ECC Error", 448 + "ARC instruction buffer parity error", 449 + "ARC data buffer parity error", 450 + "PHY APB error", 451 + }; 452 + 418 453 struct smca_mce_desc { 419 454 const char * const *descs; 420 455 unsigned int num_descs; ··· 483 418 [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) }, 484 419 [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, 485 420 [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, 421 + [SMCA_UMC_V2] = { smca_umc2_mce_desc, ARRAY_SIZE(smca_umc2_mce_desc) }, 486 422 [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, 487 423 [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, 488 424 [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) }, ··· 492 426 [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, 493 427 [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) }, 494 428 [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) }, 429 + [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) }, 430 + [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) }, 431 + [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, 432 + [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) }, 495 433 }; 496 434 497 435 static bool f12h_mc0_mce(u16 ec, u8 xec)