Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v6.2-rc3 529 lines 14 kB view raw
1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Ampere Computing SoC's SMpro Error Monitoring Driver 4 * 5 * Copyright (c) 2022, Ampere Computing LLC 6 * 7 */ 8 9#include <linux/i2c.h> 10#include <linux/mod_devicetable.h> 11#include <linux/module.h> 12#include <linux/platform_device.h> 13#include <linux/regmap.h> 14 15/* GPI RAS Error Registers */ 16#define GPI_RAS_ERR 0x7E 17 18/* Core and L2C Error Registers */ 19#define CORE_CE_ERR_CNT 0x80 20#define CORE_CE_ERR_LEN 0x81 21#define CORE_CE_ERR_DATA 0x82 22#define CORE_UE_ERR_CNT 0x83 23#define CORE_UE_ERR_LEN 0x84 24#define CORE_UE_ERR_DATA 0x85 25 26/* Memory Error Registers */ 27#define MEM_CE_ERR_CNT 0x90 28#define MEM_CE_ERR_LEN 0x91 29#define MEM_CE_ERR_DATA 0x92 30#define MEM_UE_ERR_CNT 0x93 31#define MEM_UE_ERR_LEN 0x94 32#define MEM_UE_ERR_DATA 0x95 33 34/* RAS Error/Warning Registers */ 35#define ERR_SMPRO_TYPE 0xA0 36#define ERR_PMPRO_TYPE 0xA1 37#define ERR_SMPRO_INFO_LO 0xA2 38#define ERR_SMPRO_INFO_HI 0xA3 39#define ERR_SMPRO_DATA_LO 0xA4 40#define ERR_SMPRO_DATA_HI 0xA5 41#define WARN_SMPRO_INFO_LO 0xAA 42#define WARN_SMPRO_INFO_HI 0xAB 43#define ERR_PMPRO_INFO_LO 0xA6 44#define ERR_PMPRO_INFO_HI 0xA7 45#define ERR_PMPRO_DATA_LO 0xA8 46#define ERR_PMPRO_DATA_HI 0xA9 47#define WARN_PMPRO_INFO_LO 0xAC 48#define WARN_PMPRO_INFO_HI 0xAD 49 50/* PCIE Error Registers */ 51#define PCIE_CE_ERR_CNT 0xC0 52#define PCIE_CE_ERR_LEN 0xC1 53#define PCIE_CE_ERR_DATA 0xC2 54#define PCIE_UE_ERR_CNT 0xC3 55#define PCIE_UE_ERR_LEN 0xC4 56#define PCIE_UE_ERR_DATA 0xC5 57 58/* Other Error Registers */ 59#define OTHER_CE_ERR_CNT 0xD0 60#define OTHER_CE_ERR_LEN 0xD1 61#define OTHER_CE_ERR_DATA 0xD2 62#define OTHER_UE_ERR_CNT 0xD8 63#define OTHER_UE_ERR_LEN 0xD9 64#define OTHER_UE_ERR_DATA 0xDA 65 66/* Event Data Registers */ 67#define VRD_WARN_FAULT_EVENT_DATA 0x78 68#define VRD_HOT_EVENT_DATA 0x79 69#define DIMM_HOT_EVENT_DATA 0x7A 70 71#define MAX_READ_BLOCK_LENGTH 48 72 73#define RAS_SMPRO_ERR 0 74#define RAS_PMPRO_ERR 1 75 76enum RAS_48BYTES_ERR_TYPES { 77 CORE_CE_ERR, 78 CORE_UE_ERR, 79 MEM_CE_ERR, 80 MEM_UE_ERR, 81 PCIE_CE_ERR, 82 PCIE_UE_ERR, 83 OTHER_CE_ERR, 84 OTHER_UE_ERR, 85 NUM_48BYTES_ERR_TYPE, 86}; 87 88struct smpro_error_hdr { 89 u8 count; /* Number of the RAS errors */ 90 u8 len; /* Number of data bytes */ 91 u8 data; /* Start of 48-byte data */ 92 u8 max_cnt; /* Max num of errors */ 93}; 94 95/* 96 * Included Address of registers to get Count, Length of data and Data 97 * of the 48 bytes error data 98 */ 99static struct smpro_error_hdr smpro_error_table[] = { 100 [CORE_CE_ERR] = { 101 .count = CORE_CE_ERR_CNT, 102 .len = CORE_CE_ERR_LEN, 103 .data = CORE_CE_ERR_DATA, 104 .max_cnt = 32 105 }, 106 [CORE_UE_ERR] = { 107 .count = CORE_UE_ERR_CNT, 108 .len = CORE_UE_ERR_LEN, 109 .data = CORE_UE_ERR_DATA, 110 .max_cnt = 32 111 }, 112 [MEM_CE_ERR] = { 113 .count = MEM_CE_ERR_CNT, 114 .len = MEM_CE_ERR_LEN, 115 .data = MEM_CE_ERR_DATA, 116 .max_cnt = 16 117 }, 118 [MEM_UE_ERR] = { 119 .count = MEM_UE_ERR_CNT, 120 .len = MEM_UE_ERR_LEN, 121 .data = MEM_UE_ERR_DATA, 122 .max_cnt = 16 123 }, 124 [PCIE_CE_ERR] = { 125 .count = PCIE_CE_ERR_CNT, 126 .len = PCIE_CE_ERR_LEN, 127 .data = PCIE_CE_ERR_DATA, 128 .max_cnt = 96 129 }, 130 [PCIE_UE_ERR] = { 131 .count = PCIE_UE_ERR_CNT, 132 .len = PCIE_UE_ERR_LEN, 133 .data = PCIE_UE_ERR_DATA, 134 .max_cnt = 96 135 }, 136 [OTHER_CE_ERR] = { 137 .count = OTHER_CE_ERR_CNT, 138 .len = OTHER_CE_ERR_LEN, 139 .data = OTHER_CE_ERR_DATA, 140 .max_cnt = 8 141 }, 142 [OTHER_UE_ERR] = { 143 .count = OTHER_UE_ERR_CNT, 144 .len = OTHER_UE_ERR_LEN, 145 .data = OTHER_UE_ERR_DATA, 146 .max_cnt = 8 147 }, 148}; 149 150/* 151 * List of SCP registers which are used to get 152 * one type of RAS Internal errors. 153 */ 154struct smpro_int_error_hdr { 155 u8 type; 156 u8 info_l; 157 u8 info_h; 158 u8 data_l; 159 u8 data_h; 160 u8 warn_l; 161 u8 warn_h; 162}; 163 164static struct smpro_int_error_hdr list_smpro_int_error_hdr[] = { 165 [RAS_SMPRO_ERR] = { 166 .type = ERR_SMPRO_TYPE, 167 .info_l = ERR_SMPRO_INFO_LO, 168 .info_h = ERR_SMPRO_INFO_HI, 169 .data_l = ERR_SMPRO_DATA_LO, 170 .data_h = ERR_SMPRO_DATA_HI, 171 .warn_l = WARN_SMPRO_INFO_LO, 172 .warn_h = WARN_SMPRO_INFO_HI, 173 }, 174 [RAS_PMPRO_ERR] = { 175 .type = ERR_PMPRO_TYPE, 176 .info_l = ERR_PMPRO_INFO_LO, 177 .info_h = ERR_PMPRO_INFO_HI, 178 .data_l = ERR_PMPRO_DATA_LO, 179 .data_h = ERR_PMPRO_DATA_HI, 180 .warn_l = WARN_PMPRO_INFO_LO, 181 .warn_h = WARN_PMPRO_INFO_HI, 182 }, 183}; 184 185struct smpro_errmon { 186 struct regmap *regmap; 187}; 188 189enum EVENT_TYPES { 190 VRD_WARN_FAULT_EVENT, 191 VRD_HOT_EVENT, 192 DIMM_HOT_EVENT, 193 NUM_EVENTS_TYPE, 194}; 195 196/* Included Address of event source and data registers */ 197static u8 smpro_event_table[NUM_EVENTS_TYPE] = { 198 VRD_WARN_FAULT_EVENT_DATA, 199 VRD_HOT_EVENT_DATA, 200 DIMM_HOT_EVENT_DATA, 201}; 202 203static ssize_t smpro_event_data_read(struct device *dev, 204 struct device_attribute *da, char *buf, 205 int channel) 206{ 207 struct smpro_errmon *errmon = dev_get_drvdata(dev); 208 s32 event_data; 209 int ret; 210 211 ret = regmap_read(errmon->regmap, smpro_event_table[channel], &event_data); 212 if (ret) 213 return ret; 214 /* Clear event after read */ 215 if (event_data != 0) 216 regmap_write(errmon->regmap, smpro_event_table[channel], event_data); 217 218 return sysfs_emit(buf, "%04x\n", event_data); 219} 220 221static ssize_t smpro_overflow_data_read(struct device *dev, struct device_attribute *da, 222 char *buf, int channel) 223{ 224 struct smpro_errmon *errmon = dev_get_drvdata(dev); 225 struct smpro_error_hdr *err_info; 226 s32 err_count; 227 int ret; 228 229 err_info = &smpro_error_table[channel]; 230 231 ret = regmap_read(errmon->regmap, err_info->count, &err_count); 232 if (ret) 233 return ret; 234 235 /* Bit 8 indicates the overflow status */ 236 return sysfs_emit(buf, "%d\n", (err_count & BIT(8)) ? 1 : 0); 237} 238 239static ssize_t smpro_error_data_read(struct device *dev, struct device_attribute *da, 240 char *buf, int channel) 241{ 242 struct smpro_errmon *errmon = dev_get_drvdata(dev); 243 unsigned char err_data[MAX_READ_BLOCK_LENGTH]; 244 struct smpro_error_hdr *err_info; 245 s32 err_count, err_length; 246 int ret; 247 248 err_info = &smpro_error_table[channel]; 249 250 ret = regmap_read(errmon->regmap, err_info->count, &err_count); 251 /* Error count is the low byte */ 252 err_count &= 0xff; 253 if (ret || !err_count || err_count > err_info->max_cnt) 254 return ret; 255 256 ret = regmap_read(errmon->regmap, err_info->len, &err_length); 257 if (ret || err_length <= 0) 258 return ret; 259 260 if (err_length > MAX_READ_BLOCK_LENGTH) 261 err_length = MAX_READ_BLOCK_LENGTH; 262 263 memset(err_data, 0x00, MAX_READ_BLOCK_LENGTH); 264 ret = regmap_noinc_read(errmon->regmap, err_info->data, err_data, err_length); 265 if (ret < 0) 266 return ret; 267 268 /* clear the error */ 269 ret = regmap_write(errmon->regmap, err_info->count, 0x100); 270 if (ret) 271 return ret; 272 /* 273 * The output of Core/Memory/PCIe/Others UE/CE errors follows the format 274 * specified in section 5.8.1 CE/UE Error Data record in 275 * Altra SOC BMC Interface specification. 276 */ 277 return sysfs_emit(buf, "%*phN\n", MAX_READ_BLOCK_LENGTH, err_data); 278} 279 280/* 281 * Output format: 282 * <4-byte hex value of error info><4-byte hex value of error extensive data> 283 * Where: 284 * + error info : The error information 285 * + error data : Extensive data (32 bits) 286 * Reference to section 5.10 RAS Internal Error Register Definition in 287 * Altra SOC BMC Interface specification 288 */ 289static ssize_t smpro_internal_err_read(struct device *dev, struct device_attribute *da, 290 char *buf, int channel) 291{ 292 struct smpro_errmon *errmon = dev_get_drvdata(dev); 293 struct smpro_int_error_hdr *err_info; 294 unsigned int err[4] = { 0 }; 295 unsigned int err_type; 296 unsigned int val; 297 int ret; 298 299 /* read error status */ 300 ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val); 301 if (ret) 302 return ret; 303 304 if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) || 305 (channel == RAS_PMPRO_ERR && !(val & BIT(1)))) 306 return 0; 307 308 err_info = &list_smpro_int_error_hdr[channel]; 309 ret = regmap_read(errmon->regmap, err_info->type, &val); 310 if (ret) 311 return ret; 312 313 err_type = (val & BIT(1)) ? BIT(1) : 314 (val & BIT(2)) ? BIT(2) : 0; 315 316 if (!err_type) 317 return 0; 318 319 ret = regmap_read(errmon->regmap, err_info->info_l, err + 1); 320 if (ret) 321 return ret; 322 323 ret = regmap_read(errmon->regmap, err_info->info_h, err); 324 if (ret) 325 return ret; 326 327 if (err_type & BIT(2)) { 328 /* Error with data type */ 329 ret = regmap_read(errmon->regmap, err_info->data_l, err + 3); 330 if (ret) 331 return ret; 332 333 ret = regmap_read(errmon->regmap, err_info->data_h, err + 2); 334 if (ret) 335 return ret; 336 } 337 338 /* clear the read errors */ 339 ret = regmap_write(errmon->regmap, err_info->type, err_type); 340 if (ret) 341 return ret; 342 343 return sysfs_emit(buf, "%*phN\n", (int)sizeof(err), err); 344} 345 346/* 347 * Output format: 348 * <4-byte hex value of warining info> 349 * Reference to section 5.10 RAS Internal Error Register Definition in 350 * Altra SOC BMC Interface specification 351 */ 352static ssize_t smpro_internal_warn_read(struct device *dev, struct device_attribute *da, 353 char *buf, int channel) 354{ 355 struct smpro_errmon *errmon = dev_get_drvdata(dev); 356 struct smpro_int_error_hdr *err_info; 357 unsigned int warn[2] = { 0 }; 358 unsigned int val; 359 int ret; 360 361 /* read error status */ 362 ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val); 363 if (ret) 364 return ret; 365 366 if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) || 367 (channel == RAS_PMPRO_ERR && !(val & BIT(1)))) 368 return 0; 369 370 err_info = &list_smpro_int_error_hdr[channel]; 371 ret = regmap_read(errmon->regmap, err_info->type, &val); 372 if (ret) 373 return ret; 374 375 if (!(val & BIT(0))) 376 return 0; 377 378 ret = regmap_read(errmon->regmap, err_info->warn_l, warn + 1); 379 if (ret) 380 return ret; 381 382 ret = regmap_read(errmon->regmap, err_info->warn_h, warn); 383 if (ret) 384 return ret; 385 386 /* clear the warning */ 387 ret = regmap_write(errmon->regmap, err_info->type, BIT(0)); 388 if (ret) 389 return ret; 390 391 return sysfs_emit(buf, "%*phN\n", (int)sizeof(warn), warn); 392} 393 394#define ERROR_OVERFLOW_RO(_error, _index) \ 395 static ssize_t overflow_##_error##_show(struct device *dev, \ 396 struct device_attribute *da, \ 397 char *buf) \ 398 { \ 399 return smpro_overflow_data_read(dev, da, buf, _index); \ 400 } \ 401 static DEVICE_ATTR_RO(overflow_##_error) 402 403ERROR_OVERFLOW_RO(core_ce, CORE_CE_ERR); 404ERROR_OVERFLOW_RO(core_ue, CORE_UE_ERR); 405ERROR_OVERFLOW_RO(mem_ce, MEM_CE_ERR); 406ERROR_OVERFLOW_RO(mem_ue, MEM_UE_ERR); 407ERROR_OVERFLOW_RO(pcie_ce, PCIE_CE_ERR); 408ERROR_OVERFLOW_RO(pcie_ue, PCIE_UE_ERR); 409ERROR_OVERFLOW_RO(other_ce, OTHER_CE_ERR); 410ERROR_OVERFLOW_RO(other_ue, OTHER_UE_ERR); 411 412#define ERROR_RO(_error, _index) \ 413 static ssize_t error_##_error##_show(struct device *dev, \ 414 struct device_attribute *da, \ 415 char *buf) \ 416 { \ 417 return smpro_error_data_read(dev, da, buf, _index); \ 418 } \ 419 static DEVICE_ATTR_RO(error_##_error) 420 421ERROR_RO(core_ce, CORE_CE_ERR); 422ERROR_RO(core_ue, CORE_UE_ERR); 423ERROR_RO(mem_ce, MEM_CE_ERR); 424ERROR_RO(mem_ue, MEM_UE_ERR); 425ERROR_RO(pcie_ce, PCIE_CE_ERR); 426ERROR_RO(pcie_ue, PCIE_UE_ERR); 427ERROR_RO(other_ce, OTHER_CE_ERR); 428ERROR_RO(other_ue, OTHER_UE_ERR); 429 430static ssize_t error_smpro_show(struct device *dev, struct device_attribute *da, char *buf) 431{ 432 return smpro_internal_err_read(dev, da, buf, RAS_SMPRO_ERR); 433} 434static DEVICE_ATTR_RO(error_smpro); 435 436static ssize_t error_pmpro_show(struct device *dev, struct device_attribute *da, char *buf) 437{ 438 return smpro_internal_err_read(dev, da, buf, RAS_PMPRO_ERR); 439} 440static DEVICE_ATTR_RO(error_pmpro); 441 442static ssize_t warn_smpro_show(struct device *dev, struct device_attribute *da, char *buf) 443{ 444 return smpro_internal_warn_read(dev, da, buf, RAS_SMPRO_ERR); 445} 446static DEVICE_ATTR_RO(warn_smpro); 447 448static ssize_t warn_pmpro_show(struct device *dev, struct device_attribute *da, char *buf) 449{ 450 return smpro_internal_warn_read(dev, da, buf, RAS_PMPRO_ERR); 451} 452static DEVICE_ATTR_RO(warn_pmpro); 453 454#define EVENT_RO(_event, _index) \ 455 static ssize_t event_##_event##_show(struct device *dev, \ 456 struct device_attribute *da, \ 457 char *buf) \ 458 { \ 459 return smpro_event_data_read(dev, da, buf, _index); \ 460 } \ 461 static DEVICE_ATTR_RO(event_##_event) 462 463EVENT_RO(vrd_warn_fault, VRD_WARN_FAULT_EVENT); 464EVENT_RO(vrd_hot, VRD_HOT_EVENT); 465EVENT_RO(dimm_hot, DIMM_HOT_EVENT); 466 467static struct attribute *smpro_errmon_attrs[] = { 468 &dev_attr_overflow_core_ce.attr, 469 &dev_attr_overflow_core_ue.attr, 470 &dev_attr_overflow_mem_ce.attr, 471 &dev_attr_overflow_mem_ue.attr, 472 &dev_attr_overflow_pcie_ce.attr, 473 &dev_attr_overflow_pcie_ue.attr, 474 &dev_attr_overflow_other_ce.attr, 475 &dev_attr_overflow_other_ue.attr, 476 &dev_attr_error_core_ce.attr, 477 &dev_attr_error_core_ue.attr, 478 &dev_attr_error_mem_ce.attr, 479 &dev_attr_error_mem_ue.attr, 480 &dev_attr_error_pcie_ce.attr, 481 &dev_attr_error_pcie_ue.attr, 482 &dev_attr_error_other_ce.attr, 483 &dev_attr_error_other_ue.attr, 484 &dev_attr_error_smpro.attr, 485 &dev_attr_error_pmpro.attr, 486 &dev_attr_warn_smpro.attr, 487 &dev_attr_warn_pmpro.attr, 488 &dev_attr_event_vrd_warn_fault.attr, 489 &dev_attr_event_vrd_hot.attr, 490 &dev_attr_event_dimm_hot.attr, 491 NULL 492}; 493 494ATTRIBUTE_GROUPS(smpro_errmon); 495 496static int smpro_errmon_probe(struct platform_device *pdev) 497{ 498 struct smpro_errmon *errmon; 499 500 errmon = devm_kzalloc(&pdev->dev, sizeof(struct smpro_errmon), GFP_KERNEL); 501 if (!errmon) 502 return -ENOMEM; 503 504 platform_set_drvdata(pdev, errmon); 505 506 errmon->regmap = dev_get_regmap(pdev->dev.parent, NULL); 507 if (!errmon->regmap) 508 return -ENODEV; 509 510 return 0; 511} 512 513static struct platform_driver smpro_errmon_driver = { 514 .probe = smpro_errmon_probe, 515 .driver = { 516 .name = "smpro-errmon", 517 .dev_groups = smpro_errmon_groups, 518 }, 519}; 520 521module_platform_driver(smpro_errmon_driver); 522 523MODULE_AUTHOR("Tung Nguyen <tung.nguyen@amperecomputing.com>"); 524MODULE_AUTHOR("Thinh Pham <thinh.pham@amperecomputing.com>"); 525MODULE_AUTHOR("Hoang Nguyen <hnguyen@amperecomputing.com>"); 526MODULE_AUTHOR("Thu Nguyen <thu@os.amperecomputing.com>"); 527MODULE_AUTHOR("Quan Nguyen <quan@os.amperecomputing.com>"); 528MODULE_DESCRIPTION("Ampere Altra SMpro driver"); 529MODULE_LICENSE("GPL");