Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

PCI/AER: Convert aer_get_device_error_info(), aer_print_error() to index

Previously aer_get_device_error_info() and aer_print_error() took a pointer
to struct aer_err_info and a pointer to a pci_dev. Typically the pci_dev
was one of the elements of the aer_err_info.dev[] array (DPC was an
exception, where the dev[] array was unused).

Convert aer_get_device_error_info() and aer_print_error() to take an index
into the aer_err_info.dev[] array instead. A future patch will add
per-device ratelimit information, so the index makes it convenient to find
the ratelimit associated with the device.

To accommodate DPC, set info->dev[0] to the DPC port and info->error_dev_num
to 1 before using these interfaces.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Link: https://patch.msgid.link/20250522232339.1525671-17-helgaas@kernel.org

+31 -14
+2 -2
drivers/pci/pci.h
··· 605 605 struct pcie_tlp_log tlp; /* TLP Header */ 606 606 }; 607 607 608 - int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); 609 - void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); 608 + int aer_get_device_error_info(struct aer_err_info *info, int i); 609 + void aer_print_error(struct aer_err_info *info, int i); 610 610 611 611 int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, 612 612 unsigned int tlp_len, bool flit,
+23 -10
drivers/pci/pcie/aer.c
··· 705 705 found ? "" : " (no details found"); 706 706 } 707 707 708 - void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) 708 + void aer_print_error(struct aer_err_info *info, int i) 709 709 { 710 - int layer, agent; 711 - int id = pci_dev_id(dev); 710 + struct pci_dev *dev; 711 + int layer, agent, id; 712 712 const char *level = info->level; 713 + 714 + if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES)) 715 + return; 716 + 717 + dev = info->dev[i]; 718 + id = pci_dev_id(dev); 713 719 714 720 pci_dev_aer_stats_incr(dev, info); 715 721 trace_aer_event(pci_name(dev), (info->status & ~info->mask), ··· 1199 1193 1200 1194 /** 1201 1195 * aer_get_device_error_info - read error status from dev and store it to info 1202 - * @dev: pointer to the device expected to have an error record 1203 1196 * @info: pointer to structure to store the error record 1197 + * @i: index into info->dev[] 1204 1198 * 1205 1199 * Return: 1 on success, 0 on error. 1206 1200 * 1207 1201 * Note that @info is reused among all error devices. Clear fields properly. 1208 1202 */ 1209 - int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) 1203 + int aer_get_device_error_info(struct aer_err_info *info, int i) 1210 1204 { 1211 - int type = pci_pcie_type(dev); 1212 - int aer = dev->aer_cap; 1205 + struct pci_dev *dev; 1206 + int type, aer; 1213 1207 u32 aercc; 1208 + 1209 + if (i >= AER_MAX_MULTI_ERR_DEVICES) 1210 + return 0; 1211 + 1212 + dev = info->dev[i]; 1213 + aer = dev->aer_cap; 1214 + type = pci_pcie_type(dev); 1214 1215 1215 1216 /* Must reset in this function */ 1216 1217 info->status = 0; ··· 1270 1257 1271 1258 /* Report all before handling them, to not lose records by reset etc. 
*/ 1272 1259 for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { 1273 - if (aer_get_device_error_info(e_info->dev[i], e_info)) 1274 - aer_print_error(e_info->dev[i], e_info); 1260 + if (aer_get_device_error_info(e_info, i)) 1261 + aer_print_error(e_info, i); 1275 1262 } 1276 1263 for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { 1277 - if (aer_get_device_error_info(e_info->dev[i], e_info)) 1264 + if (aer_get_device_error_info(e_info, i)) 1278 1265 handle_error_source(e_info->dev[i], e_info); 1279 1266 } 1280 1267 }
+6 -2
drivers/pci/pcie/dpc.c
··· 253 253 info->severity = AER_NONFATAL; 254 254 255 255 info->level = KERN_ERR; 256 + 257 + info->dev[0] = dev; 258 + info->error_dev_num = 1; 259 + 256 260 return 1; 257 261 } 258 262 ··· 274 270 pci_warn(pdev, "containment event, status:%#06x: unmasked uncorrectable error detected\n", 275 271 status); 276 272 if (dpc_get_aer_uncorrect_severity(pdev, &info) && 277 - aer_get_device_error_info(pdev, &info)) { 278 - aer_print_error(pdev, &info); 273 + aer_get_device_error_info(&info, 0)) { 274 + aer_print_error(&info, 0); 279 275 pci_aer_clear_nonfatal_status(pdev); 280 276 pci_aer_clear_fatal_status(pdev); 281 277 }