Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'remotes/lorenzo/pci/apei'

- Add ACPI APEI notifier chain for unknown (vendor) CPER records (Shiju
Jose)

- Add handling of HiSilicon HIP PCIe controller errors (Yicong Yang)

* remotes/lorenzo/pci/apei:
PCI: hip: Add handling of HiSilicon HIP PCIe controller errors
ACPI / APEI: Add a notifier chain for unknown (vendor) CPER records

+416
+63
drivers/acpi/apei/ghes.c
··· 79 79 ((struct acpi_hest_generic_status *) \ 80 80 ((struct ghes_estatus_node *)(estatus_node) + 1)) 81 81 /* GHES_VENDOR_ENTRY_LEN: size of a vendor record entry header plus gdata_len bytes of record data. */ 82 + #define GHES_VENDOR_ENTRY_LEN(gdata_len) \ 83 + (sizeof(struct ghes_vendor_record_entry) + (gdata_len)) /* GHES_GDATA_FROM_VENDOR_ENTRY: the copied generic data record starts immediately after the entry header, i.e. at (entry + 1). */ 84 + #define GHES_GDATA_FROM_VENDOR_ENTRY(vendor_entry) \ 85 + ((struct acpi_hest_generic_data *) \ 86 + ((struct ghes_vendor_record_entry *)(vendor_entry) + 1)) 87 + 82 88 /* 83 89 * NMI-like notifications vary by architecture, before the compiler can prune 84 90 * unused static functions it needs a value for these enums. ··· 128 122 * simultaneously. 129 123 */ 130 124 static DEFINE_SPINLOCK(ghes_notify_lock_irq); 125 + /* Deferred vendor record: the work item that delivers it, the severity handed to the notifiers, and a trailing flexible array; the record bytes are accessed through GHES_GDATA_FROM_VENDOR_ENTRY(). */ 126 + struct ghes_vendor_record_entry { 127 + struct work_struct work; 128 + int error_severity; 129 + char vendor_record[]; 130 + }; 131 131 132 132 static struct gen_pool *ghes_estatus_pool; 133 133 static unsigned long ghes_estatus_pool_size_request; ··· 523 511 #endif 524 512 } 525 513 /* Blocking notifier chain invoked from ghes_vendor_record_work_func() for each deferred record. */ 514 + static BLOCKING_NOTIFIER_HEAD(vendor_record_notify_list); 515 + /* Add nb to the vendor record notifier chain; returns the result of blocking_notifier_chain_register(). */ 516 + int ghes_register_vendor_record_notifier(struct notifier_block *nb) 517 + { 518 + return blocking_notifier_chain_register(&vendor_record_notify_list, nb); 519 + } 520 + EXPORT_SYMBOL_GPL(ghes_register_vendor_record_notifier); 521 + /* Remove nb from the vendor record notifier chain. */ 522 + void ghes_unregister_vendor_record_notifier(struct notifier_block *nb) 523 + { 524 + blocking_notifier_chain_unregister(&vendor_record_notify_list, nb); 525 + } 526 + EXPORT_SYMBOL_GPL(ghes_unregister_vendor_record_notifier); 527 + /* Work handler: calls the notifier chain with the stored severity as the event and the copied record as data, then frees the entry back to ghes_estatus_pool. */ 528 + static void ghes_vendor_record_work_func(struct work_struct *work) 529 + { 530 + struct ghes_vendor_record_entry *entry; 531 + struct acpi_hest_generic_data *gdata; 532 + u32 len; 533 + 534 + entry = container_of(work, struct ghes_vendor_record_entry, work); 535 + gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); 536 + 537 + blocking_notifier_call_chain(&vendor_record_notify_list, 538 + entry->error_severity, gdata); 539 + /* Recompute the length from the copied record with the same formula used at allocation time. */ 540 + len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); 541 +
gen_pool_free(ghes_estatus_pool, (unsigned long)entry, len); 542 + } 543 + /* Copy gdata into an entry allocated from ghes_estatus_pool, record sev, and schedule work to notify listeners later; returns without doing anything if the allocation fails. */ 544 + static void ghes_defer_non_standard_event(struct acpi_hest_generic_data *gdata, 545 + int sev) 546 + { 547 + struct acpi_hest_generic_data *copied_gdata; 548 + struct ghes_vendor_record_entry *entry; 549 + u32 len; 550 + 551 + len = GHES_VENDOR_ENTRY_LEN(acpi_hest_get_record_size(gdata)); 552 + entry = (void *)gen_pool_alloc(ghes_estatus_pool, len); 553 + if (!entry) 554 + return; 555 + 556 + copied_gdata = GHES_GDATA_FROM_VENDOR_ENTRY(entry); 557 + memcpy(copied_gdata, gdata, acpi_hest_get_record_size(gdata)); 558 + entry->error_severity = sev; 559 + 560 + INIT_WORK(&entry->work, ghes_vendor_record_work_func); 561 + schedule_work(&entry->work); 562 + } 563 + 526 564 static bool ghes_do_proc(struct ghes *ghes, 527 565 const struct acpi_hest_generic_status *estatus) 528 566 { ··· 611 549 } else { 612 550 void *err = acpi_hest_get_payload(gdata); 613 551 /* Queue the record for registered vendor notifiers before it is logged below. */ 552 + ghes_defer_non_standard_event(gdata, sev); 614 553 log_non_standard_event(sec_type, fru_id, fru_text, 615 554 sec_sev, err, 616 555 gdata->error_data_length);
+7
drivers/pci/controller/Kconfig
··· 294 294 Say Y here if you want to enable PCI controller support on 295 295 Loongson systems. 296 296 297 + config PCIE_HISI_ERR 298 + depends on ACPI_APEI_GHES && (ARM64 || COMPILE_TEST) 299 + bool "HiSilicon HIP PCIe controller error handling driver" 300 + help 301 + Say Y here if you want error handling support 302 + for the PCIe controller's errors on HiSilicon HIP SoCs 303 + 297 304 source "drivers/pci/controller/dwc/Kconfig" 298 305 source "drivers/pci/controller/mobiveil/Kconfig" 299 306 source "drivers/pci/controller/cadence/Kconfig"
+1
drivers/pci/controller/Makefile
··· 31 31 obj-$(CONFIG_VMD) += vmd.o 32 32 obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o 33 33 obj-$(CONFIG_PCI_LOONGSON) += pci-loongson.o 34 + obj-$(CONFIG_PCIE_HISI_ERR) += pcie-hisi-error.o 34 35 # pcie-hisi.o quirks are needed even without CONFIG_PCIE_DW 35 36 obj-y += dwc/ 36 37 obj-y += mobiveil/
+327
drivers/pci/controller/pcie-hisi-error.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Driver for handling the PCIe controller errors on 4 + * HiSilicon HIP SoCs. 5 + * 6 + * Copyright (c) 2020 HiSilicon Limited. 7 + */ 8 + 9 + #include <linux/acpi.h> 10 + #include <acpi/ghes.h> 11 + #include <linux/bitops.h> 12 + #include <linux/delay.h> 13 + #include <linux/pci.h> 14 + #include <linux/platform_device.h> 15 + #include <linux/kfifo.h> 16 + #include <linux/spinlock.h> 17 + 18 + /* HISI PCIe controller error definitions */ 19 + #define HISI_PCIE_ERR_MISC_REGS 33 20 + 21 + #define HISI_PCIE_LOCAL_VALID_VERSION BIT(0) 22 + #define HISI_PCIE_LOCAL_VALID_SOC_ID BIT(1) 23 + #define HISI_PCIE_LOCAL_VALID_SOCKET_ID BIT(2) 24 + #define HISI_PCIE_LOCAL_VALID_NIMBUS_ID BIT(3) 25 + #define HISI_PCIE_LOCAL_VALID_SUB_MODULE_ID BIT(4) 26 + #define HISI_PCIE_LOCAL_VALID_CORE_ID BIT(5) 27 + #define HISI_PCIE_LOCAL_VALID_PORT_ID BIT(6) 28 + #define HISI_PCIE_LOCAL_VALID_ERR_TYPE BIT(7) 29 + #define HISI_PCIE_LOCAL_VALID_ERR_SEVERITY BIT(8) 30 + #define HISI_PCIE_LOCAL_VALID_ERR_MISC 9 31 + 32 + static guid_t hisi_pcie_sec_guid = 33 + GUID_INIT(0xB2889FC9, 0xE7D7, 0x4F9D, 34 + 0xA8, 0x67, 0xAF, 0x42, 0xE9, 0x8B, 0xE7, 0x72); 35 + 36 + /* 37 + * Firmware reports the socket port ID where the error occurred. These 38 + * macros convert that to the core ID and core port ID required by the 39 + * ACPI reset method. 
40 + */ 41 + #define HISI_PCIE_PORT_ID(core, v) (((v) >> 1) + ((core) << 3)) 42 + #define HISI_PCIE_CORE_ID(v) ((v) >> 3) 43 + #define HISI_PCIE_CORE_PORT_ID(v) (((v) & 7) << 1) 44 + 45 + struct hisi_pcie_error_data { 46 + u64 val_bits; 47 + u8 version; 48 + u8 soc_id; 49 + u8 socket_id; 50 + u8 nimbus_id; 51 + u8 sub_module_id; 52 + u8 core_id; 53 + u8 port_id; 54 + u8 err_severity; 55 + u16 err_type; 56 + u8 reserv[2]; 57 + u32 err_misc[HISI_PCIE_ERR_MISC_REGS]; 58 + }; 59 + 60 + struct hisi_pcie_error_private { 61 + struct notifier_block nb; 62 + struct device *dev; 63 + }; 64 + 65 + enum hisi_pcie_submodule_id { 66 + HISI_PCIE_SUB_MODULE_ID_AP, 67 + HISI_PCIE_SUB_MODULE_ID_TL, 68 + HISI_PCIE_SUB_MODULE_ID_MAC, 69 + HISI_PCIE_SUB_MODULE_ID_DL, 70 + HISI_PCIE_SUB_MODULE_ID_SDI, 71 + }; 72 + 73 + static const char * const hisi_pcie_sub_module[] = { 74 + [HISI_PCIE_SUB_MODULE_ID_AP] = "AP Layer", 75 + [HISI_PCIE_SUB_MODULE_ID_TL] = "TL Layer", 76 + [HISI_PCIE_SUB_MODULE_ID_MAC] = "MAC Layer", 77 + [HISI_PCIE_SUB_MODULE_ID_DL] = "DL Layer", 78 + [HISI_PCIE_SUB_MODULE_ID_SDI] = "SDI Layer", 79 + }; 80 + 81 + enum hisi_pcie_err_severity { 82 + HISI_PCIE_ERR_SEV_RECOVERABLE, 83 + HISI_PCIE_ERR_SEV_FATAL, 84 + HISI_PCIE_ERR_SEV_CORRECTED, 85 + HISI_PCIE_ERR_SEV_NONE, 86 + }; 87 + 88 + static const char * const hisi_pcie_error_sev[] = { 89 + [HISI_PCIE_ERR_SEV_RECOVERABLE] = "recoverable", 90 + [HISI_PCIE_ERR_SEV_FATAL] = "fatal", 91 + [HISI_PCIE_ERR_SEV_CORRECTED] = "corrected", 92 + [HISI_PCIE_ERR_SEV_NONE] = "none", 93 + }; 94 + 95 + static const char *hisi_pcie_get_string(const char * const *array, 96 + size_t n, u32 id) 97 + { 98 + u32 index; 99 + 100 + for (index = 0; index < n; index++) { 101 + if (index == id && array[index]) 102 + return array[index]; 103 + } 104 + 105 + return "unknown"; 106 + } 107 + 108 + static int hisi_pcie_port_reset(struct platform_device *pdev, 109 + u32 chip_id, u32 port_id) 110 + { 111 + struct device *dev = &pdev->dev; 112 + 
acpi_handle handle = ACPI_HANDLE(dev); 113 + union acpi_object arg[3]; 114 + struct acpi_object_list arg_list; 115 + acpi_status s; 116 + unsigned long long data = 0; 117 + 118 + arg[0].type = ACPI_TYPE_INTEGER; 119 + arg[0].integer.value = chip_id; 120 + arg[1].type = ACPI_TYPE_INTEGER; 121 + arg[1].integer.value = HISI_PCIE_CORE_ID(port_id); 122 + arg[2].type = ACPI_TYPE_INTEGER; 123 + arg[2].integer.value = HISI_PCIE_CORE_PORT_ID(port_id); 124 + 125 + arg_list.count = 3; 126 + arg_list.pointer = arg; 127 + 128 + s = acpi_evaluate_integer(handle, "RST", &arg_list, &data); 129 + if (ACPI_FAILURE(s)) { 130 + dev_err(dev, "No RST method\n"); 131 + return -EIO; 132 + } 133 + 134 + if (data) { 135 + dev_err(dev, "Failed to Reset\n"); 136 + return -EIO; 137 + } 138 + 139 + return 0; 140 + } 141 + 142 + static int hisi_pcie_port_do_recovery(struct platform_device *dev, 143 + u32 chip_id, u32 port_id) 144 + { 145 + acpi_status s; 146 + struct device *device = &dev->dev; 147 + acpi_handle root_handle = ACPI_HANDLE(device); 148 + struct acpi_pci_root *pci_root; 149 + struct pci_bus *root_bus; 150 + struct pci_dev *pdev; 151 + u32 domain, busnr, devfn; 152 + 153 + s = acpi_get_parent(root_handle, &root_handle); 154 + if (ACPI_FAILURE(s)) 155 + return -ENODEV; 156 + pci_root = acpi_pci_find_root(root_handle); 157 + if (!pci_root) 158 + return -ENODEV; 159 + root_bus = pci_root->bus; 160 + domain = pci_root->segment; 161 + 162 + busnr = root_bus->number; 163 + devfn = PCI_DEVFN(port_id, 0); 164 + pdev = pci_get_domain_bus_and_slot(domain, busnr, devfn); 165 + if (!pdev) { 166 + dev_info(device, "Fail to get root port %04x:%02x:%02x.%d device\n", 167 + domain, busnr, PCI_SLOT(devfn), PCI_FUNC(devfn)); 168 + return -ENODEV; 169 + } 170 + 171 + pci_stop_and_remove_bus_device_locked(pdev); 172 + pci_dev_put(pdev); 173 + 174 + if (hisi_pcie_port_reset(dev, chip_id, port_id)) 175 + return -EIO; 176 + 177 + /* 178 + * The initialization time of subordinate devices after 179 + * hot 
reset is no more than 1s, which is required by 180 + * the PCI spec v5.0 sec 6.6.1. The time will shorten 181 + * if Readiness Notifications mechanisms are used. But 182 + * wait 1s here to adapt any conditions. 183 + */ 184 + ssleep(1UL); 185 + 186 + /* add root port and downstream devices */ 187 + pci_lock_rescan_remove(); 188 + pci_rescan_bus(root_bus); 189 + pci_unlock_rescan_remove(); 190 + 191 + return 0; 192 + } 193 + 194 + static void hisi_pcie_handle_error(struct platform_device *pdev, 195 + const struct hisi_pcie_error_data *edata) 196 + { 197 + struct device *dev = &pdev->dev; 198 + int idx, rc; 199 + const unsigned long valid_bits[] = {BITMAP_FROM_U64(edata->val_bits)}; 200 + 201 + if (edata->val_bits == 0) { 202 + dev_warn(dev, "%s: no valid error information\n", __func__); 203 + return; 204 + } 205 + 206 + dev_info(dev, "\nHISI : HIP : PCIe controller error\n"); 207 + if (edata->val_bits & HISI_PCIE_LOCAL_VALID_SOC_ID) 208 + dev_info(dev, "Table version = %d\n", edata->version); 209 + if (edata->val_bits & HISI_PCIE_LOCAL_VALID_SOCKET_ID) 210 + dev_info(dev, "Socket ID = %d\n", edata->socket_id); 211 + if (edata->val_bits & HISI_PCIE_LOCAL_VALID_NIMBUS_ID) 212 + dev_info(dev, "Nimbus ID = %d\n", edata->nimbus_id); 213 + if (edata->val_bits & HISI_PCIE_LOCAL_VALID_SUB_MODULE_ID) 214 + dev_info(dev, "Sub Module = %s\n", 215 + hisi_pcie_get_string(hisi_pcie_sub_module, 216 + ARRAY_SIZE(hisi_pcie_sub_module), 217 + edata->sub_module_id)); 218 + if (edata->val_bits & HISI_PCIE_LOCAL_VALID_CORE_ID) 219 + dev_info(dev, "Core ID = core%d\n", edata->core_id); 220 + if (edata->val_bits & HISI_PCIE_LOCAL_VALID_PORT_ID) 221 + dev_info(dev, "Port ID = port%d\n", edata->port_id); 222 + if (edata->val_bits & HISI_PCIE_LOCAL_VALID_ERR_SEVERITY) 223 + dev_info(dev, "Error severity = %s\n", 224 + hisi_pcie_get_string(hisi_pcie_error_sev, 225 + ARRAY_SIZE(hisi_pcie_error_sev), 226 + edata->err_severity)); 227 + if (edata->val_bits & HISI_PCIE_LOCAL_VALID_ERR_TYPE) 228 + 
dev_info(dev, "Error type = 0x%x\n", edata->err_type); 229 + 230 + dev_info(dev, "Reg Dump:\n"); 231 + idx = HISI_PCIE_LOCAL_VALID_ERR_MISC; 232 + for_each_set_bit_from(idx, valid_bits, 233 + HISI_PCIE_LOCAL_VALID_ERR_MISC + HISI_PCIE_ERR_MISC_REGS) 234 + dev_info(dev, "ERR_MISC_%d = 0x%x\n", idx - HISI_PCIE_LOCAL_VALID_ERR_MISC, 235 + edata->err_misc[idx - HISI_PCIE_LOCAL_VALID_ERR_MISC]); 236 + 237 + if (edata->err_severity != HISI_PCIE_ERR_SEV_RECOVERABLE) 238 + return; 239 + 240 + /* Recovery for the PCIe controller errors, try reset 241 + * PCI port for the error recovery 242 + */ 243 + rc = hisi_pcie_port_do_recovery(pdev, edata->socket_id, 244 + HISI_PCIE_PORT_ID(edata->core_id, edata->port_id)); 245 + if (rc) 246 + dev_info(dev, "fail to do hisi pcie port reset\n"); 247 + } 248 + 249 + static int hisi_pcie_notify_error(struct notifier_block *nb, 250 + unsigned long event, void *data) 251 + { 252 + struct acpi_hest_generic_data *gdata = data; 253 + const struct hisi_pcie_error_data *error_data = acpi_hest_get_payload(gdata); 254 + struct hisi_pcie_error_private *priv; 255 + struct device *dev; 256 + struct platform_device *pdev; 257 + guid_t err_sec_guid; 258 + u8 socket; 259 + 260 + import_guid(&err_sec_guid, gdata->section_type); 261 + if (!guid_equal(&err_sec_guid, &hisi_pcie_sec_guid)) 262 + return NOTIFY_DONE; 263 + 264 + priv = container_of(nb, struct hisi_pcie_error_private, nb); 265 + dev = priv->dev; 266 + 267 + if (device_property_read_u8(dev, "socket", &socket)) 268 + return NOTIFY_DONE; 269 + 270 + if (error_data->socket_id != socket) 271 + return NOTIFY_DONE; 272 + 273 + pdev = container_of(dev, struct platform_device, dev); 274 + hisi_pcie_handle_error(pdev, error_data); 275 + 276 + return NOTIFY_OK; 277 + } 278 + 279 + static int hisi_pcie_error_handler_probe(struct platform_device *pdev) 280 + { 281 + struct hisi_pcie_error_private *priv; 282 + int ret; 283 + 284 + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); 285 + if (!priv) 
286 + return -ENOMEM; 287 + 288 + priv->nb.notifier_call = hisi_pcie_notify_error; 289 + priv->dev = &pdev->dev; 290 + ret = ghes_register_vendor_record_notifier(&priv->nb); 291 + if (ret) { 292 + dev_err(&pdev->dev, 293 + "Failed to register hisi pcie controller error handler with apei\n"); 294 + return ret; 295 + } 296 + 297 + platform_set_drvdata(pdev, priv); 298 + 299 + return 0; 300 + } 301 + 302 + static int hisi_pcie_error_handler_remove(struct platform_device *pdev) 303 + { 304 + struct hisi_pcie_error_private *priv = platform_get_drvdata(pdev); 305 + 306 + ghes_unregister_vendor_record_notifier(&priv->nb); 307 + 308 + return 0; 309 + } 310 + 311 + static const struct acpi_device_id hisi_pcie_acpi_match[] = { 312 + { "HISI0361", 0 }, 313 + { } 314 + }; 315 + 316 + static struct platform_driver hisi_pcie_error_handler_driver = { 317 + .driver = { 318 + .name = "hisi-pcie-error-handler", 319 + .acpi_match_table = hisi_pcie_acpi_match, 320 + }, 321 + .probe = hisi_pcie_error_handler_probe, 322 + .remove = hisi_pcie_error_handler_remove, 323 + }; 324 + module_platform_driver(hisi_pcie_error_handler_driver); 325 + 326 + MODULE_DESCRIPTION("HiSilicon HIP PCIe controller error handling driver"); 327 + MODULE_LICENSE("GPL v2");
+18
include/acpi/ghes.h
··· 53 53 GHES_SEV_PANIC = 0x3, 54 54 }; 55 55 56 + #ifdef CONFIG_ACPI_APEI_GHES 57 + /** 58 + * ghes_register_vendor_record_notifier - register a notifier for vendor 59 + * records that the kernel would otherwise ignore. 60 + * @nb: pointer to the notifier_block structure of the event handler. 61 + * * The handler is called on a blocking notifier chain with the error severity as the event value and a pointer to the generic data record as the data argument. * 62 + * Return: 0 on success, non-zero on failure. 63 + */ 64 + int ghes_register_vendor_record_notifier(struct notifier_block *nb); 65 + 66 + /** 67 + * ghes_unregister_vendor_record_notifier - unregister the previously 68 + * registered vendor record notifier. 69 + * @nb: pointer to the notifier_block structure of the vendor record handler. 70 + */ 71 + void ghes_unregister_vendor_record_notifier(struct notifier_block *nb); 72 + #endif 73 + 56 74 int ghes_estatus_pool_init(int num_ghes); 57 75 58 76 /* From drivers/edac/ghes_edac.c */