Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

PCI-Express AER implementation: AER core and aerdriver

Patch 3 implements the core part of PCI-Express AER and the aerdrv
port service driver.

When a root port service device is probed, the aerdrv will call
request_irq to register irq handler for AER error interrupt.

When a device sends a PCI-Express error message to the root port,
the root port will trigger an interrupt, by either MSI or IO-APIC,
and the kernel then runs the irq handler. The handler collects the root
error status register and schedules a work item. The work item calls
the core part to process the error based on its type
(Correctable/non-fatal/fatal).

As for Correctable errors, the patch chooses to just clear the correctable
error status register of the device.

As for the non-fatal error, the patch follows generic PCI error handler
rules to call the error callback functions of the endpoint's driver. If
the device is a bridge, the patch chooses to broadcast the error to
downstream devices.

As for the fatal error, the patch resets the pci-express link and
follows generic PCI error handler rules to call the error callback
functions of the endpoint's driver. If the device is a bridge, the patch
chooses to broadcast the error to downstream devices.

Signed-off-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

authored by

Zhang, Yanmin and committed by
Greg Kroah-Hartman
6c2b374d 48408157

+1598
+1
drivers/pci/pcie/Kconfig
··· 34 34 35 35 When in doubt, say N. 36 36 37 + source "drivers/pci/pcie/aer/Kconfig"
+3
drivers/pci/pcie/Makefile
··· 5 5 pcieportdrv-y := portdrv_core.o portdrv_pci.o portdrv_bus.o 6 6 7 7 obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o 8 + 9 + # Build PCI Express AER if needed 10 + obj-$(CONFIG_PCIEAER) += aer/
+12
drivers/pci/pcie/aer/Kconfig
··· 1 + # 2 + # PCI Express Root Port Device AER Configuration 3 + # 4 + 5 + config PCIEAER 6 + boolean "Root Port Advanced Error Reporting support" 7 + depends on PCIEPORTBUS && ACPI 8 + default y 9 + help 10 + This enables PCI Express Root Port Advanced Error Reporting 11 + (AER) driver support. Error reporting messages sent to Root 12 + Port will be handled by PCI Express AER driver.
+8
drivers/pci/pcie/aer/Makefile
··· 1 + # 2 + # Makefile for PCI-Express Root Port Advanced Error Reporting Driver 3 + # 4 + 5 + obj-$(CONFIG_PCIEAER) += aerdriver.o 6 + 7 + aerdriver-objs := aerdrv_errprint.o aerdrv_core.o aerdrv.o aerdrv_acpi.o 8 +
+346
drivers/pci/pcie/aer/aerdrv.c
··· 1 + /* 2 + * drivers/pci/pcie/aer/aerdrv.c 3 + * 4 + * This file is subject to the terms and conditions of the GNU General Public 5 + * License. See the file "COPYING" in the main directory of this archive 6 + * for more details. 7 + * 8 + * This file implements the AER root port service driver. The driver will 9 + * register an irq handler. When root port triggers an AER interrupt, the irq 10 + * handler will collect root port status and schedule a work. 11 + * 12 + * Copyright (C) 2006 Intel Corp. 13 + * Tom Long Nguyen (tom.l.nguyen@intel.com) 14 + * Zhang Yanmin (yanmin.zhang@intel.com) 15 + * 16 + */ 17 + 18 + #include <linux/module.h> 19 + #include <linux/pci.h> 20 + #include <linux/kernel.h> 21 + #include <linux/errno.h> 22 + #include <linux/pm.h> 23 + #include <linux/init.h> 24 + #include <linux/interrupt.h> 25 + #include <linux/delay.h> 26 + #include <linux/pcieport_if.h> 27 + 28 + #include "aerdrv.h" 29 + 30 + /* 31 + * Version Information 32 + */ 33 + #define DRIVER_VERSION "v1.0" 34 + #define DRIVER_AUTHOR "tom.l.nguyen@intel.com" 35 + #define DRIVER_DESC "Root Port Advanced Error Reporting Driver" 36 + MODULE_AUTHOR(DRIVER_AUTHOR); 37 + MODULE_DESCRIPTION(DRIVER_DESC); 38 + MODULE_LICENSE("GPL"); 39 + 40 + static int __devinit aer_probe (struct pcie_device *dev, 41 + const struct pcie_port_service_id *id ); 42 + static void aer_remove(struct pcie_device *dev); 43 + static int aer_suspend(struct pcie_device *dev, pm_message_t state) 44 + {return 0;} 45 + static int aer_resume(struct pcie_device *dev) {return 0;} 46 + static pci_ers_result_t aer_error_detected(struct pci_dev *dev, 47 + enum pci_channel_state error); 48 + static void aer_error_resume(struct pci_dev *dev); 49 + static pci_ers_result_t aer_root_reset(struct pci_dev *dev); 50 + 51 + /* 52 + * PCI Express bus's AER Root service driver data structure 53 + */ 54 + static struct pcie_port_service_id aer_id[] = { 55 + { 56 + .vendor = PCI_ANY_ID, 57 + .device = PCI_ANY_ID, 58 + .port_type = 
PCIE_RC_PORT, 59 + .service_type = PCIE_PORT_SERVICE_AER, 60 + }, 61 + { /* end: all zeroes */ } 62 + }; 63 + 64 + static struct pci_error_handlers aer_error_handlers = { 65 + .error_detected = aer_error_detected, 66 + .resume = aer_error_resume, 67 + }; 68 + 69 + static struct pcie_port_service_driver aerdrv = { 70 + .name = "aer", 71 + .id_table = &aer_id[0], 72 + 73 + .probe = aer_probe, 74 + .remove = aer_remove, 75 + 76 + .suspend = aer_suspend, 77 + .resume = aer_resume, 78 + 79 + .err_handler = &aer_error_handlers, 80 + 81 + .reset_link = aer_root_reset, 82 + }; 83 + 84 + /** 85 + * aer_irq - Root Port's ISR 86 + * @irq: IRQ assigned to Root Port 87 + * @context: pointer to Root Port data structure 88 + * @r: pointer struct pt_regs 89 + * 90 + * Invoked when Root Port detects AER messages. 91 + **/ 92 + static irqreturn_t aer_irq(int irq, void *context, struct pt_regs * r) 93 + { 94 + unsigned int status, id; 95 + struct pcie_device *pdev = (struct pcie_device *)context; 96 + struct aer_rpc *rpc = get_service_data(pdev); 97 + int next_prod_idx; 98 + unsigned long flags; 99 + int pos; 100 + 101 + pos = pci_find_aer_capability(pdev->port); 102 + /* 103 + * Must lock access to Root Error Status Reg, Root Error ID Reg, 104 + * and Root error producer/consumer index 105 + */ 106 + spin_lock_irqsave(&rpc->e_lock, flags); 107 + 108 + /* Read error status */ 109 + pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, &status); 110 + if (!(status & ROOT_ERR_STATUS_MASKS)) { 111 + spin_unlock_irqrestore(&rpc->e_lock, flags); 112 + return IRQ_NONE; 113 + } 114 + 115 + /* Read error source and clear error status */ 116 + pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_COR_SRC, &id); 117 + pci_write_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, status); 118 + 119 + /* Store error source for later DPC handler */ 120 + next_prod_idx = rpc->prod_idx + 1; 121 + if (next_prod_idx == AER_ERROR_SOURCES_MAX) 122 + next_prod_idx = 0; 123 + if (next_prod_idx == 
rpc->cons_idx) { 124 + /* 125 + * Error Storm Condition - possibly the same error occurred. 126 + * Drop the error. 127 + */ 128 + spin_unlock_irqrestore(&rpc->e_lock, flags); 129 + return IRQ_HANDLED; 130 + } 131 + rpc->e_sources[rpc->prod_idx].status = status; 132 + rpc->e_sources[rpc->prod_idx].id = id; 133 + rpc->prod_idx = next_prod_idx; 134 + spin_unlock_irqrestore(&rpc->e_lock, flags); 135 + 136 + /* Invoke DPC handler */ 137 + schedule_work(&rpc->dpc_handler); 138 + 139 + return IRQ_HANDLED; 140 + } 141 + 142 + /** 143 + * aer_alloc_rpc - allocate Root Port data structure 144 + * @dev: pointer to the pcie_dev data structure 145 + * 146 + * Invoked when Root Port's AER service is loaded. 147 + **/ 148 + static struct aer_rpc* aer_alloc_rpc(struct pcie_device *dev) 149 + { 150 + struct aer_rpc *rpc; 151 + 152 + if (!(rpc = (struct aer_rpc *)kmalloc(sizeof(struct aer_rpc), 153 + GFP_KERNEL))) 154 + return NULL; 155 + 156 + memset(rpc, 0, sizeof(struct aer_rpc)); 157 + /* 158 + * Initialize Root lock access, e_lock, to Root Error Status Reg, 159 + * Root Error ID Reg, and Root error producer/consumer index. 160 + */ 161 + rpc->e_lock = SPIN_LOCK_UNLOCKED; 162 + 163 + rpc->rpd = dev; 164 + INIT_WORK(&rpc->dpc_handler, aer_isr, (void *)dev); 165 + rpc->prod_idx = rpc->cons_idx = 0; 166 + mutex_init(&rpc->rpc_mutex); 167 + init_waitqueue_head(&rpc->wait_release); 168 + 169 + /* Use PCIE bus function to store rpc into PCIE device */ 170 + set_service_data(dev, rpc); 171 + 172 + return rpc; 173 + } 174 + 175 + /** 176 + * aer_remove - clean up resources 177 + * @dev: pointer to the pcie_dev data structure 178 + * 179 + * Invoked when PCI Express bus unloads or AER probe fails. 180 + **/ 181 + static void aer_remove(struct pcie_device *dev) 182 + { 183 + struct aer_rpc *rpc = get_service_data(dev); 184 + 185 + if (rpc) { 186 + /* If register interrupt service, it must be free. 
*/ 187 + if (rpc->isr) 188 + free_irq(dev->irq, dev); 189 + 190 + wait_event(rpc->wait_release, rpc->prod_idx == rpc->cons_idx); 191 + 192 + aer_delete_rootport(rpc); 193 + set_service_data(dev, NULL); 194 + } 195 + } 196 + 197 + /** 198 + * aer_probe - initialize resources 199 + * @dev: pointer to the pcie_dev data structure 200 + * @id: pointer to the service id data structure 201 + * 202 + * Invoked when PCI Express bus loads AER service driver. 203 + **/ 204 + static int __devinit aer_probe (struct pcie_device *dev, 205 + const struct pcie_port_service_id *id ) 206 + { 207 + int status; 208 + struct aer_rpc *rpc; 209 + struct device *device = &dev->device; 210 + 211 + /* Init */ 212 + if ((status = aer_init(dev))) 213 + return status; 214 + 215 + /* Alloc rpc data structure */ 216 + if (!(rpc = aer_alloc_rpc(dev))) { 217 + printk(KERN_DEBUG "%s: Alloc rpc fails on PCIE device[%s]\n", 218 + __FUNCTION__, device->bus_id); 219 + aer_remove(dev); 220 + return -ENOMEM; 221 + } 222 + 223 + /* Request IRQ ISR */ 224 + if ((status = request_irq(dev->irq, aer_irq, SA_SHIRQ, "aerdrv", 225 + dev))) { 226 + printk(KERN_DEBUG "%s: Request ISR fails on PCIE device[%s]\n", 227 + __FUNCTION__, device->bus_id); 228 + aer_remove(dev); 229 + return status; 230 + } 231 + 232 + rpc->isr = 1; 233 + 234 + aer_enable_rootport(rpc); 235 + 236 + return status; 237 + } 238 + 239 + /** 240 + * aer_root_reset - reset link on Root Port 241 + * @dev: pointer to Root Port's pci_dev data structure 242 + * 243 + * Invoked by Port Bus driver when performing link reset at Root Port. 
244 + **/ 245 + static pci_ers_result_t aer_root_reset(struct pci_dev *dev) 246 + { 247 + u16 p2p_ctrl; 248 + u32 status; 249 + int pos; 250 + 251 + pos = pci_find_aer_capability(dev); 252 + 253 + /* Disable Root's interrupt in response to error messages */ 254 + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, 0); 255 + 256 + /* Assert Secondary Bus Reset */ 257 + pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &p2p_ctrl); 258 + p2p_ctrl |= PCI_CB_BRIDGE_CTL_CB_RESET; 259 + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, p2p_ctrl); 260 + 261 + /* De-assert Secondary Bus Reset */ 262 + p2p_ctrl &= ~PCI_CB_BRIDGE_CTL_CB_RESET; 263 + pci_write_config_word(dev, PCI_BRIDGE_CONTROL, p2p_ctrl); 264 + 265 + /* 266 + * System software must wait for at least 100ms from the end 267 + * of a reset of one or more device before it is permitted 268 + * to issue Configuration Requests to those devices. 269 + */ 270 + msleep(200); 271 + printk(KERN_DEBUG "Complete link reset at Root[%s]\n", dev->dev.bus_id); 272 + 273 + /* Enable Root Port's interrupt in response to error messages */ 274 + pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status); 275 + pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, status); 276 + pci_write_config_dword(dev, 277 + pos + PCI_ERR_ROOT_COMMAND, 278 + ROOT_PORT_INTR_ON_MESG_MASK); 279 + 280 + return PCI_ERS_RESULT_RECOVERED; 281 + } 282 + 283 + /** 284 + * aer_error_detected - update severity status 285 + * @dev: pointer to Root Port's pci_dev data structure 286 + * @error: error severity being notified by port bus 287 + * 288 + * Invoked by Port Bus driver during error recovery. 289 + **/ 290 + static pci_ers_result_t aer_error_detected(struct pci_dev *dev, 291 + enum pci_channel_state error) 292 + { 293 + /* Root Port has no impact. Always recovers. 
*/ 294 + return PCI_ERS_RESULT_CAN_RECOVER; 295 + } 296 + 297 + /** 298 + * aer_error_resume - clean up corresponding error status bits 299 + * @dev: pointer to Root Port's pci_dev data structure 300 + * 301 + * Invoked by Port Bus driver during nonfatal recovery. 302 + **/ 303 + static void aer_error_resume(struct pci_dev *dev) 304 + { 305 + int pos; 306 + u32 status, mask; 307 + u16 reg16; 308 + 309 + /* Clean up Root device status */ 310 + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); 311 + pci_read_config_word(dev, pos + PCI_EXP_DEVSTA, &reg16); 312 + pci_write_config_word(dev, pos + PCI_EXP_DEVSTA, reg16); 313 + 314 + /* Clean AER Root Error Status */ 315 + pos = pci_find_aer_capability(dev); 316 + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); 317 + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); 318 + if (dev->error_state == pci_channel_io_normal) 319 + status &= ~mask; /* Clear corresponding nonfatal bits */ 320 + else 321 + status &= mask; /* Clear corresponding fatal bits */ 322 + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); 323 + } 324 + 325 + /** 326 + * aer_service_init - register AER root service driver 327 + * 328 + * Invoked when AER root service driver is loaded. 329 + **/ 330 + static int __init aer_service_init(void) 331 + { 332 + return pcie_port_service_register(&aerdrv); 333 + } 334 + 335 + /** 336 + * aer_service_exit - unregister AER root service driver 337 + * 338 + * Invoked when AER root service driver is unloaded. 339 + **/ 340 + static void __exit aer_service_exit(void) 341 + { 342 + pcie_port_service_unregister(&aerdrv); 343 + } 344 + 345 + module_init(aer_service_init); 346 + module_exit(aer_service_exit);
+125
drivers/pci/pcie/aer/aerdrv.h
··· 1 + /* 2 + * Copyright (C) 2006 Intel Corp. 3 + * Tom Long Nguyen (tom.l.nguyen@intel.com) 4 + * Zhang Yanmin (yanmin.zhang@intel.com) 5 + * 6 + */ 7 + 8 + #ifndef _AERDRV_H_ 9 + #define _AERDRV_H_ 10 + 11 + #include <linux/pcieport_if.h> 12 + #include <linux/aer.h> 13 + 14 + #define AER_NONFATAL 0 15 + #define AER_FATAL 1 16 + #define AER_CORRECTABLE 2 17 + #define AER_UNCORRECTABLE 4 18 + #define AER_ERROR_MASK 0x001fffff 19 + #define AER_ERROR(d) (d & AER_ERROR_MASK) 20 + 21 + #define OSC_METHOD_RUN_SUCCESS 0 22 + #define OSC_METHOD_NOT_SUPPORTED 1 23 + #define OSC_METHOD_RUN_FAILURE 2 24 + 25 + /* Root Error Status Register Bits */ 26 + #define ROOT_ERR_STATUS_MASKS 0x0f 27 + 28 + #define SYSTEM_ERROR_INTR_ON_MESG_MASK (PCI_EXP_RTCTL_SECEE| \ 29 + PCI_EXP_RTCTL_SENFEE| \ 30 + PCI_EXP_RTCTL_SEFEE) 31 + #define ROOT_PORT_INTR_ON_MESG_MASK (PCI_ERR_ROOT_CMD_COR_EN| \ 32 + PCI_ERR_ROOT_CMD_NONFATAL_EN| \ 33 + PCI_ERR_ROOT_CMD_FATAL_EN) 34 + #define ERR_COR_ID(d) (d & 0xffff) 35 + #define ERR_UNCOR_ID(d) (d >> 16) 36 + 37 + #define AER_SUCCESS 0 38 + #define AER_UNSUCCESS 1 39 + #define AER_ERROR_SOURCES_MAX 100 40 + 41 + #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ 42 + PCI_ERR_UNC_ECRC| \ 43 + PCI_ERR_UNC_UNSUP| \ 44 + PCI_ERR_UNC_COMP_ABORT| \ 45 + PCI_ERR_UNC_UNX_COMP| \ 46 + PCI_ERR_UNC_MALF_TLP) 47 + 48 + /* AER Error Info Flags */ 49 + #define AER_TLP_HEADER_VALID_FLAG 0x00000001 50 + #define AER_MULTI_ERROR_VALID_FLAG 0x00000002 51 + 52 + #define ERR_CORRECTABLE_ERROR_MASK 0x000031c1 53 + #define ERR_UNCORRECTABLE_ERROR_MASK 0x001ff010 54 + 55 + struct header_log_regs { 56 + unsigned int dw0; 57 + unsigned int dw1; 58 + unsigned int dw2; 59 + unsigned int dw3; 60 + }; 61 + 62 + struct aer_err_info { 63 + int severity; /* 0:NONFATAL | 1:FATAL | 2:COR */ 64 + int flags; 65 + unsigned int status; /* COR/UNCOR Error Status */ 66 + struct header_log_regs tlp; /* TLP Header */ 67 + }; 68 + 69 + struct aer_err_source { 70 + unsigned int status; 71 + 
unsigned int id; 72 + }; 73 + 74 + struct aer_rpc { 75 + struct pcie_device *rpd; /* Root Port device */ 76 + struct work_struct dpc_handler; 77 + struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX]; 78 + unsigned short prod_idx; /* Error Producer Index */ 79 + unsigned short cons_idx; /* Error Consumer Index */ 80 + int isr; 81 + spinlock_t e_lock; /* 82 + * Lock access to Error Status/ID Regs 83 + * and error producer/consumer index 84 + */ 85 + struct mutex rpc_mutex; /* 86 + * only one thread could do 87 + * recovery on the same 88 + * root port hierachy 89 + */ 90 + wait_queue_head_t wait_release; 91 + }; 92 + 93 + struct aer_broadcast_data { 94 + enum pci_channel_state state; 95 + enum pci_ers_result result; 96 + }; 97 + 98 + static inline pci_ers_result_t merge_result(enum pci_ers_result orig, 99 + enum pci_ers_result new) 100 + { 101 + switch (orig) { 102 + case PCI_ERS_RESULT_CAN_RECOVER: 103 + case PCI_ERS_RESULT_RECOVERED: 104 + orig = new; 105 + break; 106 + case PCI_ERS_RESULT_DISCONNECT: 107 + if (new == PCI_ERS_RESULT_NEED_RESET) 108 + orig = new; 109 + break; 110 + default: 111 + break; 112 + } 113 + 114 + return orig; 115 + } 116 + 117 + extern struct bus_type pcie_port_bus_type; 118 + extern void aer_enable_rootport(struct aer_rpc *rpc); 119 + extern void aer_delete_rootport(struct aer_rpc *rpc); 120 + extern int aer_init(struct pcie_device *dev); 121 + extern void aer_isr(void *context); 122 + extern void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); 123 + extern int aer_osc_setup(struct pci_dev *dev); 124 + 125 + #endif //_AERDRV_H_
+68
drivers/pci/pcie/aer/aerdrv_acpi.c
··· 1 + /* 2 + * Access ACPI _OSC method 3 + * 4 + * Copyright (C) 2006 Intel Corp. 5 + * Tom Long Nguyen (tom.l.nguyen@intel.com) 6 + * Zhang Yanmin (yanmin.zhang@intel.com) 7 + * 8 + */ 9 + 10 + #include <linux/module.h> 11 + #include <linux/pci.h> 12 + #include <linux/kernel.h> 13 + #include <linux/errno.h> 14 + #include <linux/pm.h> 15 + #include <linux/suspend.h> 16 + #include <linux/acpi.h> 17 + #include <linux/pci-acpi.h> 18 + #include <linux/delay.h> 19 + #include "aerdrv.h" 20 + 21 + /** 22 + * aer_osc_setup - run ACPI _OSC method 23 + * 24 + * Return: 25 + * Zero if success. Nonzero for otherwise. 26 + * 27 + * Invoked when PCIE bus loads AER service driver. To avoid conflict with 28 + * BIOS AER support requires BIOS to yield AER control to OS native driver. 29 + **/ 30 + int aer_osc_setup(struct pci_dev *dev) 31 + { 32 + int retval = OSC_METHOD_RUN_SUCCESS; 33 + acpi_status status; 34 + acpi_handle handle = DEVICE_ACPI_HANDLE(&dev->dev); 35 + struct pci_dev *pdev = dev; 36 + struct pci_bus *parent; 37 + 38 + while (!handle) { 39 + if (!pdev || !pdev->bus->parent) 40 + break; 41 + parent = pdev->bus->parent; 42 + if (!parent->self) 43 + /* Parent must be a host bridge */ 44 + handle = acpi_get_pci_rootbridge_handle( 45 + pci_domain_nr(parent), 46 + parent->number); 47 + else 48 + handle = DEVICE_ACPI_HANDLE( 49 + &(parent->self->dev)); 50 + pdev = parent->self; 51 + } 52 + 53 + if (!handle) 54 + return OSC_METHOD_NOT_SUPPORTED; 55 + 56 + pci_osc_support_set(OSC_EXT_PCI_CONFIG_SUPPORT); 57 + status = pci_osc_control_set(handle, OSC_PCI_EXPRESS_AER_CONTROL | 58 + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); 59 + if (ACPI_FAILURE(status)) { 60 + if (status == AE_SUPPORT) 61 + retval = OSC_METHOD_NOT_SUPPORTED; 62 + else 63 + retval = OSC_METHOD_RUN_FAILURE; 64 + } 65 + 66 + return retval; 67 + } 68 +
+757
drivers/pci/pcie/aer/aerdrv_core.c
··· 1 + /* 2 + * drivers/pci/pcie/aer/aerdrv_core.c 3 + * 4 + * This file is subject to the terms and conditions of the GNU General Public 5 + * License. See the file "COPYING" in the main directory of this archive 6 + * for more details. 7 + * 8 + * This file implements the core part of PCI-Express AER. When an pci-express 9 + * error is delivered, an error message will be collected and printed to 10 + * console, then, an error recovery procedure will be executed by following 11 + * the pci error recovery rules. 12 + * 13 + * Copyright (C) 2006 Intel Corp. 14 + * Tom Long Nguyen (tom.l.nguyen@intel.com) 15 + * Zhang Yanmin (yanmin.zhang@intel.com) 16 + * 17 + */ 18 + 19 + #include <linux/module.h> 20 + #include <linux/pci.h> 21 + #include <linux/kernel.h> 22 + #include <linux/errno.h> 23 + #include <linux/pm.h> 24 + #include <linux/suspend.h> 25 + #include <linux/acpi.h> 26 + #include <linux/pci-acpi.h> 27 + #include <linux/delay.h> 28 + #include "aerdrv.h" 29 + 30 + static int forceload; 31 + module_param(forceload, bool, 0); 32 + 33 + #define PCI_CFG_SPACE_SIZE (0x100) 34 + int pci_find_aer_capability(struct pci_dev *dev) 35 + { 36 + int pos; 37 + u32 reg32 = 0; 38 + 39 + /* Check if it's a pci-express device */ 40 + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); 41 + if (!pos) 42 + return 0; 43 + 44 + /* Check if it supports pci-express AER */ 45 + pos = PCI_CFG_SPACE_SIZE; 46 + while (pos) { 47 + if (pci_read_config_dword(dev, pos, &reg32)) 48 + return 0; 49 + 50 + /* some broken boards return ~0 */ 51 + if (reg32 == 0xffffffff) 52 + return 0; 53 + 54 + if (PCI_EXT_CAP_ID(reg32) == PCI_EXT_CAP_ID_ERR) 55 + break; 56 + 57 + pos = reg32 >> 20; 58 + } 59 + 60 + return pos; 61 + } 62 + 63 + int pci_enable_pcie_error_reporting(struct pci_dev *dev) 64 + { 65 + u16 reg16 = 0; 66 + int pos; 67 + 68 + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); 69 + if (!pos) 70 + return -EIO; 71 + 72 + pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16); 73 + reg16 = reg16 | 74 
+ PCI_EXP_DEVCTL_CERE | 75 + PCI_EXP_DEVCTL_NFERE | 76 + PCI_EXP_DEVCTL_FERE | 77 + PCI_EXP_DEVCTL_URRE; 78 + pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, 79 + reg16); 80 + return 0; 81 + } 82 + 83 + int pci_disable_pcie_error_reporting(struct pci_dev *dev) 84 + { 85 + u16 reg16 = 0; 86 + int pos; 87 + 88 + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); 89 + if (!pos) 90 + return -EIO; 91 + 92 + pci_read_config_word(dev, pos+PCI_EXP_DEVCTL, &reg16); 93 + reg16 = reg16 & ~(PCI_EXP_DEVCTL_CERE | 94 + PCI_EXP_DEVCTL_NFERE | 95 + PCI_EXP_DEVCTL_FERE | 96 + PCI_EXP_DEVCTL_URRE); 97 + pci_write_config_word(dev, pos+PCI_EXP_DEVCTL, 98 + reg16); 99 + return 0; 100 + } 101 + 102 + int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) 103 + { 104 + int pos; 105 + u32 status, mask; 106 + 107 + pos = pci_find_aer_capability(dev); 108 + if (!pos) 109 + return -EIO; 110 + 111 + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); 112 + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); 113 + if (dev->error_state == pci_channel_io_normal) 114 + status &= ~mask; /* Clear corresponding nonfatal bits */ 115 + else 116 + status &= mask; /* Clear corresponding fatal bits */ 117 + pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status); 118 + 119 + return 0; 120 + } 121 + 122 + static int find_device_iter(struct device *device, void *data) 123 + { 124 + struct pci_dev *dev; 125 + u16 id = *(unsigned long *)data; 126 + u8 secondary, subordinate, d_bus = id >> 8; 127 + 128 + if (device->bus == &pci_bus_type) { 129 + dev = to_pci_dev(device); 130 + if (id == ((dev->bus->number << 8) | dev->devfn)) { 131 + /* 132 + * Device ID match 133 + */ 134 + *(unsigned long*)data = (unsigned long)device; 135 + return 1; 136 + } 137 + 138 + /* 139 + * If device is P2P, check if it is an upstream? 
140 + */ 141 + if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) { 142 + pci_read_config_byte(dev, PCI_SECONDARY_BUS, 143 + &secondary); 144 + pci_read_config_byte(dev, PCI_SUBORDINATE_BUS, 145 + &subordinate); 146 + if (d_bus >= secondary && d_bus <= subordinate) { 147 + *(unsigned long*)data = (unsigned long)device; 148 + return 1; 149 + } 150 + } 151 + } 152 + 153 + return 0; 154 + } 155 + 156 + /** 157 + * find_source_device - search through device hierarchy for source device 158 + * @p_dev: pointer to Root Port pci_dev data structure 159 + * @id: device ID of agent who sends an error message to this Root Port 160 + * 161 + * Invoked when error is detected at the Root Port. 162 + **/ 163 + static struct device* find_source_device(struct pci_dev *parent, u16 id) 164 + { 165 + struct pci_dev *dev = parent; 166 + struct device *device; 167 + unsigned long device_addr; 168 + int status; 169 + 170 + /* Is Root Port an agent that sends error message? */ 171 + if (id == ((dev->bus->number << 8) | dev->devfn)) 172 + return &dev->dev; 173 + 174 + do { 175 + device_addr = id; 176 + if ((status = device_for_each_child(&dev->dev, 177 + &device_addr, find_device_iter))) { 178 + device = (struct device*)device_addr; 179 + dev = to_pci_dev(device); 180 + if (id == ((dev->bus->number << 8) | dev->devfn)) 181 + return device; 182 + } 183 + }while (status); 184 + 185 + return NULL; 186 + } 187 + 188 + static void report_error_detected(struct pci_dev *dev, void *data) 189 + { 190 + pci_ers_result_t vote; 191 + struct pci_error_handlers *err_handler; 192 + struct aer_broadcast_data *result_data; 193 + result_data = (struct aer_broadcast_data *) data; 194 + 195 + dev->error_state = result_data->state; 196 + 197 + if (!dev->driver || 198 + !dev->driver->err_handler || 199 + !dev->driver->err_handler->error_detected) { 200 + if (result_data->state == pci_channel_io_frozen && 201 + !(dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) { 202 + /* 203 + * In case of fatal recovery, if one of down- 204 + 
* stream device has no driver. We might be 205 + * unable to recover because a later insmod 206 + * of a driver for this device is unaware of 207 + * its hw state. 208 + */ 209 + printk(KERN_DEBUG "Device ID[%s] has %s\n", 210 + dev->dev.bus_id, (dev->driver) ? 211 + "no AER-aware driver" : "no driver"); 212 + } 213 + return; 214 + } 215 + 216 + err_handler = dev->driver->err_handler; 217 + vote = err_handler->error_detected(dev, result_data->state); 218 + result_data->result = merge_result(result_data->result, vote); 219 + return; 220 + } 221 + 222 + static void report_mmio_enabled(struct pci_dev *dev, void *data) 223 + { 224 + pci_ers_result_t vote; 225 + struct pci_error_handlers *err_handler; 226 + struct aer_broadcast_data *result_data; 227 + result_data = (struct aer_broadcast_data *) data; 228 + 229 + if (!dev->driver || 230 + !dev->driver->err_handler || 231 + !dev->driver->err_handler->mmio_enabled) 232 + return; 233 + 234 + err_handler = dev->driver->err_handler; 235 + vote = err_handler->mmio_enabled(dev); 236 + result_data->result = merge_result(result_data->result, vote); 237 + return; 238 + } 239 + 240 + static void report_slot_reset(struct pci_dev *dev, void *data) 241 + { 242 + pci_ers_result_t vote; 243 + struct pci_error_handlers *err_handler; 244 + struct aer_broadcast_data *result_data; 245 + result_data = (struct aer_broadcast_data *) data; 246 + 247 + if (!dev->driver || 248 + !dev->driver->err_handler || 249 + !dev->driver->err_handler->slot_reset) 250 + return; 251 + 252 + err_handler = dev->driver->err_handler; 253 + vote = err_handler->slot_reset(dev); 254 + result_data->result = merge_result(result_data->result, vote); 255 + return; 256 + } 257 + 258 + static void report_resume(struct pci_dev *dev, void *data) 259 + { 260 + struct pci_error_handlers *err_handler; 261 + 262 + dev->error_state = pci_channel_io_normal; 263 + 264 + if (!dev->driver || 265 + !dev->driver->err_handler || 266 + !dev->driver->err_handler->slot_reset) 267 + 
return; 268 + 269 + err_handler = dev->driver->err_handler; 270 + err_handler->resume(dev); 271 + return; 272 + } 273 + 274 + /** 275 + * broadcast_error_message - handle message broadcast to downstream drivers 276 + * @device: pointer to from where in a hierarchy message is broadcasted down 277 + * @api: callback to be broadcasted 278 + * @state: error state 279 + * 280 + * Invoked during error recovery process. Once being invoked, the content 281 + * of error severity will be broadcasted to all downstream drivers in a 282 + * hierarchy in question. 283 + **/ 284 + static pci_ers_result_t broadcast_error_message(struct pci_dev *dev, 285 + enum pci_channel_state state, 286 + char *error_mesg, 287 + void (*cb)(struct pci_dev *, void *)) 288 + { 289 + struct aer_broadcast_data result_data; 290 + 291 + printk(KERN_DEBUG "Broadcast %s message\n", error_mesg); 292 + result_data.state = state; 293 + if (cb == report_error_detected) 294 + result_data.result = PCI_ERS_RESULT_CAN_RECOVER; 295 + else 296 + result_data.result = PCI_ERS_RESULT_RECOVERED; 297 + 298 + if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) { 299 + /* 300 + * If the error is reported by a bridge, we think this error 301 + * is related to the downstream link of the bridge, so we 302 + * do error recovery on all subordinates of the bridge instead 303 + * of the bridge and clear the error status of the bridge. 304 + */ 305 + if (cb == report_error_detected) 306 + dev->error_state = state; 307 + pci_walk_bus(dev->subordinate, cb, &result_data); 308 + if (cb == report_resume) { 309 + pci_cleanup_aer_uncorrect_error_status(dev); 310 + dev->error_state = pci_channel_io_normal; 311 + } 312 + } 313 + else { 314 + /* 315 + * If the error is reported by an end point, we think this 316 + * error is related to the upstream link of the end point. 
317 + */ 318 + pci_walk_bus(dev->bus, cb, &result_data); 319 + } 320 + 321 + return result_data.result; 322 + } 323 + 324 + struct find_aer_service_data { 325 + struct pcie_port_service_driver *aer_driver; 326 + int is_downstream; 327 + }; 328 + 329 + static int find_aer_service_iter(struct device *device, void *data) 330 + { 331 + struct device_driver *driver; 332 + struct pcie_port_service_driver *service_driver; 333 + struct pcie_device *pcie_dev; 334 + struct find_aer_service_data *result; 335 + 336 + result = (struct find_aer_service_data *) data; 337 + 338 + if (device->bus == &pcie_port_bus_type) { 339 + pcie_dev = to_pcie_device(device); 340 + if (pcie_dev->id.port_type == PCIE_SW_DOWNSTREAM_PORT) 341 + result->is_downstream = 1; 342 + 343 + driver = device->driver; 344 + if (driver) { 345 + service_driver = to_service_driver(driver); 346 + if (service_driver->id_table->service_type == 347 + PCIE_PORT_SERVICE_AER) { 348 + result->aer_driver = service_driver; 349 + return 1; 350 + } 351 + } 352 + } 353 + 354 + return 0; 355 + } 356 + 357 + static void find_aer_service(struct pci_dev *dev, 358 + struct find_aer_service_data *data) 359 + { 360 + device_for_each_child(&dev->dev, data, find_aer_service_iter); 361 + } 362 + 363 + static pci_ers_result_t reset_link(struct pcie_device *aerdev, 364 + struct pci_dev *dev) 365 + { 366 + struct pci_dev *udev; 367 + pci_ers_result_t status; 368 + struct find_aer_service_data data; 369 + 370 + if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE) 371 + udev = dev; 372 + else 373 + udev= dev->bus->self; 374 + 375 + data.is_downstream = 0; 376 + data.aer_driver = NULL; 377 + find_aer_service(udev, &data); 378 + 379 + /* 380 + * Use the aer driver of the error agent firstly. 
381 + * If it hasn't the aer driver, use the root port's 382 + */ 383 + if (!data.aer_driver || !data.aer_driver->reset_link) { 384 + if (data.is_downstream && 385 + aerdev->device.driver && 386 + to_service_driver(aerdev->device.driver)->reset_link) { 387 + data.aer_driver = 388 + to_service_driver(aerdev->device.driver); 389 + } else { 390 + printk(KERN_DEBUG "No link-reset support to Device ID" 391 + "[%s]\n", 392 + dev->dev.bus_id); 393 + return PCI_ERS_RESULT_DISCONNECT; 394 + } 395 + } 396 + 397 + status = data.aer_driver->reset_link(udev); 398 + if (status != PCI_ERS_RESULT_RECOVERED) { 399 + printk(KERN_DEBUG "Link reset at upstream Device ID" 400 + "[%s] failed\n", 401 + udev->dev.bus_id); 402 + return PCI_ERS_RESULT_DISCONNECT; 403 + } 404 + 405 + return status; 406 + } 407 + 408 + /** 409 + * do_recovery - handle nonfatal/fatal error recovery process 410 + * @aerdev: pointer to a pcie_device data structure of root port 411 + * @dev: pointer to a pci_dev data structure of agent detecting an error 412 + * @severity: error severity type 413 + * 414 + * Invoked when an error is nonfatal/fatal. Once being invoked, broadcast 415 + * error detected message to all downstream drivers within a hierarchy in 416 + * question and return the returned code. 417 + **/ 418 + static pci_ers_result_t do_recovery(struct pcie_device *aerdev, 419 + struct pci_dev *dev, 420 + int severity) 421 + { 422 + pci_ers_result_t status, result = PCI_ERS_RESULT_RECOVERED; 423 + enum pci_channel_state state; 424 + 425 + if (severity == AER_FATAL) 426 + state = pci_channel_io_frozen; 427 + else 428 + state = pci_channel_io_normal; 429 + 430 + status = broadcast_error_message(dev, 431 + state, 432 + "error_detected", 433 + report_error_detected); 434 + 435 + if (severity == AER_FATAL) { 436 + result = reset_link(aerdev, dev); 437 + if (result != PCI_ERS_RESULT_RECOVERED) { 438 + /* TODO: Should panic here? 
*/ 439 + return result; 440 + } 441 + } 442 + 443 + if (status == PCI_ERS_RESULT_CAN_RECOVER) 444 + status = broadcast_error_message(dev, 445 + state, 446 + "mmio_enabled", 447 + report_mmio_enabled); 448 + 449 + if (status == PCI_ERS_RESULT_NEED_RESET) { 450 + /* 451 + * TODO: Should call platform-specific 452 + * functions to reset slot before calling 453 + * drivers' slot_reset callbacks? 454 + */ 455 + status = broadcast_error_message(dev, 456 + state, 457 + "slot_reset", 458 + report_slot_reset); 459 + } 460 + 461 + if (status == PCI_ERS_RESULT_RECOVERED) 462 + broadcast_error_message(dev, 463 + state, 464 + "resume", 465 + report_resume); 466 + 467 + return status; 468 + } 469 + 470 + /** 471 + * handle_error_source - handle logging error into an event log 472 + * @aerdev: pointer to pcie_device data structure of the root port 473 + * @dev: pointer to pci_dev data structure of error source device 474 + * @info: comprehensive error information 475 + * 476 + * Invoked when an error being detected by Root Port. 477 + **/ 478 + static void handle_error_source(struct pcie_device * aerdev, 479 + struct pci_dev *dev, 480 + struct aer_err_info info) 481 + { 482 + pci_ers_result_t status = 0; 483 + int pos; 484 + 485 + if (info.severity == AER_CORRECTABLE) { 486 + /* 487 + * Correctable error does not need software intevention. 488 + * No need to go through error recovery process. 489 + */ 490 + pos = pci_find_aer_capability(dev); 491 + if (pos) 492 + pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, 493 + info.status); 494 + } else { 495 + status = do_recovery(aerdev, dev, info.severity); 496 + if (status == PCI_ERS_RESULT_RECOVERED) { 497 + printk(KERN_DEBUG "AER driver successfully recovered\n"); 498 + } else { 499 + /* TODO: Should kernel panic here? 
*/ 500 + printk(KERN_DEBUG "AER driver didn't recover\n"); 501 + } 502 + } 503 + } 504 + 505 + /** 506 + * aer_enable_rootport - enable Root Port's interrupts when receiving messages 507 + * @rpc: pointer to a Root Port data structure 508 + * 509 + * Invoked when PCIE bus loads AER service driver. 510 + **/ 511 + void aer_enable_rootport(struct aer_rpc *rpc) 512 + { 513 + struct pci_dev *pdev = rpc->rpd->port; 514 + int pos, aer_pos; 515 + u16 reg16; 516 + u32 reg32; 517 + 518 + pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); 519 + /* Clear PCIE Capability's Device Status */ 520 + pci_read_config_word(pdev, pos+PCI_EXP_DEVSTA, &reg16); 521 + pci_write_config_word(pdev, pos+PCI_EXP_DEVSTA, reg16); 522 + 523 + /* Disable system error generation in response to error messages */ 524 + pci_read_config_word(pdev, pos + PCI_EXP_RTCTL, &reg16); 525 + reg16 &= ~(SYSTEM_ERROR_INTR_ON_MESG_MASK); 526 + pci_write_config_word(pdev, pos + PCI_EXP_RTCTL, reg16); 527 + 528 + aer_pos = pci_find_aer_capability(pdev); 529 + /* Clear error status */ 530 + pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, &reg32); 531 + pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32); 532 + pci_read_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, &reg32); 533 + pci_write_config_dword(pdev, aer_pos + PCI_ERR_COR_STATUS, reg32); 534 + pci_read_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, &reg32); 535 + pci_write_config_dword(pdev, aer_pos + PCI_ERR_UNCOR_STATUS, reg32); 536 + 537 + /* Enable Root Port device reporting error itself */ 538 + pci_read_config_word(pdev, pos+PCI_EXP_DEVCTL, &reg16); 539 + reg16 = reg16 | 540 + PCI_EXP_DEVCTL_CERE | 541 + PCI_EXP_DEVCTL_NFERE | 542 + PCI_EXP_DEVCTL_FERE | 543 + PCI_EXP_DEVCTL_URRE; 544 + pci_write_config_word(pdev, pos+PCI_EXP_DEVCTL, 545 + reg16); 546 + 547 + /* Enable Root Port's interrupt in response to error messages */ 548 + pci_write_config_dword(pdev, 549 + aer_pos + PCI_ERR_ROOT_COMMAND, 550 + 
ROOT_PORT_INTR_ON_MESG_MASK); 551 + } 552 + 553 + /** 554 + * disable_root_aer - disable Root Port's interrupts when receiving messages 555 + * @rpc: pointer to a Root Port data structure 556 + * 557 + * Invoked when PCIE bus unloads AER service driver. 558 + **/ 559 + static void disable_root_aer(struct aer_rpc *rpc) 560 + { 561 + struct pci_dev *pdev = rpc->rpd->port; 562 + u32 reg32; 563 + int pos; 564 + 565 + pos = pci_find_aer_capability(pdev); 566 + /* Disable Root's interrupt in response to error messages */ 567 + pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, 0); 568 + 569 + /* Clear Root's error status reg */ 570 + pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, &reg32); 571 + pci_write_config_dword(pdev, pos + PCI_ERR_ROOT_STATUS, reg32); 572 + } 573 + 574 + /** 575 + * get_e_source - retrieve an error source 576 + * @rpc: pointer to the root port which holds an error 577 + * 578 + * Invoked by DPC handler to consume an error. 579 + **/ 580 + static struct aer_err_source* get_e_source(struct aer_rpc *rpc) 581 + { 582 + struct aer_err_source *e_source; 583 + unsigned long flags; 584 + 585 + /* Lock access to Root error producer/consumer index */ 586 + spin_lock_irqsave(&rpc->e_lock, flags); 587 + if (rpc->prod_idx == rpc->cons_idx) { 588 + spin_unlock_irqrestore(&rpc->e_lock, flags); 589 + return NULL; 590 + } 591 + e_source = &rpc->e_sources[rpc->cons_idx]; 592 + rpc->cons_idx++; 593 + if (rpc->cons_idx == AER_ERROR_SOURCES_MAX) 594 + rpc->cons_idx = 0; 595 + spin_unlock_irqrestore(&rpc->e_lock, flags); 596 + 597 + return e_source; 598 + } 599 + 600 + static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) 601 + { 602 + int pos; 603 + 604 + pos = pci_find_aer_capability(dev); 605 + 606 + /* The device might not support AER */ 607 + if (!pos) 608 + return AER_SUCCESS; 609 + 610 + if (info->severity == AER_CORRECTABLE) { 611 + pci_read_config_dword(dev, pos + PCI_ERR_COR_STATUS, 612 + &info->status); 613 + if 
(!(info->status & ERR_CORRECTABLE_ERROR_MASK)) 614 + return AER_UNSUCCESS; 615 + } else if (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE || 616 + info->severity == AER_NONFATAL) { 617 + 618 + /* Link is still healthy for IO reads */ 619 + pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, 620 + &info->status); 621 + if (!(info->status & ERR_UNCORRECTABLE_ERROR_MASK)) 622 + return AER_UNSUCCESS; 623 + 624 + if (info->status & AER_LOG_TLP_MASKS) { 625 + info->flags |= AER_TLP_HEADER_VALID_FLAG; 626 + pci_read_config_dword(dev, 627 + pos + PCI_ERR_HEADER_LOG, &info->tlp.dw0); 628 + pci_read_config_dword(dev, 629 + pos + PCI_ERR_HEADER_LOG + 4, &info->tlp.dw1); 630 + pci_read_config_dword(dev, 631 + pos + PCI_ERR_HEADER_LOG + 8, &info->tlp.dw2); 632 + pci_read_config_dword(dev, 633 + pos + PCI_ERR_HEADER_LOG + 12, &info->tlp.dw3); 634 + } 635 + } 636 + 637 + return AER_SUCCESS; 638 + } 639 + 640 + /** 641 + * aer_isr_one_error - consume an error detected by root port 642 + * @p_device: pointer to error root port service device 643 + * @e_src: pointer to an error source 644 + **/ 645 + static void aer_isr_one_error(struct pcie_device *p_device, 646 + struct aer_err_source *e_src) 647 + { 648 + struct device *s_device; 649 + struct aer_err_info e_info = {0, 0, 0,}; 650 + int i; 651 + u16 id; 652 + 653 + /* 654 + * There is a possibility that both correctable error and 655 + * uncorrectable error being logged. Report correctable error first. 
656 + */ 657 + for (i = 1; i & ROOT_ERR_STATUS_MASKS ; i <<= 2) { 658 + if (i > 4) 659 + break; 660 + if (!(e_src->status & i)) 661 + continue; 662 + 663 + /* Init comprehensive error information */ 664 + if (i & PCI_ERR_ROOT_COR_RCV) { 665 + id = ERR_COR_ID(e_src->id); 666 + e_info.severity = AER_CORRECTABLE; 667 + } else { 668 + id = ERR_UNCOR_ID(e_src->id); 669 + e_info.severity = ((e_src->status >> 6) & 1); 670 + } 671 + if (e_src->status & 672 + (PCI_ERR_ROOT_MULTI_COR_RCV | 673 + PCI_ERR_ROOT_MULTI_UNCOR_RCV)) 674 + e_info.flags |= AER_MULTI_ERROR_VALID_FLAG; 675 + if (!(s_device = find_source_device(p_device->port, id))) { 676 + printk(KERN_DEBUG "%s->can't find device of ID%04x\n", 677 + __FUNCTION__, id); 678 + continue; 679 + } 680 + if (get_device_error_info(to_pci_dev(s_device), &e_info) == 681 + AER_SUCCESS) { 682 + aer_print_error(to_pci_dev(s_device), &e_info); 683 + handle_error_source(p_device, 684 + to_pci_dev(s_device), 685 + e_info); 686 + } 687 + } 688 + } 689 + 690 + /** 691 + * aer_isr - consume errors detected by root port 692 + * @context: pointer to a private data of pcie device 693 + * 694 + * Invoked, as DPC, when root port records new detected error 695 + **/ 696 + void aer_isr(void *context) 697 + { 698 + struct pcie_device *p_device = (struct pcie_device *) context; 699 + struct aer_rpc *rpc = get_service_data(p_device); 700 + struct aer_err_source *e_src; 701 + 702 + mutex_lock(&rpc->rpc_mutex); 703 + e_src = get_e_source(rpc); 704 + while (e_src) { 705 + aer_isr_one_error(p_device, e_src); 706 + e_src = get_e_source(rpc); 707 + } 708 + mutex_unlock(&rpc->rpc_mutex); 709 + 710 + wake_up(&rpc->wait_release); 711 + } 712 + 713 + /** 714 + * aer_delete_rootport - disable root port aer and delete service data 715 + * @rpc: pointer to a root port device being deleted 716 + * 717 + * Invoked when AER service unloaded on a specific Root Port 718 + **/ 719 + void aer_delete_rootport(struct aer_rpc *rpc) 720 + { 721 + /* Disable root port AER 
itself */ 722 + disable_root_aer(rpc); 723 + 724 + kfree(rpc); 725 + } 726 + 727 + /** 728 + * aer_init - provide AER initialization 729 + * @dev: pointer to AER pcie device 730 + * 731 + * Invoked when AER service driver is loaded. 732 + **/ 733 + int aer_init(struct pcie_device *dev) 734 + { 735 + int status; 736 + 737 + /* Run _OSC Method */ 738 + status = aer_osc_setup(dev->port); 739 + 740 + if(status != OSC_METHOD_RUN_SUCCESS) { 741 + printk(KERN_DEBUG "%s: AER service init fails - %s\n", 742 + __FUNCTION__, 743 + (status == OSC_METHOD_NOT_SUPPORTED) ? 744 + "No ACPI _OSC support" : "Run ACPI _OSC fails"); 745 + 746 + if (!forceload) 747 + return status; 748 + } 749 + 750 + return AER_SUCCESS; 751 + } 752 + 753 + EXPORT_SYMBOL_GPL(pci_find_aer_capability); 754 + EXPORT_SYMBOL_GPL(pci_enable_pcie_error_reporting); 755 + EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting); 756 + EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status); 757 +
+248
drivers/pci/pcie/aer/aerdrv_errprint.c
··· 1 + /* 2 + * drivers/pci/pcie/aer/aerdrv_errprint.c 3 + * 4 + * This file is subject to the terms and conditions of the GNU General Public 5 + * License. See the file "COPYING" in the main directory of this archive 6 + * for more details. 7 + * 8 + * Format error messages and print them to console. 9 + * 10 + * Copyright (C) 2006 Intel Corp. 11 + * Tom Long Nguyen (tom.l.nguyen@intel.com) 12 + * Zhang Yanmin (yanmin.zhang@intel.com) 13 + * 14 + */ 15 + 16 + #include <linux/module.h> 17 + #include <linux/pci.h> 18 + #include <linux/kernel.h> 19 + #include <linux/errno.h> 20 + #include <linux/pm.h> 21 + #include <linux/suspend.h> 22 + 23 + #include "aerdrv.h" 24 + 25 + #define AER_AGENT_RECEIVER 0 26 + #define AER_AGENT_REQUESTER 1 27 + #define AER_AGENT_COMPLETER 2 28 + #define AER_AGENT_TRANSMITTER 3 29 + 30 + #define AER_AGENT_REQUESTER_MASK (PCI_ERR_UNC_COMP_TIME| \ 31 + PCI_ERR_UNC_UNSUP) 32 + 33 + #define AER_AGENT_COMPLETER_MASK PCI_ERR_UNC_COMP_ABORT 34 + 35 + #define AER_AGENT_TRANSMITTER_MASK(t, e) (e & (PCI_ERR_COR_REP_ROLL| \ 36 + ((t == AER_CORRECTABLE) ? PCI_ERR_COR_REP_TIMER: 0))) 37 + 38 + #define AER_GET_AGENT(t, e) \ 39 + ((e & AER_AGENT_COMPLETER_MASK) ? AER_AGENT_COMPLETER : \ 40 + (e & AER_AGENT_REQUESTER_MASK) ? AER_AGENT_REQUESTER : \ 41 + (AER_AGENT_TRANSMITTER_MASK(t, e)) ? AER_AGENT_TRANSMITTER : \ 42 + AER_AGENT_RECEIVER) 43 + 44 + #define AER_PHYSICAL_LAYER_ERROR_MASK PCI_ERR_COR_RCVR 45 + #define AER_DATA_LINK_LAYER_ERROR_MASK(t, e) \ 46 + (PCI_ERR_UNC_DLP| \ 47 + PCI_ERR_COR_BAD_TLP| \ 48 + PCI_ERR_COR_BAD_DLLP| \ 49 + PCI_ERR_COR_REP_ROLL| \ 50 + ((t == AER_CORRECTABLE) ? \ 51 + PCI_ERR_COR_REP_TIMER: 0)) 52 + 53 + #define AER_PHYSICAL_LAYER_ERROR 0 54 + #define AER_DATA_LINK_LAYER_ERROR 1 55 + #define AER_TRANSACTION_LAYER_ERROR 2 56 + 57 + #define AER_GET_LAYER_ERROR(t, e) \ 58 + ((e & AER_PHYSICAL_LAYER_ERROR_MASK) ? \ 59 + AER_PHYSICAL_LAYER_ERROR : \ 60 + (e & AER_DATA_LINK_LAYER_ERROR_MASK(t, e)) ? 
\ 61 + AER_DATA_LINK_LAYER_ERROR : \ 62 + AER_TRANSACTION_LAYER_ERROR) 63 + 64 + /* 65 + * AER error strings 66 + */ 67 + static char* aer_error_severity_string[] = { 68 + "Uncorrected (Non-Fatal)", 69 + "Uncorrected (Fatal)", 70 + "Corrected" 71 + }; 72 + 73 + static char* aer_error_layer[] = { 74 + "Physical Layer", 75 + "Data Link Layer", 76 + "Transaction Layer" 77 + }; 78 + static char* aer_correctable_error_string[] = { 79 + "Receiver Error ", /* Bit Position 0 */ 80 + NULL, 81 + NULL, 82 + NULL, 83 + NULL, 84 + NULL, 85 + "Bad TLP ", /* Bit Position 6 */ 86 + "Bad DLLP ", /* Bit Position 7 */ 87 + "RELAY_NUM Rollover ", /* Bit Position 8 */ 88 + NULL, 89 + NULL, 90 + NULL, 91 + "Replay Timer Timeout ", /* Bit Position 12 */ 92 + "Advisory Non-Fatal ", /* Bit Position 13 */ 93 + NULL, 94 + NULL, 95 + NULL, 96 + NULL, 97 + NULL, 98 + NULL, 99 + NULL, 100 + NULL, 101 + NULL, 102 + NULL, 103 + NULL, 104 + NULL, 105 + NULL, 106 + NULL, 107 + NULL, 108 + NULL, 109 + NULL, 110 + NULL, 111 + }; 112 + 113 + static char* aer_uncorrectable_error_string[] = { 114 + NULL, 115 + NULL, 116 + NULL, 117 + NULL, 118 + "Data Link Protocol ", /* Bit Position 4 */ 119 + NULL, 120 + NULL, 121 + NULL, 122 + NULL, 123 + NULL, 124 + NULL, 125 + NULL, 126 + "Poisoned TLP ", /* Bit Position 12 */ 127 + "Flow Control Protocol ", /* Bit Position 13 */ 128 + "Completion Timeout ", /* Bit Position 14 */ 129 + "Completer Abort ", /* Bit Position 15 */ 130 + "Unexpected Completion ", /* Bit Position 16 */ 131 + "Receiver Overflow ", /* Bit Position 17 */ 132 + "Malformed TLP ", /* Bit Position 18 */ 133 + "ECRC ", /* Bit Position 19 */ 134 + "Unsupported Request ", /* Bit Position 20 */ 135 + NULL, 136 + NULL, 137 + NULL, 138 + NULL, 139 + NULL, 140 + NULL, 141 + NULL, 142 + NULL, 143 + NULL, 144 + NULL, 145 + NULL, 146 + }; 147 + 148 + static char* aer_agent_string[] = { 149 + "Receiver ID", 150 + "Requester ID", 151 + "Completer ID", 152 + "Transmitter ID" 153 + }; 154 + 155 + static char 
* aer_get_error_source_name(int severity, 156 + unsigned int status, 157 + char errmsg_buff[]) 158 + { 159 + int i; 160 + char * errmsg = NULL; 161 + 162 + for (i = 0; i < 32; i++) { 163 + if (!(status & (1 << i))) 164 + continue; 165 + 166 + if (severity == AER_CORRECTABLE) 167 + errmsg = aer_correctable_error_string[i]; 168 + else 169 + errmsg = aer_uncorrectable_error_string[i]; 170 + 171 + if (!errmsg) { 172 + sprintf(errmsg_buff, "Unknown Error Bit %2d ", i); 173 + errmsg = errmsg_buff; 174 + } 175 + 176 + break; 177 + } 178 + 179 + return errmsg; 180 + } 181 + 182 + static DEFINE_SPINLOCK(logbuf_lock); 183 + static char errmsg_buff[100]; 184 + void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) 185 + { 186 + char * errmsg; 187 + int err_layer, agent; 188 + char * loglevel; 189 + 190 + if (info->severity == AER_CORRECTABLE) 191 + loglevel = KERN_WARNING; 192 + else 193 + loglevel = KERN_ERR; 194 + 195 + printk("%s+------ PCI-Express Device Error ------+\n", loglevel); 196 + printk("%sError Severity\t\t: %s\n", loglevel, 197 + aer_error_severity_string[info->severity]); 198 + 199 + if ( info->status == 0) { 200 + printk("%sPCIE Bus Error type\t: (Unaccessible)\n", loglevel); 201 + printk("%sUnaccessible Received\t: %s\n", loglevel, 202 + info->flags & AER_MULTI_ERROR_VALID_FLAG ? 203 + "Multiple" : "First"); 204 + printk("%sUnregistered Agent ID\t: %04x\n", loglevel, 205 + (dev->bus->number << 8) | dev->devfn); 206 + } else { 207 + err_layer = AER_GET_LAYER_ERROR(info->severity, info->status); 208 + printk("%sPCIE Bus Error type\t: %s\n", loglevel, 209 + aer_error_layer[err_layer]); 210 + 211 + spin_lock(&logbuf_lock); 212 + errmsg = aer_get_error_source_name(info->severity, 213 + info->status, 214 + errmsg_buff); 215 + printk("%s%s\t: %s\n", loglevel, errmsg, 216 + info->flags & AER_MULTI_ERROR_VALID_FLAG ? 
217 + "Multiple" : "First"); 218 + spin_unlock(&logbuf_lock); 219 + 220 + agent = AER_GET_AGENT(info->severity, info->status); 221 + printk("%s%s\t\t: %04x\n", loglevel, 222 + aer_agent_string[agent], 223 + (dev->bus->number << 8) | dev->devfn); 224 + 225 + printk("%sVendorID=%04xh, DeviceID=%04xh," 226 + " Bus=%02xh, Device=%02xh, Function=%02xh\n", 227 + loglevel, 228 + dev->vendor, 229 + dev->device, 230 + dev->bus->number, 231 + PCI_SLOT(dev->devfn), 232 + PCI_FUNC(dev->devfn)); 233 + 234 + if (info->flags & AER_TLP_HEADER_VALID_FLAG) { 235 + unsigned char *tlp = (unsigned char *) &info->tlp; 236 + printk("%sTLB Header:\n", loglevel); 237 + printk("%s%02x%02x%02x%02x %02x%02x%02x%02x" 238 + " %02x%02x%02x%02x %02x%02x%02x%02x\n", 239 + loglevel, 240 + *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, 241 + *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), 242 + *(tlp + 11), *(tlp + 10), *(tlp + 9), 243 + *(tlp + 8), *(tlp + 15), *(tlp + 14), 244 + *(tlp + 13), *(tlp + 12)); 245 + } 246 + } 247 + } 248 +
+24
include/linux/aer.h
/*
 * Copyright (C) 2006 Intel Corp.
 *     Tom Long Nguyen (tom.l.nguyen@intel.com)
 *     Zhang Yanmin (yanmin.zhang@intel.com)
 */

#ifndef _AER_H_
#define _AER_H_

/* Only pointers to struct pci_dev are used here; no full definition needed */
struct pci_dev;

#if defined(CONFIG_PCIEAER)
/* pci-e port driver needs this function to enable aer */
extern int pci_enable_pcie_error_reporting(struct pci_dev *dev);
extern int pci_find_aer_capability(struct pci_dev *dev);
extern int pci_disable_pcie_error_reporting(struct pci_dev *dev);
extern int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
#else
/*
 * Stubs for kernels built without AER support.  The real functions return
 * int, so each stub must expand to an expression (not a statement) to keep
 * callers such as "pos = pci_find_aer_capability(dev);" compiling.  The
 * capability lookup reports "not found" (0); the others report -EINVAL,
 * which is resolved at the point of use, so callers that test the result
 * need the errno definitions in scope.
 */
#define pci_enable_pcie_error_reporting(dev)		(-EINVAL)
#define pci_find_aer_capability(dev)			(0)
#define pci_disable_pcie_error_reporting(dev)		(-EINVAL)
#define pci_cleanup_aer_uncorrect_error_status(dev)	(-EINVAL)
#endif

#endif /* _AER_H_ */
+6
include/linux/pcieport_if.h
··· 62 62 int (*suspend) (struct pcie_device *dev, pm_message_t state); 63 63 int (*resume) (struct pcie_device *dev); 64 64 65 + /* Service Error Recovery Handler */ 66 + struct pci_error_handlers *err_handler; 67 + 68 + /* Link Reset Capability - AER service driver specific */ 69 + pci_ers_result_t (*reset_link) (struct pci_dev *dev); 70 + 65 71 const struct pcie_port_service_id *id_table; 66 72 struct device_driver driver; 67 73 };