Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

PCI AER: software error injection

Debugging PCIE AER code can be very difficult because it is hard
to trigger various real hardware errors. This patch provides a
software-based error injection tool, which can fake various PCIE
errors with a user space helper tool named "aer-inject", which
can be obtained from:

http://www.kernel.org/pub/linux/kernel/people/yhuang/

The patch fakes AER errors by faking some PCIE AER related
registers and an AER interrupt for the specified PCIE device.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>

authored by

Huang Ying and committed by
Jesse Barnes
c465def6 3d5505c5

+495 -1
+1 -1
Documentation/PCI/pcieaer-howto.txt
··· 267 267 268 268 Then, you need a user space tool named aer-inject, which can be gotten 269 269 from: 270 - http://www.kernel.org/pub/linux/kernel/people/yhuang/ 270 + http://www.kernel.org/pub/linux/utils/pci/aer-inject/ 271 271 272 272 More information about aer-inject can be found in the document comes 273 273 with its source code.
+2
drivers/pci/pcie/aer/Kconfig
··· 23 23 (transaction layer end-to-end CRC checking). 24 24 25 25 When in doubt, say N. 26 + 27 + source "drivers/pci/pcie/aer/Kconfig.debug"
+18
drivers/pci/pcie/aer/Kconfig.debug
··· 1 + # 2 + # PCI Express Root Port Device AER Debug Configuration 3 + # 4 + 5 + config PCIEAER_INJECT 6 + tristate "PCIE AER error injector support" 7 + depends on PCIEAER 8 + default n 9 + help 10 + This enables PCI Express Root Port Advanced Error Reporting 11 + (AER) software error injector. 12 + 13 + Debugging PCIE AER code is quite difficult because it is hard 14 + to trigger various real hardware errors. Software based 15 + error injection can fake almost all kinds of errors with the 16 + help of a user space helper tool aer-inject, which can be 17 + gotten from: 18 + http://www.kernel.org/pub/linux/utils/pci/aer-inject/
+1
drivers/pci/pcie/aer/Makefile
··· 9 9 aerdriver-objs := aerdrv_errprint.o aerdrv_core.o aerdrv.o 10 10 aerdriver-$(CONFIG_ACPI) += aerdrv_acpi.o 11 11 12 + obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o
+473
drivers/pci/pcie/aer/aer_inject.c
··· 1 + /* 2 + * PCIE AER software error injection support. 3 + * 4 + * Debuging PCIE AER code is quite difficult because it is hard to 5 + * trigger various real hardware errors. Software based error 6 + * injection can fake almost all kinds of errors with the help of a 7 + * user space helper tool aer-inject, which can be gotten from: 8 + * http://www.kernel.org/pub/linux/utils/pci/aer-inject/ 9 + * 10 + * Copyright 2009 Intel Corporation. 11 + * Huang Ying <ying.huang@intel.com> 12 + * 13 + * This program is free software; you can redistribute it and/or 14 + * modify it under the terms of the GNU General Public License 15 + * as published by the Free Software Foundation; version 2 16 + * of the License. 17 + * 18 + */ 19 + 20 + #include <linux/module.h> 21 + #include <linux/init.h> 22 + #include <linux/miscdevice.h> 23 + #include <linux/pci.h> 24 + #include <linux/fs.h> 25 + #include <asm/uaccess.h> 26 + #include "aerdrv.h" 27 + 28 + struct aer_error_inj 29 + { 30 + u8 bus; 31 + u8 dev; 32 + u8 fn; 33 + u32 uncor_status; 34 + u32 cor_status; 35 + u32 header_log0; 36 + u32 header_log1; 37 + u32 header_log2; 38 + u32 header_log3; 39 + }; 40 + 41 + struct aer_error 42 + { 43 + struct list_head list; 44 + unsigned int bus; 45 + unsigned int devfn; 46 + int pos_cap_err; 47 + 48 + u32 uncor_status; 49 + u32 cor_status; 50 + u32 header_log0; 51 + u32 header_log1; 52 + u32 header_log2; 53 + u32 header_log3; 54 + u32 root_status; 55 + u32 source_id; 56 + }; 57 + 58 + struct pci_bus_ops 59 + { 60 + struct list_head list; 61 + struct pci_bus *bus; 62 + struct pci_ops *ops; 63 + }; 64 + 65 + static LIST_HEAD(einjected); 66 + 67 + static LIST_HEAD(pci_bus_ops_list); 68 + 69 + /* Protect einjected and pci_bus_ops_list */ 70 + static DEFINE_SPINLOCK(inject_lock); 71 + 72 + static void aer_error_init(struct aer_error *err, unsigned int bus, 73 + unsigned int devfn, int pos_cap_err) 74 + { 75 + INIT_LIST_HEAD(&err->list); 76 + err->bus = bus; 77 + err->devfn = devfn; 78 + 
err->pos_cap_err = pos_cap_err; 79 + } 80 + 81 + /* inject_lock must be held before calling */ 82 + static struct aer_error *__find_aer_error(unsigned int bus, unsigned int devfn) 83 + { 84 + struct aer_error *err; 85 + 86 + list_for_each_entry(err, &einjected, list) { 87 + if (bus == err->bus && devfn == err->devfn) 88 + return err; 89 + } 90 + return NULL; 91 + } 92 + 93 + /* inject_lock must be held before calling */ 94 + static struct aer_error *__find_aer_error_by_dev(struct pci_dev *dev) 95 + { 96 + return __find_aer_error(dev->bus->number, dev->devfn); 97 + } 98 + 99 + /* inject_lock must be held before calling */ 100 + static struct pci_ops *__find_pci_bus_ops(struct pci_bus *bus) 101 + { 102 + struct pci_bus_ops *bus_ops; 103 + 104 + list_for_each_entry(bus_ops, &pci_bus_ops_list, list) { 105 + if (bus_ops->bus == bus) 106 + return bus_ops->ops; 107 + } 108 + return NULL; 109 + } 110 + 111 + static struct pci_bus_ops *pci_bus_ops_pop(void) 112 + { 113 + unsigned long flags; 114 + struct pci_bus_ops *bus_ops = NULL; 115 + 116 + spin_lock_irqsave(&inject_lock, flags); 117 + if (list_empty(&pci_bus_ops_list)) 118 + bus_ops = NULL; 119 + else { 120 + struct list_head *lh = pci_bus_ops_list.next; 121 + list_del(lh); 122 + bus_ops = list_entry(lh, struct pci_bus_ops, list); 123 + } 124 + spin_unlock_irqrestore(&inject_lock, flags); 125 + return bus_ops; 126 + } 127 + 128 + static u32 *find_pci_config_dword(struct aer_error *err, int where, 129 + int *prw1cs) 130 + { 131 + int rw1cs = 0; 132 + u32 *target = NULL; 133 + 134 + if (err->pos_cap_err == -1) 135 + return NULL; 136 + 137 + switch (where - err->pos_cap_err) { 138 + case PCI_ERR_UNCOR_STATUS: 139 + target = &err->uncor_status; 140 + rw1cs = 1; 141 + break; 142 + case PCI_ERR_COR_STATUS: 143 + target = &err->cor_status; 144 + rw1cs = 1; 145 + break; 146 + case PCI_ERR_HEADER_LOG: 147 + target = &err->header_log0; 148 + break; 149 + case PCI_ERR_HEADER_LOG+4: 150 + target = &err->header_log1; 151 + break; 
152 + case PCI_ERR_HEADER_LOG+8: 153 + target = &err->header_log2; 154 + break; 155 + case PCI_ERR_HEADER_LOG+12: 156 + target = &err->header_log3; 157 + break; 158 + case PCI_ERR_ROOT_STATUS: 159 + target = &err->root_status; 160 + rw1cs = 1; 161 + break; 162 + case PCI_ERR_ROOT_COR_SRC: 163 + target = &err->source_id; 164 + break; 165 + } 166 + if (prw1cs) 167 + *prw1cs = rw1cs; 168 + return target; 169 + } 170 + 171 + static int pci_read_aer(struct pci_bus *bus, unsigned int devfn, int where, 172 + int size, u32 *val) 173 + { 174 + u32 *sim; 175 + struct aer_error *err; 176 + unsigned long flags; 177 + struct pci_ops *ops; 178 + 179 + spin_lock_irqsave(&inject_lock, flags); 180 + if (size != sizeof(u32)) 181 + goto out; 182 + err = __find_aer_error(bus->number, devfn); 183 + if (!err) 184 + goto out; 185 + 186 + sim = find_pci_config_dword(err, where, NULL); 187 + if (sim) { 188 + *val = *sim; 189 + spin_unlock_irqrestore(&inject_lock, flags); 190 + return 0; 191 + } 192 + out: 193 + ops = __find_pci_bus_ops(bus); 194 + spin_unlock_irqrestore(&inject_lock, flags); 195 + return ops->read(bus, devfn, where, size, val); 196 + } 197 + 198 + int pci_write_aer(struct pci_bus *bus, unsigned int devfn, int where, int size, 199 + u32 val) 200 + { 201 + u32 *sim; 202 + struct aer_error *err; 203 + unsigned long flags; 204 + int rw1cs; 205 + struct pci_ops *ops; 206 + 207 + spin_lock_irqsave(&inject_lock, flags); 208 + if (size != sizeof(u32)) 209 + goto out; 210 + err = __find_aer_error(bus->number, devfn); 211 + if (!err) 212 + goto out; 213 + 214 + sim = find_pci_config_dword(err, where, &rw1cs); 215 + if (sim) { 216 + if (rw1cs) 217 + *sim ^= val; 218 + else 219 + *sim = val; 220 + spin_unlock_irqrestore(&inject_lock, flags); 221 + return 0; 222 + } 223 + out: 224 + ops = __find_pci_bus_ops(bus); 225 + spin_unlock_irqrestore(&inject_lock, flags); 226 + return ops->write(bus, devfn, where, size, val); 227 + } 228 + 229 + static struct pci_ops pci_ops_aer = { 230 + .read 
= pci_read_aer, 231 + .write = pci_write_aer, 232 + }; 233 + 234 + static void pci_bus_ops_init(struct pci_bus_ops *bus_ops, 235 + struct pci_bus *bus, 236 + struct pci_ops *ops) 237 + { 238 + INIT_LIST_HEAD(&bus_ops->list); 239 + bus_ops->bus = bus; 240 + bus_ops->ops = ops; 241 + } 242 + 243 + static int pci_bus_set_aer_ops(struct pci_bus *bus) 244 + { 245 + struct pci_ops *ops; 246 + struct pci_bus_ops *bus_ops; 247 + unsigned long flags; 248 + 249 + bus_ops = kmalloc(sizeof(*bus_ops), GFP_KERNEL); 250 + if (!bus_ops) 251 + return -ENOMEM; 252 + ops = pci_bus_set_ops(bus, &pci_ops_aer); 253 + spin_lock_irqsave(&inject_lock, flags); 254 + if (ops == &pci_ops_aer) 255 + goto out; 256 + pci_bus_ops_init(bus_ops, bus, ops); 257 + list_add(&bus_ops->list, &pci_bus_ops_list); 258 + bus_ops = NULL; 259 + out: 260 + spin_unlock_irqrestore(&inject_lock, flags); 261 + if (bus_ops) 262 + kfree(bus_ops); 263 + return 0; 264 + } 265 + 266 + static struct pci_dev *pcie_find_root_port(struct pci_dev *dev) 267 + { 268 + while (1) { 269 + if (!dev->is_pcie) 270 + break; 271 + if (dev->pcie_type == PCI_EXP_TYPE_ROOT_PORT) 272 + return dev; 273 + if (!dev->bus->self) 274 + break; 275 + dev = dev->bus->self; 276 + } 277 + return NULL; 278 + } 279 + 280 + static int find_aer_device_iter(struct device *device, void *data) 281 + { 282 + struct pcie_device **result = data; 283 + struct pcie_device *pcie_dev; 284 + 285 + if (device->bus == &pcie_port_bus_type) { 286 + pcie_dev = to_pcie_device(device); 287 + if (pcie_dev->service & PCIE_PORT_SERVICE_AER) { 288 + *result = pcie_dev; 289 + return 1; 290 + } 291 + } 292 + return 0; 293 + } 294 + 295 + static int find_aer_device(struct pci_dev *dev, struct pcie_device **result) 296 + { 297 + return device_for_each_child(&dev->dev, result, find_aer_device_iter); 298 + } 299 + 300 + static int aer_inject(struct aer_error_inj *einj) 301 + { 302 + struct aer_error *err, *rperr; 303 + struct aer_error *err_alloc = NULL, *rperr_alloc = NULL; 304 
+ struct pci_dev *dev, *rpdev; 305 + struct pcie_device *edev; 306 + unsigned long flags; 307 + unsigned int devfn = PCI_DEVFN(einj->dev, einj->fn); 308 + int pos_cap_err, rp_pos_cap_err; 309 + u32 sever; 310 + int ret = 0; 311 + 312 + dev = pci_get_bus_and_slot(einj->bus, devfn); 313 + if (!dev) 314 + return -EINVAL; 315 + rpdev = pcie_find_root_port(dev); 316 + if (!rpdev) { 317 + ret = -EINVAL; 318 + goto out_put; 319 + } 320 + 321 + pos_cap_err = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); 322 + if (!pos_cap_err) { 323 + ret = -EIO; 324 + goto out_put; 325 + } 326 + pci_read_config_dword(dev, pos_cap_err + PCI_ERR_UNCOR_SEVER, &sever); 327 + 328 + rp_pos_cap_err = pci_find_ext_capability(rpdev, PCI_EXT_CAP_ID_ERR); 329 + if (!rp_pos_cap_err) { 330 + ret = -EIO; 331 + goto out_put; 332 + } 333 + 334 + err_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); 335 + if (!err_alloc) { 336 + ret = -ENOMEM; 337 + goto out_put; 338 + } 339 + rperr_alloc = kzalloc(sizeof(struct aer_error), GFP_KERNEL); 340 + if (!rperr_alloc) { 341 + ret = -ENOMEM; 342 + goto out_put; 343 + } 344 + 345 + spin_lock_irqsave(&inject_lock, flags); 346 + 347 + err = __find_aer_error_by_dev(dev); 348 + if (!err) { 349 + err = err_alloc; 350 + err_alloc = NULL; 351 + aer_error_init(err, einj->bus, devfn, pos_cap_err); 352 + list_add(&err->list, &einjected); 353 + } 354 + err->uncor_status |= einj->uncor_status; 355 + err->cor_status |= einj->cor_status; 356 + err->header_log0 = einj->header_log0; 357 + err->header_log1 = einj->header_log1; 358 + err->header_log2 = einj->header_log2; 359 + err->header_log3 = einj->header_log3; 360 + 361 + rperr = __find_aer_error_by_dev(rpdev); 362 + if (!rperr) { 363 + rperr = rperr_alloc; 364 + rperr_alloc = NULL; 365 + aer_error_init(rperr, rpdev->bus->number, rpdev->devfn, 366 + rp_pos_cap_err); 367 + list_add(&rperr->list, &einjected); 368 + } 369 + if (einj->cor_status) { 370 + if (rperr->root_status & PCI_ERR_ROOT_COR_RCV) 371 + 
rperr->root_status |= PCI_ERR_ROOT_MULTI_COR_RCV; 372 + else 373 + rperr->root_status |= PCI_ERR_ROOT_COR_RCV; 374 + rperr->source_id &= 0xffff0000; 375 + rperr->source_id |= (einj->bus << 8) | devfn; 376 + } 377 + if (einj->uncor_status) { 378 + if (rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV) 379 + rperr->root_status |= PCI_ERR_ROOT_MULTI_UNCOR_RCV; 380 + if (sever & einj->uncor_status) { 381 + rperr->root_status |= PCI_ERR_ROOT_FATAL_RCV; 382 + if (!(rperr->root_status & PCI_ERR_ROOT_UNCOR_RCV)) 383 + rperr->root_status |= PCI_ERR_ROOT_FIRST_FATAL; 384 + } else 385 + rperr->root_status |= PCI_ERR_ROOT_NONFATAL_RCV; 386 + rperr->root_status |= PCI_ERR_ROOT_UNCOR_RCV; 387 + rperr->source_id &= 0x0000ffff; 388 + rperr->source_id |= ((einj->bus << 8) | devfn) << 16; 389 + } 390 + spin_unlock_irqrestore(&inject_lock, flags); 391 + 392 + ret = pci_bus_set_aer_ops(dev->bus); 393 + if (ret) 394 + goto out_put; 395 + ret = pci_bus_set_aer_ops(rpdev->bus); 396 + if (ret) 397 + goto out_put; 398 + 399 + if (find_aer_device(rpdev, &edev)) 400 + aer_irq(-1, edev); 401 + else 402 + ret = -EINVAL; 403 + out_put: 404 + if (err_alloc) 405 + kfree(err_alloc); 406 + if (rperr_alloc) 407 + kfree(rperr_alloc); 408 + pci_dev_put(dev); 409 + return ret; 410 + } 411 + 412 + static ssize_t aer_inject_write(struct file *filp, const char __user *ubuf, 413 + size_t usize, loff_t *off) 414 + { 415 + struct aer_error_inj einj; 416 + int ret; 417 + 418 + if (!capable(CAP_SYS_ADMIN)) 419 + return -EPERM; 420 + 421 + if (usize != sizeof(struct aer_error_inj)) 422 + return -EINVAL; 423 + 424 + if (copy_from_user(&einj, ubuf, usize)) 425 + return -EFAULT; 426 + 427 + ret = aer_inject(&einj); 428 + return ret ? 
ret : usize; 429 + } 430 + 431 + static const struct file_operations aer_inject_fops = { 432 + .write = aer_inject_write, 433 + .owner = THIS_MODULE, 434 + }; 435 + 436 + static struct miscdevice aer_inject_device = { 437 + .minor = MISC_DYNAMIC_MINOR, 438 + .name = "aer_inject", 439 + .fops = &aer_inject_fops, 440 + }; 441 + 442 + static int __init aer_inject_init(void) 443 + { 444 + return misc_register(&aer_inject_device); 445 + } 446 + 447 + static void __exit aer_inject_exit(void) 448 + { 449 + struct aer_error *err, *err_next; 450 + unsigned long flags; 451 + struct pci_bus_ops *bus_ops; 452 + 453 + misc_deregister(&aer_inject_device); 454 + 455 + while ((bus_ops = pci_bus_ops_pop())) { 456 + pci_bus_set_ops(bus_ops->bus, bus_ops->ops); 457 + kfree(bus_ops); 458 + } 459 + 460 + spin_lock_irqsave(&inject_lock, flags); 461 + list_for_each_entry_safe(err, err_next, 462 + &pci_bus_ops_list, list) { 463 + list_del(&err->list); 464 + kfree(err); 465 + } 466 + spin_unlock_irqrestore(&inject_lock, flags); 467 + } 468 + 469 + module_init(aer_inject_init); 470 + module_exit(aer_inject_exit); 471 + 472 + MODULE_DESCRIPTION("PCIE AER software error injector"); 473 + MODULE_LICENSE("GPL");