Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] ppc64: PCI error event dispatcher

12-eeh-event-dispatcher.patch

ppc64: EEH Recovery dispatcher thread

This patch adds a mechanism to create recovery threads when an
EEH event is received. Since an EEH freeze state may be detected
within an interrupt context, we need to get out of the interrupt
context before starting recovery. This dispatcher does this in
two steps: first, it uses a workqueue to get out, and then
launches a kernel thread, so that the recovery routine can
sleep for extended periods without upsetting the keventd.

A kernel thread is created with each EEH event, rather than
having one long-running daemon started at boot time. This is
because it is anticipated that EEH events will be very rare
(very very rare, ideally) and so it's pointless to clutter the
process tables with a daemon that will almost never run.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

authored by

Linas Vepstas and committed by
Paul Mackerras
172ca926 7f79da7a

+237 -158
+1 -1
arch/powerpc/platforms/pseries/Makefile
··· 3 3 obj-$(CONFIG_SMP) += smp.o 4 4 obj-$(CONFIG_IBMVIO) += vio.o 5 5 obj-$(CONFIG_XICS) += xics.o 6 - obj-$(CONFIG_EEH) += eeh.o 6 + obj-$(CONFIG_EEH) += eeh.o eeh_event.o
+16 -124
arch/powerpc/platforms/pseries/eeh.c
··· 19 19 20 20 #include <linux/init.h> 21 21 #include <linux/list.h> 22 - #include <linux/notifier.h> 23 22 #include <linux/pci.h> 24 23 #include <linux/proc_fs.h> 25 24 #include <linux/rbtree.h> ··· 26 27 #include <linux/spinlock.h> 27 28 #include <asm/atomic.h> 28 29 #include <asm/eeh.h> 30 + #include <asm/eeh_event.h> 29 31 #include <asm/io.h> 30 32 #include <asm/machdep.h> 31 - #include <asm/rtas.h> 32 - #include <asm/atomic.h> 33 - #include <asm/systemcfg.h> 34 33 #include <asm/ppc-pci.h> 34 + #include <asm/rtas.h> 35 + #include <asm/systemcfg.h> 35 36 36 37 #undef DEBUG 37 38 ··· 68 69 * the slot is found to be isolated, an "EEH Event" is synthesized 69 70 * and sent out for processing. 70 71 */ 71 - 72 - /* EEH event workqueue setup. */ 73 - static DEFINE_SPINLOCK(eeh_eventlist_lock); 74 - LIST_HEAD(eeh_eventlist); 75 - static void eeh_event_handler(void *); 76 - DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL); 77 - 78 - static struct notifier_block *eeh_notifier_chain; 79 72 80 73 /* If a device driver keeps reading an MMIO register in an interrupt 81 74 * handler after a slot isolation event has occurred, we assume it ··· 412 421 } 413 422 414 423 /** 415 - * eeh_register_notifier - Register to find out about EEH events. 416 - * @nb: notifier block to callback on events 417 - */ 418 - int eeh_register_notifier(struct notifier_block *nb) 419 - { 420 - return notifier_chain_register(&eeh_notifier_chain, nb); 421 - } 422 - 423 - /** 424 - * eeh_unregister_notifier - Unregister to an EEH event notifier. 
425 - * @nb: notifier block to callback on events 426 - */ 427 - int eeh_unregister_notifier(struct notifier_block *nb) 428 - { 429 - return notifier_chain_unregister(&eeh_notifier_chain, nb); 430 - } 431 - 432 - /** 433 424 * read_slot_reset_state - Read the reset state of a device node's slot 434 425 * @dn: device node to read 435 426 * @rets: array to return results in ··· 431 458 432 459 return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr, 433 460 BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); 434 - } 435 - 436 - /** 437 - * eeh_panic - call panic() for an eeh event that cannot be handled. 438 - * The philosophy of this routine is that it is better to panic and 439 - * halt the OS than it is to risk possible data corruption by 440 - * oblivious device drivers that don't know better. 441 - * 442 - * @dev pci device that had an eeh event 443 - * @reset_state current reset state of the device slot 444 - */ 445 - static void eeh_panic(struct pci_dev *dev, int reset_state) 446 - { 447 - /* 448 - * XXX We should create a separate sysctl for this. 449 - * 450 - * Since the panic_on_oops sysctl is used to halt the system 451 - * in light of potential corruption, we can use it here. 452 - */ 453 - if (panic_on_oops) { 454 - struct device_node *dn = pci_device_to_OF_node(dev); 455 - eeh_slot_error_detail (PCI_DN(dn), 2 /* Permanent Error */); 456 - panic("EEH: MMIO failure (%d) on device:%s\n", reset_state, 457 - pci_name(dev)); 458 - } 459 - else { 460 - __get_cpu_var(ignored_failures)++; 461 - printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n", 462 - reset_state, pci_name(dev)); 463 - } 464 - } 465 - 466 - /** 467 - * eeh_event_handler - dispatch EEH events. The detection of a frozen 468 - * slot can occur inside an interrupt, where it can be hard to do 469 - * anything about it. 
The goal of this routine is to pull these 470 - * detection events out of the context of the interrupt handler, and 471 - * re-dispatch them for processing at a later time in a normal context. 472 - * 473 - * @dummy - unused 474 - */ 475 - static void eeh_event_handler(void *dummy) 476 - { 477 - unsigned long flags; 478 - struct eeh_event *event; 479 - 480 - while (1) { 481 - spin_lock_irqsave(&eeh_eventlist_lock, flags); 482 - event = NULL; 483 - if (!list_empty(&eeh_eventlist)) { 484 - event = list_entry(eeh_eventlist.next, struct eeh_event, list); 485 - list_del(&event->list); 486 - } 487 - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); 488 - if (event == NULL) 489 - break; 490 - 491 - printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device " 492 - "%s\n", event->reset_state, 493 - pci_name(event->dev)); 494 - 495 - notifier_call_chain (&eeh_notifier_chain, 496 - EEH_NOTIFY_FREEZE, event); 497 - 498 - pci_dev_put(event->dev); 499 - kfree(event); 500 - } 501 461 } 502 462 503 463 /** ··· 519 613 int ret; 520 614 int rets[3]; 521 615 unsigned long flags; 522 - int reset_state; 523 - struct eeh_event *event; 524 616 struct pci_dn *pdn; 525 617 struct device_node *pe_dn; 526 618 int rc = 0; ··· 626 722 __eeh_mark_slot (pe_dn); 627 723 spin_unlock_irqrestore(&confirm_error_lock, flags); 628 724 629 - reset_state = rets[0]; 630 - 631 - eeh_slot_error_detail (pdn, 1 /* Temporary Error */); 632 - 633 - printk(KERN_INFO "EEH: MMIO failure (%d) on device: %s %s\n", 634 - rets[0], dn->name, dn->full_name); 635 - event = kmalloc(sizeof(*event), GFP_ATOMIC); 636 - if (event == NULL) { 637 - eeh_panic(dev, reset_state); 638 - return 1; 639 - } 640 - 641 - event->dev = dev; 642 - event->dn = dn; 643 - event->reset_state = reset_state; 644 - 645 - /* We may or may not be called in an interrupt context */ 646 - spin_lock_irqsave(&eeh_eventlist_lock, flags); 647 - list_add(&event->list, &eeh_eventlist); 648 - spin_unlock_irqrestore(&eeh_eventlist_lock, flags); 649 - 725 
+ eeh_send_failure_event (dn, dev, rets[0], rets[2]); 726 + 650 727 /* Most EEH events are due to device driver bugs. Having 651 728 * a stack trace will help the device-driver authors figure 652 729 * out what happened. So print that out. */ 653 730 if (rets[0] != 5) dump_stack(); 654 - schedule_work(&eeh_event_wq); 655 - 656 731 return 1; 657 732 658 733 dn_unlock: ··· 675 792 } 676 793 677 794 EXPORT_SYMBOL(eeh_check_failure); 795 + 796 + /* ------------------------------------------------------------- */ 797 + /* The code below deals with enabling EEH for devices during the 798 + * early boot sequence. EEH must be enabled before any PCI probing 799 + * can be done. 800 + */ 801 + 802 + #define EEH_ENABLE 1 678 803 679 804 struct eeh_early_enable_info { 680 805 unsigned int buid_hi; ··· 741 850 /* First register entry is addr (00BBSS00) */ 742 851 /* Try to enable eeh */ 743 852 ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, 744 - regs[0], info->buid_hi, info->buid_lo, 745 - EEH_ENABLE); 853 + regs[0], info->buid_hi, info->buid_lo, 854 + EEH_ENABLE); 855 + 746 856 if (ret == 0) { 747 857 eeh_subsystem_enabled = 1; 748 858 pdn->eeh_mode |= EEH_MODE_SUPPORTED;
+155
arch/powerpc/platforms/pseries/eeh_event.c
··· 1 + /* 2 + * eeh_event.c 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; either version 2 of the License, or 7 + * (at your option) any later version. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program; if not, write to the Free Software 16 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 + * 18 + * Copyright (c) 2005 Linas Vepstas <linas@linas.org> 19 + */ 20 + 21 + #include <linux/list.h> 22 + #include <linux/pci.h> 23 + #include <asm/eeh_event.h> 24 + 25 + /** Overview: 26 + * EEH error states may be detected within exception handlers; 27 + * however, the recovery processing needs to occur asynchronously 28 + * in a normal kernel context and not an interrupt context. 29 + * This pair of routines creates an event and queues it onto a 30 + * work-queue, where a worker thread can drive recovery. 31 + */ 32 + 33 + /* EEH event workqueue setup. */ 34 + static spinlock_t eeh_eventlist_lock = SPIN_LOCK_UNLOCKED; 35 + LIST_HEAD(eeh_eventlist); 36 + static void eeh_thread_launcher(void *); 37 + DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL); 38 + 39 + /** 40 + * eeh_panic - call panic() for an eeh event that cannot be handled. 41 + * The philosophy of this routine is that it is better to panic and 42 + * halt the OS than it is to risk possible data corruption by 43 + * oblivious device drivers that don't know better. 
44 + * 45 + * @dev pci device that had an eeh event 46 + * @reset_state current reset state of the device slot 47 + */ 48 + static void eeh_panic(struct pci_dev *dev, int reset_state) 49 + { 50 + /* 51 + * Since the panic_on_oops sysctl is used to halt the system 52 + * in light of potential corruption, we can use it here. 53 + */ 54 + if (panic_on_oops) { 55 + panic("EEH: MMIO failure (%d) on device:%s\n", reset_state, 56 + pci_name(dev)); 57 + } 58 + else { 59 + printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n", 60 + reset_state, pci_name(dev)); 61 + } 62 + } 63 + 64 + /** 65 + * eeh_event_handler - dispatch EEH events. The detection of a frozen 66 + * slot can occur inside an interrupt, where it can be hard to do 67 + * anything about it. The goal of this routine is to pull these 68 + * detection events out of the context of the interrupt handler, and 69 + * re-dispatch them for processing at a later time in a normal context. 70 + * 71 + * @dummy - unused 72 + */ 73 + static int eeh_event_handler(void * dummy) 74 + { 75 + unsigned long flags; 76 + struct eeh_event *event; 77 + 78 + daemonize ("eehd"); 79 + 80 + while (1) { 81 + set_current_state(TASK_INTERRUPTIBLE); 82 + 83 + spin_lock_irqsave(&eeh_eventlist_lock, flags); 84 + event = NULL; 85 + if (!list_empty(&eeh_eventlist)) { 86 + event = list_entry(eeh_eventlist.next, struct eeh_event, list); 87 + list_del(&event->list); 88 + } 89 + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); 90 + if (event == NULL) 91 + break; 92 + 93 + printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", 94 + pci_name(event->dev)); 95 + 96 + eeh_panic (event->dev, event->state); 97 + 98 + kfree(event); 99 + } 100 + 101 + return 0; 102 + } 103 + 104 + /** 105 + * eeh_thread_launcher 106 + * 107 + * @dummy - unused 108 + */ 109 + static void eeh_thread_launcher(void *dummy) 110 + { 111 + if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0) 112 + printk(KERN_ERR "Failed to start EEH daemon\n"); 113 + 
} 114 + 115 + /** 116 + * eeh_send_failure_event - generate a PCI error event 117 + * @dev pci device 118 + * 119 + * This routine can be called within an interrupt context; 120 + * the actual event will be delivered in a normal context 121 + * (from a workqueue). 122 + */ 123 + int eeh_send_failure_event (struct device_node *dn, 124 + struct pci_dev *dev, 125 + int state, 126 + int time_unavail) 127 + { 128 + unsigned long flags; 129 + struct eeh_event *event; 130 + 131 + event = kmalloc(sizeof(*event), GFP_ATOMIC); 132 + if (event == NULL) { 133 + printk (KERN_ERR "EEH: out of memory, event not handled\n"); 134 + return 1; 135 + } 136 + 137 + if (dev) 138 + pci_dev_get(dev); 139 + 140 + event->dn = dn; 141 + event->dev = dev; 142 + event->state = state; 143 + event->time_unavail = time_unavail; 144 + 145 + /* We may or may not be called in an interrupt context */ 146 + spin_lock_irqsave(&eeh_eventlist_lock, flags); 147 + list_add(&event->list, &eeh_eventlist); 148 + spin_unlock_irqrestore(&eeh_eventlist_lock, flags); 149 + 150 + schedule_work(&eeh_event_wq); 151 + 152 + return 0; 153 + } 154 + 155 + /********************** END OF FILE ******************************/
+52
include/asm-powerpc/eeh_event.h
··· 1 + /* 2 + * eeh_event.h 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License as published by 6 + * the Free Software Foundation; either version 2 of the License, or 7 + * (at your option) any later version. 8 + * 9 + * This program is distributed in the hope that it will be useful, 10 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 + * GNU General Public License for more details. 13 + * 14 + * You should have received a copy of the GNU General Public License 15 + * along with this program; if not, write to the Free Software 16 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 + * 18 + * Copyright (c) 2005 Linas Vepstas <linas@linas.org> 19 + */ 20 + 21 + #ifndef ASM_PPC64_EEH_EVENT_H 22 + #define ASM_PPC64_EEH_EVENT_H 23 + 24 + /** EEH event -- structure holding pci controller data that describes 25 + * a change in the isolation status of a PCI slot. A pointer 26 + * to this struct is passed as the data pointer in a notify callback. 27 + */ 28 + struct eeh_event { 29 + struct list_head list; 30 + struct device_node *dn; /* struct device node */ 31 + struct pci_dev *dev; /* affected device */ 32 + int state; 33 + int time_unavail; /* milliseconds until device might be available */ 34 + }; 35 + 36 + /** 37 + * eeh_send_failure_event - generate a PCI error event 38 + * @dev pci device 39 + * 40 + * This routine builds a PCI error event which will be delivered 41 + * to all listeners on the peh_notifier_chain. 42 + * 43 + * This routine can be called within an interrupt context; 44 + * the actual event will be delivered in a normal context 45 + * (from a workqueue). 46 + */ 47 + int eeh_send_failure_event (struct device_node *dn, 48 + struct pci_dev *dev, 49 + int reset_state, 50 + int time_unavail); 51 + 52 + #endif /* ASM_PPC64_EEH_EVENT_H */
+13 -33
include/asm-ppc64/eeh.h
··· 1 - /* 1 + /* 2 2 * eeh.h 3 3 * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation. 4 4 * ··· 6 6 * it under the terms of the GNU General Public License as published by 7 7 * the Free Software Foundation; either version 2 of the License, or 8 8 * (at your option) any later version. 9 - * 9 + * 10 10 * This program is distributed in the hope that it will be useful, 11 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 13 * GNU General Public License for more details. 14 - * 14 + * 15 15 * You should have received a copy of the GNU General Public License 16 16 * along with this program; if not, write to the Free Software 17 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ··· 27 27 28 28 struct pci_dev; 29 29 struct device_node; 30 - struct device_node; 31 - struct notifier_block; 32 30 33 31 #ifdef CONFIG_EEH 34 32 ··· 34 36 #define EEH_MODE_SUPPORTED (1<<0) 35 37 #define EEH_MODE_NOCHECK (1<<1) 36 38 #define EEH_MODE_ISOLATED (1<<2) 39 + 40 + /* Max number of EEH freezes allowed before we consider the device 41 + * to be permanently disabled. */ 42 + #define EEH_MAX_ALLOWED_FREEZES 5 37 43 38 44 void __init eeh_init(void); 39 45 unsigned long eeh_check_failure(const volatile void __iomem *token, ··· 61 59 * eeh_remove_device - undo EEH setup for the indicated pci device 62 60 * @dev: pci device to be removed 63 61 * 64 - * This routine should be when a device is removed from a running 65 - * system (e.g. by hotplug or dlpar). 62 + * This routine should be called when a device is removed from 63 + * a running system (e.g. by hotplug or dlpar). It unregisters 64 + * the PCI device from the EEH subsystem. I/O errors affecting 65 + * this device will no longer be detected after this call; thus, 66 + * i/o errors affecting this slot may leave this device unusable. 
66 67 */ 67 68 void eeh_remove_device(struct pci_dev *); 68 - 69 - #define EEH_DISABLE 0 70 - #define EEH_ENABLE 1 71 - #define EEH_RELEASE_LOADSTORE 2 72 - #define EEH_RELEASE_DMA 3 73 - 74 - /** 75 - * Notifier event flags. 76 - */ 77 - #define EEH_NOTIFY_FREEZE 1 78 - 79 - /** EEH event -- structure holding pci slot data that describes 80 - * a change in the isolation status of a PCI slot. A pointer 81 - * to this struct is passed as the data pointer in a notify callback. 82 - */ 83 - struct eeh_event { 84 - struct list_head list; 85 - struct pci_dev *dev; 86 - struct device_node *dn; 87 - int reset_state; 88 - }; 89 - 90 - /** Register to find out about EEH events. */ 91 - int eeh_register_notifier(struct notifier_block *nb); 92 - int eeh_unregister_notifier(struct notifier_block *nb); 93 69 94 70 /** 95 71 * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. ··· 109 129 #define EEH_IO_ERROR_VALUE(size) (-1UL) 110 130 #endif /* CONFIG_EEH */ 111 131 112 - /* 132 + /* 113 133 * MMIO read/write operations with EEH support. 114 134 */ 115 135 static inline u8 eeh_readb(const volatile void __iomem *addr)