Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'eeh' into next

+1479 -869
+3
arch/powerpc/include/asm/device.h
··· 31 31 #ifdef CONFIG_SWIOTLB 32 32 dma_addr_t max_direct_dma_addr; 33 33 #endif 34 + #ifdef CONFIG_EEH 35 + struct eeh_dev *edev; 36 + #endif 34 37 }; 35 38 36 39 struct pdev_archdata {
+106 -28
arch/powerpc/include/asm/eeh.h
··· 1 1 /* 2 - * eeh.h 3 2 * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation. 3 + * Copyright 2001-2012 IBM Corporation. 4 4 * 5 5 * This program is free software; you can redistribute it and/or modify 6 6 * it under the terms of the GNU General Public License as published by ··· 31 31 32 32 #ifdef CONFIG_EEH 33 33 34 + /* 35 + * The struct is used to trace EEH state for the associated 36 + * PCI device node or PCI device. In future, it might 37 + * represent PE as well so that the EEH device to form 38 + * another tree except the currently existing tree of PCI 39 + * buses and PCI devices 40 + */ 41 + #define EEH_MODE_SUPPORTED (1<<0) /* EEH supported on the device */ 42 + #define EEH_MODE_NOCHECK (1<<1) /* EEH check should be skipped */ 43 + #define EEH_MODE_ISOLATED (1<<2) /* The device has been isolated */ 44 + #define EEH_MODE_RECOVERING (1<<3) /* Recovering the device */ 45 + #define EEH_MODE_IRQ_DISABLED (1<<4) /* Interrupt disabled */ 46 + 47 + struct eeh_dev { 48 + int mode; /* EEH mode */ 49 + int class_code; /* Class code of the device */ 50 + int config_addr; /* Config address */ 51 + int pe_config_addr; /* PE config address */ 52 + int check_count; /* Times of ignored error */ 53 + int freeze_count; /* Times of froze up */ 54 + int false_positives; /* Times of reported #ff's */ 55 + u32 config_space[16]; /* Saved PCI config space */ 56 + struct pci_controller *phb; /* Associated PHB */ 57 + struct device_node *dn; /* Associated device node */ 58 + struct pci_dev *pdev; /* Associated PCI device */ 59 + }; 60 + 61 + static inline struct device_node *eeh_dev_to_of_node(struct eeh_dev *edev) 62 + { 63 + return edev->dn; 64 + } 65 + 66 + static inline struct pci_dev *eeh_dev_to_pci_dev(struct eeh_dev *edev) 67 + { 68 + return edev->pdev; 69 + } 70 + 71 + /* 72 + * The struct is used to trace the registered EEH operation 73 + * callback functions. Actually, those operation callback 74 + * functions are heavily platform dependent. 
That means the 75 + * platform should register its own EEH operation callback 76 + * functions before any EEH further operations. 77 + */ 78 + #define EEH_OPT_DISABLE 0 /* EEH disable */ 79 + #define EEH_OPT_ENABLE 1 /* EEH enable */ 80 + #define EEH_OPT_THAW_MMIO 2 /* MMIO enable */ 81 + #define EEH_OPT_THAW_DMA 3 /* DMA enable */ 82 + #define EEH_STATE_UNAVAILABLE (1 << 0) /* State unavailable */ 83 + #define EEH_STATE_NOT_SUPPORT (1 << 1) /* EEH not supported */ 84 + #define EEH_STATE_RESET_ACTIVE (1 << 2) /* Active reset */ 85 + #define EEH_STATE_MMIO_ACTIVE (1 << 3) /* Active MMIO */ 86 + #define EEH_STATE_DMA_ACTIVE (1 << 4) /* Active DMA */ 87 + #define EEH_STATE_MMIO_ENABLED (1 << 5) /* MMIO enabled */ 88 + #define EEH_STATE_DMA_ENABLED (1 << 6) /* DMA enabled */ 89 + #define EEH_RESET_DEACTIVATE 0 /* Deactivate the PE reset */ 90 + #define EEH_RESET_HOT 1 /* Hot reset */ 91 + #define EEH_RESET_FUNDAMENTAL 3 /* Fundamental reset */ 92 + #define EEH_LOG_TEMP 1 /* EEH temporary error log */ 93 + #define EEH_LOG_PERM 2 /* EEH permanent error log */ 94 + 95 + struct eeh_ops { 96 + char *name; 97 + int (*init)(void); 98 + int (*set_option)(struct device_node *dn, int option); 99 + int (*get_pe_addr)(struct device_node *dn); 100 + int (*get_state)(struct device_node *dn, int *state); 101 + int (*reset)(struct device_node *dn, int option); 102 + int (*wait_state)(struct device_node *dn, int max_wait); 103 + int (*get_log)(struct device_node *dn, int severity, char *drv_log, unsigned long len); 104 + int (*configure_bridge)(struct device_node *dn); 105 + int (*read_config)(struct device_node *dn, int where, int size, u32 *val); 106 + int (*write_config)(struct device_node *dn, int where, int size, u32 val); 107 + }; 108 + 109 + extern struct eeh_ops *eeh_ops; 34 110 extern int eeh_subsystem_enabled; 35 111 36 - /* Values for eeh_mode bits in device_node */ 37 - #define EEH_MODE_SUPPORTED (1<<0) 38 - #define EEH_MODE_NOCHECK (1<<1) 39 - #define EEH_MODE_ISOLATED 
(1<<2) 40 - #define EEH_MODE_RECOVERING (1<<3) 41 - #define EEH_MODE_IRQ_DISABLED (1<<4) 42 - 43 - /* Max number of EEH freezes allowed before we consider the device 44 - * to be permanently disabled. */ 112 + /* 113 + * Max number of EEH freezes allowed before we consider the device 114 + * to be permanently disabled. 115 + */ 45 116 #define EEH_MAX_ALLOWED_FREEZES 5 46 117 118 + void * __devinit eeh_dev_init(struct device_node *dn, void *data); 119 + void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb); 120 + void __init eeh_dev_phb_init(void); 47 121 void __init eeh_init(void); 122 + #ifdef CONFIG_PPC_PSERIES 123 + int __init eeh_pseries_init(void); 124 + #endif 125 + int __init eeh_ops_register(struct eeh_ops *ops); 126 + int __exit eeh_ops_unregister(const char *name); 48 127 unsigned long eeh_check_failure(const volatile void __iomem *token, 49 128 unsigned long val); 50 129 int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev); 51 130 void __init pci_addr_cache_build(void); 52 - 53 - /** 54 - * eeh_add_device_early 55 - * eeh_add_device_late 56 - * 57 - * Perform eeh initialization for devices added after boot. 58 - * Call eeh_add_device_early before doing any i/o to the 59 - * device (including config space i/o). Call eeh_add_device_late 60 - * to finish the eeh setup for this device. 61 - */ 62 131 void eeh_add_device_tree_early(struct device_node *); 63 132 void eeh_add_device_tree_late(struct pci_bus *); 64 - 65 - /** 66 - * eeh_remove_device_recursive - undo EEH for device & children. 67 - * @dev: pci device to be removed 68 - * 69 - * As above, this removes the device; it also removes child 70 - * pci devices as well. 
71 - */ 72 133 void eeh_remove_bus_device(struct pci_dev *); 73 134 74 135 /** ··· 148 87 #define EEH_IO_ERROR_VALUE(size) (~0U >> ((4 - (size)) * 8)) 149 88 150 89 #else /* !CONFIG_EEH */ 90 + 91 + static inline void *eeh_dev_init(struct device_node *dn, void *data) 92 + { 93 + return NULL; 94 + } 95 + 96 + static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { } 97 + 98 + static inline void eeh_dev_phb_init(void) { } 99 + 151 100 static inline void eeh_init(void) { } 101 + 102 + #ifdef CONFIG_PPC_PSERIES 103 + static inline int eeh_pseries_init(void) 104 + { 105 + return 0; 106 + } 107 + #endif /* CONFIG_PPC_PSERIES */ 152 108 153 109 static inline unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val) 154 110 {
+9 -24
arch/powerpc/include/asm/eeh_event.h
··· 1 1 /* 2 - * eeh_event.h 3 - * 4 2 * This program is free software; you can redistribute it and/or modify 5 3 * it under the terms of the GNU General Public License as published by 6 4 * the Free Software Foundation; either version 2 of the License, or ··· 20 22 #define ASM_POWERPC_EEH_EVENT_H 21 23 #ifdef __KERNEL__ 22 24 23 - /** EEH event -- structure holding pci controller data that describes 24 - * a change in the isolation status of a PCI slot. A pointer 25 - * to this struct is passed as the data pointer in a notify callback. 25 + /* 26 + * structure holding pci controller data that describes a 27 + * change in the isolation status of a PCI slot. A pointer 28 + * to this struct is passed as the data pointer in a notify 29 + * callback. 26 30 */ 27 31 struct eeh_event { 28 - struct list_head list; 29 - struct device_node *dn; /* struct device node */ 30 - struct pci_dev *dev; /* affected device */ 32 + struct list_head list; /* to form event queue */ 33 + struct eeh_dev *edev; /* EEH device */ 31 34 }; 32 35 33 - /** 34 - * eeh_send_failure_event - generate a PCI error event 35 - * @dev pci device 36 - * 37 - * This routine builds a PCI error event which will be delivered 38 - * to all listeners on the eeh_notifier_chain. 39 - * 40 - * This routine can be called within an interrupt context; 41 - * the actual event will be delivered in a normal context 42 - * (from a workqueue). 43 - */ 44 - int eeh_send_failure_event (struct device_node *dn, 45 - struct pci_dev *dev); 46 - 47 - /* Main recovery function */ 48 - struct pci_dn * handle_eeh_events (struct eeh_event *); 36 + int eeh_send_failure_event(struct eeh_dev *edev); 37 + struct eeh_dev *handle_eeh_events(struct eeh_event *); 49 38 50 39 #endif /* __KERNEL__ */ 51 40 #endif /* ASM_POWERPC_EEH_EVENT_H */
+9 -80
arch/powerpc/include/asm/ppc-pci.h
··· 47 47 48 48 extern unsigned long pci_probe_only; 49 49 50 - /* ---- EEH internal-use-only related routines ---- */ 51 50 #ifdef CONFIG_EEH 52 51 52 + void pci_addr_cache_build(void); 53 53 void pci_addr_cache_insert_device(struct pci_dev *dev); 54 54 void pci_addr_cache_remove_device(struct pci_dev *dev); 55 - void pci_addr_cache_build(void); 56 - struct pci_dev *pci_get_device_by_addr(unsigned long addr); 57 - 58 - /** 59 - * eeh_slot_error_detail -- record and EEH error condition to the log 60 - * @pdn: pci device node 61 - * @severity: EEH_LOG_TEMP_FAILURE or EEH_LOG_PERM_FAILURE 62 - * 63 - * Obtains the EEH error details from the RTAS subsystem, 64 - * and then logs these details with the RTAS error log system. 65 - */ 66 - #define EEH_LOG_TEMP_FAILURE 1 67 - #define EEH_LOG_PERM_FAILURE 2 68 - void eeh_slot_error_detail (struct pci_dn *pdn, int severity); 69 - 70 - /** 71 - * rtas_pci_enable - enable IO transfers for this slot 72 - * @pdn: pci device node 73 - * @function: either EEH_THAW_MMIO or EEH_THAW_DMA 74 - * 75 - * Enable I/O transfers to this slot 76 - */ 77 - #define EEH_THAW_MMIO 2 78 - #define EEH_THAW_DMA 3 79 - int rtas_pci_enable(struct pci_dn *pdn, int function); 80 - 81 - /** 82 - * rtas_set_slot_reset -- unfreeze a frozen slot 83 - * @pdn: pci device node 84 - * 85 - * Clear the EEH-frozen condition on a slot. This routine 86 - * does this by asserting the PCI #RST line for 1/8th of 87 - * a second; this routine will sleep while the adapter is 88 - * being reset. 89 - * 90 - * Returns a non-zero value if the reset failed. 91 - */ 92 - int rtas_set_slot_reset (struct pci_dn *); 93 - int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs); 94 - 95 - /** 96 - * eeh_restore_bars - Restore device configuration info. 97 - * @pdn: pci device node 98 - * 99 - * A reset of a PCI device will clear out its config space. 
100 - * This routines will restore the config space for this 101 - * device, and is children, to values previously obtained 102 - * from the firmware. 103 - */ 104 - void eeh_restore_bars(struct pci_dn *); 105 - 106 - /** 107 - * rtas_configure_bridge -- firmware initialization of pci bridge 108 - * @pdn: pci device node 109 - * 110 - * Ask the firmware to configure all PCI bridges devices 111 - * located behind the indicated node. Required after a 112 - * pci device reset. Does essentially the same hing as 113 - * eeh_restore_bars, but for brdges, and lets firmware 114 - * do the work. 115 - */ 116 - void rtas_configure_bridge(struct pci_dn *); 117 - 55 + struct pci_dev *pci_addr_cache_get_device(unsigned long addr); 56 + void eeh_slot_error_detail(struct eeh_dev *edev, int severity); 57 + int eeh_pci_enable(struct eeh_dev *edev, int function); 58 + int eeh_reset_pe(struct eeh_dev *); 59 + void eeh_restore_bars(struct eeh_dev *); 118 60 int rtas_write_config(struct pci_dn *, int where, int size, u32 val); 119 61 int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); 120 - 121 - /** 122 - * eeh_mark_slot -- set mode flags for pertition endpoint 123 - * @pdn: pci device node 124 - * 125 - * mark and clear slots: find "partition endpoint" PE and set or 126 - * clear the flags for each subnode of the PE. 127 - */ 128 - void eeh_mark_slot (struct device_node *dn, int mode_flag); 129 - void eeh_clear_slot (struct device_node *dn, int mode_flag); 130 - 131 - /** 132 - * find_device_pe -- Find the associated "Partiationable Endpoint" PE 133 - * @pdn: pci device node 134 - */ 135 - struct device_node * find_device_pe(struct device_node *dn); 62 + void eeh_mark_slot(struct device_node *dn, int mode_flag); 63 + void eeh_clear_slot(struct device_node *dn, int mode_flag); 64 + struct device_node *eeh_find_device_pe(struct device_node *dn); 136 65 137 66 void eeh_sysfs_add_device(struct pci_dev *pdev); 138 67 void eeh_sysfs_remove_device(struct pci_dev *pdev);
+5 -1
arch/powerpc/kernel/of_platform.c
··· 21 21 #include <linux/of.h> 22 22 #include <linux/of_device.h> 23 23 #include <linux/of_platform.h> 24 + #include <linux/atomic.h> 24 25 25 26 #include <asm/errno.h> 26 27 #include <asm/topology.h> 27 28 #include <asm/pci-bridge.h> 28 29 #include <asm/ppc-pci.h> 29 - #include <linux/atomic.h> 30 + #include <asm/eeh.h> 30 31 31 32 #ifdef CONFIG_PPC_OF_PLATFORM_PCI 32 33 ··· 66 65 67 66 /* Init pci_dn data structures */ 68 67 pci_devs_phb_init_dynamic(phb); 68 + 69 + /* Create EEH devices for the PHB */ 70 + eeh_dev_phb_init_dynamic(phb); 69 71 70 72 /* Register devices with EEH */ 71 73 #ifdef CONFIG_EEH
+3
arch/powerpc/kernel/rtas_pci.c
··· 275 275 of_node_put(root); 276 276 pci_devs_phb_init(); 277 277 278 + /* Create EEH devices for all PHBs */ 279 + eeh_dev_phb_init(); 280 + 278 281 /* 279 282 * pci_probe_only and pci_assign_all_buses can be set via properties 280 283 * in chosen.
+2 -1
arch/powerpc/platforms/pseries/Makefile
··· 6 6 firmware.o power.o dlpar.o mobility.o 7 7 obj-$(CONFIG_SMP) += smp.o 8 8 obj-$(CONFIG_SCANLOG) += scanlog.o 9 - obj-$(CONFIG_EEH) += eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o 9 + obj-$(CONFIG_EEH) += eeh.o eeh_dev.o eeh_cache.o eeh_driver.o \ 10 + eeh_event.o eeh_sysfs.o eeh_pseries.o 10 11 obj-$(CONFIG_KEXEC) += kexec.o 11 12 obj-$(CONFIG_PCI) += pci.o pci_dlpar.o 12 13 obj-$(CONFIG_PSERIES_MSI) += msi.o
+462 -582
arch/powerpc/platforms/pseries/eeh.c
··· 1 1 /* 2 - * eeh.c 3 2 * Copyright IBM Corporation 2001, 2005, 2006 4 3 * Copyright Dave Engebretsen & Todd Inglett 2001 5 4 * Copyright Linas Vepstas 2005, 2006 5 + * Copyright 2001-2012 IBM Corporation. 6 6 * 7 7 * This program is free software; you can redistribute it and/or modify 8 8 * it under the terms of the GNU General Public License as published by ··· 22 22 */ 23 23 24 24 #include <linux/delay.h> 25 - #include <linux/sched.h> /* for init_mm */ 25 + #include <linux/sched.h> 26 26 #include <linux/init.h> 27 27 #include <linux/list.h> 28 28 #include <linux/pci.h> ··· 86 86 /* Time to wait for a PCI slot to report status, in milliseconds */ 87 87 #define PCI_BUS_RESET_WAIT_MSEC (60*1000) 88 88 89 - /* RTAS tokens */ 90 - static int ibm_set_eeh_option; 91 - static int ibm_set_slot_reset; 92 - static int ibm_read_slot_reset_state; 93 - static int ibm_read_slot_reset_state2; 94 - static int ibm_slot_error_detail; 95 - static int ibm_get_config_addr_info; 96 - static int ibm_get_config_addr_info2; 97 - static int ibm_configure_bridge; 98 - static int ibm_configure_pe; 89 + /* Platform dependent EEH operations */ 90 + struct eeh_ops *eeh_ops = NULL; 99 91 100 92 int eeh_subsystem_enabled; 101 93 EXPORT_SYMBOL(eeh_subsystem_enabled); 102 94 103 95 /* Lock to avoid races due to multiple reports of an error */ 104 96 static DEFINE_RAW_SPINLOCK(confirm_error_lock); 105 - 106 - /* Buffer for reporting slot-error-detail rtas calls. Its here 107 - * in BSS, and not dynamically alloced, so that it ends up in 108 - * RMO where RTAS can access it. 109 - */ 110 - static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; 111 - static DEFINE_SPINLOCK(slot_errbuf_lock); 112 - static int eeh_error_buf_size; 113 97 114 98 /* Buffer for reporting pci register dumps. 
Its here in BSS, and 115 99 * not dynamically alloced, so that it ends up in RMO where RTAS ··· 102 118 #define EEH_PCI_REGS_LOG_LEN 4096 103 119 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; 104 120 105 - /* System monitoring statistics */ 106 - static unsigned long no_device; 107 - static unsigned long no_dn; 108 - static unsigned long no_cfg_addr; 109 - static unsigned long ignored_check; 110 - static unsigned long total_mmio_ffs; 111 - static unsigned long false_positives; 112 - static unsigned long slot_resets; 121 + /* 122 + * The struct is used to maintain the EEH global statistic 123 + * information. Besides, the EEH global statistics will be 124 + * exported to user space through procfs 125 + */ 126 + struct eeh_stats { 127 + u64 no_device; /* PCI device not found */ 128 + u64 no_dn; /* OF node not found */ 129 + u64 no_cfg_addr; /* Config address not found */ 130 + u64 ignored_check; /* EEH check skipped */ 131 + u64 total_mmio_ffs; /* Total EEH checks */ 132 + u64 false_positives; /* Unnecessary EEH checks */ 133 + u64 slot_resets; /* PE reset */ 134 + }; 135 + 136 + static struct eeh_stats eeh_stats; 113 137 114 138 #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) 115 139 116 - /* --------------------------------------------------------------- */ 117 - /* Below lies the EEH event infrastructure */ 118 - 119 - static void rtas_slot_error_detail(struct pci_dn *pdn, int severity, 120 - char *driver_log, size_t loglen) 121 - { 122 - int config_addr; 123 - unsigned long flags; 124 - int rc; 125 - 126 - /* Log the error with the rtas logger */ 127 - spin_lock_irqsave(&slot_errbuf_lock, flags); 128 - memset(slot_errbuf, 0, eeh_error_buf_size); 129 - 130 - /* Use PE configuration address, if present */ 131 - config_addr = pdn->eeh_config_addr; 132 - if (pdn->eeh_pe_config_addr) 133 - config_addr = pdn->eeh_pe_config_addr; 134 - 135 - rc = rtas_call(ibm_slot_error_detail, 136 - 8, 1, NULL, config_addr, 137 - 
BUID_HI(pdn->phb->buid), 138 - BUID_LO(pdn->phb->buid), 139 - virt_to_phys(driver_log), loglen, 140 - virt_to_phys(slot_errbuf), 141 - eeh_error_buf_size, 142 - severity); 143 - 144 - if (rc == 0) 145 - log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0); 146 - spin_unlock_irqrestore(&slot_errbuf_lock, flags); 147 - } 148 - 149 140 /** 150 - * gather_pci_data - copy assorted PCI config space registers to buff 151 - * @pdn: device to report data for 141 + * eeh_gather_pci_data - Copy assorted PCI config space registers to buff 142 + * @edev: device to report data for 152 143 * @buf: point to buffer in which to log 153 144 * @len: amount of room in buffer 154 145 * 155 146 * This routine captures assorted PCI configuration space data, 156 147 * and puts them into a buffer for RTAS error logging. 157 148 */ 158 - static size_t gather_pci_data(struct pci_dn *pdn, char * buf, size_t len) 149 + static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len) 159 150 { 160 - struct pci_dev *dev = pdn->pcidev; 151 + struct device_node *dn = eeh_dev_to_of_node(edev); 152 + struct pci_dev *dev = eeh_dev_to_pci_dev(edev); 161 153 u32 cfg; 162 154 int cap, i; 163 155 int n = 0; 164 156 165 - n += scnprintf(buf+n, len-n, "%s\n", pdn->node->full_name); 166 - printk(KERN_WARNING "EEH: of node=%s\n", pdn->node->full_name); 157 + n += scnprintf(buf+n, len-n, "%s\n", dn->full_name); 158 + printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name); 167 159 168 - rtas_read_config(pdn, PCI_VENDOR_ID, 4, &cfg); 160 + eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg); 169 161 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); 170 162 printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg); 171 163 172 - rtas_read_config(pdn, PCI_COMMAND, 4, &cfg); 164 + eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg); 173 165 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); 174 166 printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg); 175 167 ··· 156 196 157 197 /* Gather 
bridge-specific registers */ 158 198 if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { 159 - rtas_read_config(pdn, PCI_SEC_STATUS, 2, &cfg); 199 + eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg); 160 200 n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); 161 201 printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg); 162 202 163 - rtas_read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); 203 + eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg); 164 204 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); 165 205 printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg); 166 206 } ··· 168 208 /* Dump out the PCI-X command and status regs */ 169 209 cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); 170 210 if (cap) { 171 - rtas_read_config(pdn, cap, 4, &cfg); 211 + eeh_ops->read_config(dn, cap, 4, &cfg); 172 212 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); 173 213 printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg); 174 214 175 - rtas_read_config(pdn, cap+4, 4, &cfg); 215 + eeh_ops->read_config(dn, cap+4, 4, &cfg); 176 216 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); 177 217 printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg); 178 218 } ··· 185 225 "EEH: PCI-E capabilities and status follow:\n"); 186 226 187 227 for (i=0; i<=8; i++) { 188 - rtas_read_config(pdn, cap+4*i, 4, &cfg); 228 + eeh_ops->read_config(dn, cap+4*i, 4, &cfg); 189 229 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 190 230 printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg); 191 231 } ··· 197 237 "EEH: PCI-E AER capability register set follows:\n"); 198 238 199 239 for (i=0; i<14; i++) { 200 - rtas_read_config(pdn, cap+4*i, 4, &cfg); 240 + eeh_ops->read_config(dn, cap+4*i, 4, &cfg); 201 241 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 202 242 printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg); 203 243 } ··· 206 246 207 247 /* Gather status on devices under the bridge */ 208 248 if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) { 209 - struct device_node 
*dn; 249 + struct device_node *child; 210 250 211 - for_each_child_of_node(pdn->node, dn) { 212 - pdn = PCI_DN(dn); 213 - if (pdn) 214 - n += gather_pci_data(pdn, buf+n, len-n); 251 + for_each_child_of_node(dn, child) { 252 + if (of_node_to_eeh_dev(child)) 253 + n += eeh_gather_pci_data(of_node_to_eeh_dev(child), buf+n, len-n); 215 254 } 216 255 } 217 256 218 257 return n; 219 258 } 220 259 221 - void eeh_slot_error_detail(struct pci_dn *pdn, int severity) 260 + /** 261 + * eeh_slot_error_detail - Generate combined log including driver log and error log 262 + * @edev: device to report error log for 263 + * @severity: temporary or permanent error log 264 + * 265 + * This routine should be called to generate the combined log, which 266 + * is comprised of driver log and error log. The driver log is figured 267 + * out from the config space of the corresponding PCI device, while 268 + * the error log is fetched through platform dependent function call. 269 + */ 270 + void eeh_slot_error_detail(struct eeh_dev *edev, int severity) 222 271 { 223 272 size_t loglen = 0; 224 273 pci_regs_buf[0] = 0; 225 274 226 - rtas_pci_enable(pdn, EEH_THAW_MMIO); 227 - rtas_configure_bridge(pdn); 228 - eeh_restore_bars(pdn); 229 - loglen = gather_pci_data(pdn, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); 275 + eeh_pci_enable(edev, EEH_OPT_THAW_MMIO); 276 + eeh_ops->configure_bridge(eeh_dev_to_of_node(edev)); 277 + eeh_restore_bars(edev); 278 + loglen = eeh_gather_pci_data(edev, pci_regs_buf, EEH_PCI_REGS_LOG_LEN); 230 279 231 - rtas_slot_error_detail(pdn, severity, pci_regs_buf, loglen); 280 + eeh_ops->get_log(eeh_dev_to_of_node(edev), severity, pci_regs_buf, loglen); 232 281 } 233 282 234 283 /** 235 - * read_slot_reset_state - Read the reset state of a device node's slot 236 - * @dn: device node to read 237 - * @rets: array to return results in 238 - */ 239 - static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) 240 - { 241 - int token, outputs; 242 - int config_addr; 243 - 244 - if 
(ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { 245 - token = ibm_read_slot_reset_state2; 246 - outputs = 4; 247 - } else { 248 - token = ibm_read_slot_reset_state; 249 - rets[2] = 0; /* fake PE Unavailable info */ 250 - outputs = 3; 251 - } 252 - 253 - /* Use PE configuration address, if present */ 254 - config_addr = pdn->eeh_config_addr; 255 - if (pdn->eeh_pe_config_addr) 256 - config_addr = pdn->eeh_pe_config_addr; 257 - 258 - return rtas_call(token, 3, outputs, rets, config_addr, 259 - BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); 260 - } 261 - 262 - /** 263 - * eeh_wait_for_slot_status - returns error status of slot 264 - * @pdn pci device node 265 - * @max_wait_msecs maximum number to millisecs to wait 284 + * eeh_token_to_phys - Convert EEH address token to phys address 285 + * @token: I/O token, should be address in the form 0xA.... 266 286 * 267 - * Return negative value if a permanent error, else return 268 - * Partition Endpoint (PE) status value. 269 - * 270 - * If @max_wait_msecs is positive, then this routine will 271 - * sleep until a valid status can be obtained, or until 272 - * the max allowed wait time is exceeded, in which case 273 - * a -2 is returned. 
274 - */ 275 - int 276 - eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs) 277 - { 278 - int rc; 279 - int rets[3]; 280 - int mwait; 281 - 282 - while (1) { 283 - rc = read_slot_reset_state(pdn, rets); 284 - if (rc) return rc; 285 - if (rets[1] == 0) return -1; /* EEH is not supported */ 286 - 287 - if (rets[0] != 5) return rets[0]; /* return actual status */ 288 - 289 - if (rets[2] == 0) return -1; /* permanently unavailable */ 290 - 291 - if (max_wait_msecs <= 0) break; 292 - 293 - mwait = rets[2]; 294 - if (mwait <= 0) { 295 - printk (KERN_WARNING 296 - "EEH: Firmware returned bad wait value=%d\n", mwait); 297 - mwait = 1000; 298 - } else if (mwait > 300*1000) { 299 - printk (KERN_WARNING 300 - "EEH: Firmware is taking too long, time=%d\n", mwait); 301 - mwait = 300*1000; 302 - } 303 - max_wait_msecs -= mwait; 304 - msleep (mwait); 305 - } 306 - 307 - printk(KERN_WARNING "EEH: Timed out waiting for slot status\n"); 308 - return -2; 309 - } 310 - 311 - /** 312 - * eeh_token_to_phys - convert EEH address token to phys address 313 - * @token i/o token, should be address in the form 0xA.... 287 + * This routine should be called to convert virtual I/O address 288 + * to physical one. 
314 289 */ 315 290 static inline unsigned long eeh_token_to_phys(unsigned long token) 316 291 { ··· 260 365 return pa | (token & (PAGE_SIZE-1)); 261 366 } 262 367 263 - /** 264 - * Return the "partitionable endpoint" (pe) under which this device lies 368 + /** 369 + * eeh_find_device_pe - Retrieve the PE for the given device 370 + * @dn: device node 371 + * 372 + * Return the PE under which this device lies 265 373 */ 266 - struct device_node * find_device_pe(struct device_node *dn) 374 + struct device_node *eeh_find_device_pe(struct device_node *dn) 267 375 { 268 - while ((dn->parent) && PCI_DN(dn->parent) && 269 - (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { 376 + while (dn->parent && of_node_to_eeh_dev(dn->parent) && 377 + (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) { 270 378 dn = dn->parent; 271 379 } 272 380 return dn; 273 381 } 274 382 275 - /** Mark all devices that are children of this device as failed. 276 - * Mark the device driver too, so that it can see the failure 277 - * immediately; this is critical, since some drivers poll 278 - * status registers in interrupts ... If a driver is polling, 279 - * and the slot is frozen, then the driver can deadlock in 280 - * an interrupt context, which is bad. 383 + /** 384 + * __eeh_mark_slot - Mark all child devices as failed 385 + * @parent: parent device 386 + * @mode_flag: failure flag 387 + * 388 + * Mark all devices that are children of this device as failed. 389 + * Mark the device driver too, so that it can see the failure 390 + * immediately; this is critical, since some drivers poll 391 + * status registers in interrupts ... If a driver is polling, 392 + * and the slot is frozen, then the driver can deadlock in 393 + * an interrupt context, which is bad. 
281 394 */ 282 - 283 395 static void __eeh_mark_slot(struct device_node *parent, int mode_flag) 284 396 { 285 397 struct device_node *dn; 286 398 287 399 for_each_child_of_node(parent, dn) { 288 - if (PCI_DN(dn)) { 400 + if (of_node_to_eeh_dev(dn)) { 289 401 /* Mark the pci device driver too */ 290 - struct pci_dev *dev = PCI_DN(dn)->pcidev; 402 + struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev; 291 403 292 - PCI_DN(dn)->eeh_mode |= mode_flag; 404 + of_node_to_eeh_dev(dn)->mode |= mode_flag; 293 405 294 406 if (dev && dev->driver) 295 407 dev->error_state = pci_channel_io_frozen; ··· 306 404 } 307 405 } 308 406 309 - void eeh_mark_slot (struct device_node *dn, int mode_flag) 407 + /** 408 + * eeh_mark_slot - Mark the indicated device and its children as failed 409 + * @dn: parent device 410 + * @mode_flag: failure flag 411 + * 412 + * Mark the indicated device and its child devices as failed. 413 + * The device drivers are marked as failed as well. 414 + */ 415 + void eeh_mark_slot(struct device_node *dn, int mode_flag) 310 416 { 311 417 struct pci_dev *dev; 312 - dn = find_device_pe (dn); 418 + dn = eeh_find_device_pe(dn); 313 419 314 420 /* Back up one, since config addrs might be shared */ 315 - if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) 421 + if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) 316 422 dn = dn->parent; 317 423 318 - PCI_DN(dn)->eeh_mode |= mode_flag; 424 + of_node_to_eeh_dev(dn)->mode |= mode_flag; 319 425 320 426 /* Mark the pci device too */ 321 - dev = PCI_DN(dn)->pcidev; 427 + dev = of_node_to_eeh_dev(dn)->pdev; 322 428 if (dev) 323 429 dev->error_state = pci_channel_io_frozen; 324 430 325 431 __eeh_mark_slot(dn, mode_flag); 326 432 } 327 433 434 + /** 435 + * __eeh_clear_slot - Clear failure flag for the child devices 436 + * @parent: parent device 437 + * @mode_flag: flag to be cleared 438 + * 439 + * Clear failure flag for the child devices. 
440 + */ 328 441 static void __eeh_clear_slot(struct device_node *parent, int mode_flag) 329 442 { 330 443 struct device_node *dn; 331 444 332 445 for_each_child_of_node(parent, dn) { 333 - if (PCI_DN(dn)) { 334 - PCI_DN(dn)->eeh_mode &= ~mode_flag; 335 - PCI_DN(dn)->eeh_check_count = 0; 446 + if (of_node_to_eeh_dev(dn)) { 447 + of_node_to_eeh_dev(dn)->mode &= ~mode_flag; 448 + of_node_to_eeh_dev(dn)->check_count = 0; 336 449 __eeh_clear_slot(dn, mode_flag); 337 450 } 338 451 } 339 452 } 340 453 341 - void eeh_clear_slot (struct device_node *dn, int mode_flag) 454 + /** 455 + * eeh_clear_slot - Clear failure flag for the indicated device and its children 456 + * @dn: parent device 457 + * @mode_flag: flag to be cleared 458 + * 459 + * Clear failure flag for the indicated device and its children. 460 + */ 461 + void eeh_clear_slot(struct device_node *dn, int mode_flag) 342 462 { 343 463 unsigned long flags; 344 464 raw_spin_lock_irqsave(&confirm_error_lock, flags); 345 465 346 - dn = find_device_pe (dn); 466 + dn = eeh_find_device_pe(dn); 347 467 348 468 /* Back up one, since config addrs might be shared */ 349 - if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) 469 + if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) 350 470 dn = dn->parent; 351 471 352 - PCI_DN(dn)->eeh_mode &= ~mode_flag; 353 - PCI_DN(dn)->eeh_check_count = 0; 472 + of_node_to_eeh_dev(dn)->mode &= ~mode_flag; 473 + of_node_to_eeh_dev(dn)->check_count = 0; 354 474 __eeh_clear_slot(dn, mode_flag); 355 475 raw_spin_unlock_irqrestore(&confirm_error_lock, flags); 356 476 } 357 477 358 - void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) 359 - { 360 - struct device_node *dn; 361 - 362 - for_each_child_of_node(parent, dn) { 363 - if (PCI_DN(dn)) { 364 - 365 - struct pci_dev *dev = PCI_DN(dn)->pcidev; 366 - 367 - if (dev && dev->driver) 368 - *freset |= dev->needs_freset; 369 - 370 - __eeh_set_pe_freset(dn, freset); 371 - } 372 - } 373 - } 374 - 375 - void 
eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) 376 - { 377 - struct pci_dev *dev; 378 - dn = find_device_pe(dn); 379 - 380 - /* Back up one, since config addrs might be shared */ 381 - if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) 382 - dn = dn->parent; 383 - 384 - dev = PCI_DN(dn)->pcidev; 385 - if (dev) 386 - *freset |= dev->needs_freset; 387 - 388 - __eeh_set_pe_freset(dn, freset); 389 - } 390 - 391 478 /** 392 - * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze 393 - * @dn device node 394 - * @dev pci device, if known 479 + * eeh_dn_check_failure - Check if all 1's data is due to EEH slot freeze 480 + * @dn: device node 481 + * @dev: pci device, if known 395 482 * 396 483 * Check for an EEH failure for the given device node. Call this 397 484 * routine if the result of a read was all 0xff's and you want to ··· 395 504 int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) 396 505 { 397 506 int ret; 398 - int rets[3]; 399 507 unsigned long flags; 400 - struct pci_dn *pdn; 508 + struct eeh_dev *edev; 401 509 int rc = 0; 402 510 const char *location; 403 511 404 - total_mmio_ffs++; 512 + eeh_stats.total_mmio_ffs++; 405 513 406 514 if (!eeh_subsystem_enabled) 407 515 return 0; 408 516 409 517 if (!dn) { 410 - no_dn++; 518 + eeh_stats.no_dn++; 411 519 return 0; 412 520 } 413 - dn = find_device_pe(dn); 414 - pdn = PCI_DN(dn); 521 + dn = eeh_find_device_pe(dn); 522 + edev = of_node_to_eeh_dev(dn); 415 523 416 524 /* Access to IO BARs might get this far and still not want checking. 
*/ 417 - if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || 418 - pdn->eeh_mode & EEH_MODE_NOCHECK) { 419 - ignored_check++; 525 + if (!(edev->mode & EEH_MODE_SUPPORTED) || 526 + edev->mode & EEH_MODE_NOCHECK) { 527 + eeh_stats.ignored_check++; 420 528 pr_debug("EEH: Ignored check (%x) for %s %s\n", 421 - pdn->eeh_mode, eeh_pci_name(dev), dn->full_name); 529 + edev->mode, eeh_pci_name(dev), dn->full_name); 422 530 return 0; 423 531 } 424 532 425 - if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) { 426 - no_cfg_addr++; 533 + if (!edev->config_addr && !edev->pe_config_addr) { 534 + eeh_stats.no_cfg_addr++; 427 535 return 0; 428 536 } 429 537 ··· 434 544 */ 435 545 raw_spin_lock_irqsave(&confirm_error_lock, flags); 436 546 rc = 1; 437 - if (pdn->eeh_mode & EEH_MODE_ISOLATED) { 438 - pdn->eeh_check_count ++; 439 - if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) { 547 + if (edev->mode & EEH_MODE_ISOLATED) { 548 + edev->check_count++; 549 + if (edev->check_count % EEH_MAX_FAILS == 0) { 440 550 location = of_get_property(dn, "ibm,loc-code", NULL); 441 - printk (KERN_ERR "EEH: %d reads ignored for recovering device at " 551 + printk(KERN_ERR "EEH: %d reads ignored for recovering device at " 442 552 "location=%s driver=%s pci addr=%s\n", 443 - pdn->eeh_check_count, location, 553 + edev->check_count, location, 444 554 eeh_driver_name(dev), eeh_pci_name(dev)); 445 - printk (KERN_ERR "EEH: Might be infinite loop in %s driver\n", 555 + printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", 446 556 eeh_driver_name(dev)); 447 557 dump_stack(); 448 558 } ··· 456 566 * function zero of a multi-function device. 457 567 * In any case they must share a common PHB. 
458 568 */ 459 - ret = read_slot_reset_state(pdn, rets); 460 - 461 - /* If the call to firmware failed, punt */ 462 - if (ret != 0) { 463 - printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n", 464 - ret, dn->full_name); 465 - false_positives++; 466 - pdn->eeh_false_positives ++; 467 - rc = 0; 468 - goto dn_unlock; 469 - } 569 + ret = eeh_ops->get_state(dn, NULL); 470 570 471 571 /* Note that config-io to empty slots may fail; 472 - * they are empty when they don't have children. */ 473 - if ((rets[0] == 5) && (rets[2] == 0) && (dn->child == NULL)) { 474 - false_positives++; 475 - pdn->eeh_false_positives ++; 572 + * they are empty when they don't have children. 573 + * We will punt with the following conditions: Failure to get 574 + * PE's state, EEH not support and Permanently unavailable 575 + * state, PE is in good state. 576 + */ 577 + if ((ret < 0) || 578 + (ret == EEH_STATE_NOT_SUPPORT) || 579 + (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == 580 + (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { 581 + eeh_stats.false_positives++; 582 + edev->false_positives ++; 476 583 rc = 0; 477 584 goto dn_unlock; 478 585 } 479 586 480 - /* If EEH is not supported on this device, punt. */ 481 - if (rets[1] != 1) { 482 - printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n", 483 - ret, dn->full_name); 484 - false_positives++; 485 - pdn->eeh_false_positives ++; 486 - rc = 0; 487 - goto dn_unlock; 488 - } 489 - 490 - /* If not the kind of error we know about, punt. */ 491 - if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) { 492 - false_positives++; 493 - pdn->eeh_false_positives ++; 494 - rc = 0; 495 - goto dn_unlock; 496 - } 497 - 498 - slot_resets++; 587 + eeh_stats.slot_resets++; 499 588 500 589 /* Avoid repeated reports of this failure, including problems 501 590 * with other functions on this device, and functions under 502 - * bridges. */ 503 - eeh_mark_slot (dn, EEH_MODE_ISOLATED); 591 + * bridges. 
592 + */ 593 + eeh_mark_slot(dn, EEH_MODE_ISOLATED); 504 594 raw_spin_unlock_irqrestore(&confirm_error_lock, flags); 505 595 506 - eeh_send_failure_event (dn, dev); 596 + eeh_send_failure_event(edev); 507 597 508 598 /* Most EEH events are due to device driver bugs. Having 509 599 * a stack trace will help the device-driver authors figure 510 - * out what happened. So print that out. */ 600 + * out what happened. So print that out. 601 + */ 511 602 dump_stack(); 512 603 return 1; 513 604 ··· 500 629 EXPORT_SYMBOL_GPL(eeh_dn_check_failure); 501 630 502 631 /** 503 - * eeh_check_failure - check if all 1's data is due to EEH slot freeze 504 - * @token i/o token, should be address in the form 0xA.... 505 - * @val value, should be all 1's (XXX why do we need this arg??) 632 + * eeh_check_failure - Check if all 1's data is due to EEH slot freeze 633 + * @token: I/O token, should be address in the form 0xA.... 634 + * @val: value, should be all 1's (XXX why do we need this arg??) 506 635 * 507 636 * Check for an EEH failure at the given token address. Call this 508 637 * routine if the result of a read was all 0xff's and you want to ··· 519 648 520 649 /* Finding the phys addr + pci device; this is pretty quick. 
*/ 521 650 addr = eeh_token_to_phys((unsigned long __force) token); 522 - dev = pci_get_device_by_addr(addr); 651 + dev = pci_addr_cache_get_device(addr); 523 652 if (!dev) { 524 - no_device++; 653 + eeh_stats.no_device++; 525 654 return val; 526 655 } 527 656 528 657 dn = pci_device_to_OF_node(dev); 529 - eeh_dn_check_failure (dn, dev); 658 + eeh_dn_check_failure(dn, dev); 530 659 531 660 pci_dev_put(dev); 532 661 return val; ··· 534 663 535 664 EXPORT_SYMBOL(eeh_check_failure); 536 665 537 - /* ------------------------------------------------------------- */ 538 - /* The code below deals with error recovery */ 539 666 540 667 /** 541 - * rtas_pci_enable - enable MMIO or DMA transfers for this slot 542 - * @pdn pci device node 668 + * eeh_pci_enable - Enable MMIO or DMA transfers for this slot 669 + * @edev: pci device node 670 + * 671 + * This routine should be called to reenable frozen MMIO or DMA 672 + * so that it would work correctly again. It's useful while doing 673 + * recovery or log collection on the indicated device. 
543 674 */ 544 - 545 - int 546 - rtas_pci_enable(struct pci_dn *pdn, int function) 675 + int eeh_pci_enable(struct eeh_dev *edev, int function) 547 676 { 548 - int config_addr; 549 677 int rc; 678 + struct device_node *dn = eeh_dev_to_of_node(edev); 550 679 551 - /* Use PE configuration address, if present */ 552 - config_addr = pdn->eeh_config_addr; 553 - if (pdn->eeh_pe_config_addr) 554 - config_addr = pdn->eeh_pe_config_addr; 555 - 556 - rc = rtas_call(ibm_set_eeh_option, 4, 1, NULL, 557 - config_addr, 558 - BUID_HI(pdn->phb->buid), 559 - BUID_LO(pdn->phb->buid), 560 - function); 561 - 680 + rc = eeh_ops->set_option(dn, function); 562 681 if (rc) 563 682 printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n", 564 - function, rc, pdn->node->full_name); 683 + function, rc, dn->full_name); 565 684 566 - rc = eeh_wait_for_slot_status (pdn, PCI_BUS_RESET_WAIT_MSEC); 567 - if ((rc == 4) && (function == EEH_THAW_MMIO)) 685 + rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC); 686 + if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) && 687 + (function == EEH_OPT_THAW_MMIO)) 568 688 return 0; 569 689 570 690 return rc; 571 691 } 572 692 573 693 /** 574 - * rtas_pci_slot_reset - raises/lowers the pci #RST line 575 - * @pdn pci device node 576 - * @state: 1/0 to raise/lower the #RST 577 - * 578 - * Clear the EEH-frozen condition on a slot. This routine 579 - * asserts the PCI #RST line if the 'state' argument is '1', 580 - * and drops the #RST line if 'state is '0'. This routine is 581 - * safe to call in an interrupt context. 
582 - * 583 - */ 584 - 585 - static void 586 - rtas_pci_slot_reset(struct pci_dn *pdn, int state) 587 - { 588 - int config_addr; 589 - int rc; 590 - 591 - BUG_ON (pdn==NULL); 592 - 593 - if (!pdn->phb) { 594 - printk (KERN_WARNING "EEH: in slot reset, device node %s has no phb\n", 595 - pdn->node->full_name); 596 - return; 597 - } 598 - 599 - /* Use PE configuration address, if present */ 600 - config_addr = pdn->eeh_config_addr; 601 - if (pdn->eeh_pe_config_addr) 602 - config_addr = pdn->eeh_pe_config_addr; 603 - 604 - rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL, 605 - config_addr, 606 - BUID_HI(pdn->phb->buid), 607 - BUID_LO(pdn->phb->buid), 608 - state); 609 - 610 - /* Fundamental-reset not supported on this PE, try hot-reset */ 611 - if (rc == -8 && state == 3) { 612 - rc = rtas_call(ibm_set_slot_reset, 4, 1, NULL, 613 - config_addr, 614 - BUID_HI(pdn->phb->buid), 615 - BUID_LO(pdn->phb->buid), 1); 616 - if (rc) 617 - printk(KERN_WARNING 618 - "EEH: Unable to reset the failed slot," 619 - " #RST=%d dn=%s\n", 620 - rc, pdn->node->full_name); 621 - } 622 - } 623 - 624 - /** 625 694 * pcibios_set_pcie_slot_reset - Set PCI-E reset state 626 - * @dev: pci device struct 627 - * @state: reset state to enter 695 + * @dev: pci device struct 696 + * @state: reset state to enter 628 697 * 629 698 * Return value: 630 699 * 0 if success 631 - **/ 700 + */ 632 701 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) 633 702 { 634 703 struct device_node *dn = pci_device_to_OF_node(dev); 635 - struct pci_dn *pdn = PCI_DN(dn); 636 704 637 705 switch (state) { 638 706 case pcie_deassert_reset: 639 - rtas_pci_slot_reset(pdn, 0); 707 + eeh_ops->reset(dn, EEH_RESET_DEACTIVATE); 640 708 break; 641 709 case pcie_hot_reset: 642 - rtas_pci_slot_reset(pdn, 1); 710 + eeh_ops->reset(dn, EEH_RESET_HOT); 643 711 break; 644 712 case pcie_warm_reset: 645 - rtas_pci_slot_reset(pdn, 3); 713 + eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL); 646 714 break; 647 715 
default: 648 716 return -EINVAL; ··· 591 781 } 592 782 593 783 /** 594 - * rtas_set_slot_reset -- assert the pci #RST line for 1/4 second 595 - * @pdn: pci device node to be reset. 784 + * __eeh_set_pe_freset - Check the required reset for child devices 785 + * @parent: parent device 786 + * @freset: return value 787 + * 788 + * Each device might have its preferred reset type: fundamental or 789 + * hot reset. The routine is used to collect the information from 790 + * the child devices so that they could be reset accordingly. 596 791 */ 792 + void __eeh_set_pe_freset(struct device_node *parent, unsigned int *freset) 793 + { 794 + struct device_node *dn; 597 795 598 - static void __rtas_set_slot_reset(struct pci_dn *pdn) 796 + for_each_child_of_node(parent, dn) { 797 + if (of_node_to_eeh_dev(dn)) { 798 + struct pci_dev *dev = of_node_to_eeh_dev(dn)->pdev; 799 + 800 + if (dev && dev->driver) 801 + *freset |= dev->needs_freset; 802 + 803 + __eeh_set_pe_freset(dn, freset); 804 + } 805 + } 806 + } 807 + 808 + /** 809 + * eeh_set_pe_freset - Check the required reset for the indicated device and its children 810 + * @dn: parent device 811 + * @freset: return value 812 + * 813 + * Each device might have its preferred reset type: fundamental or 814 + * hot reset. The routine is used to collected the information for 815 + * the indicated device and its children so that the bunch of the 816 + * devices could be reset properly. 
817 + */ 818 + void eeh_set_pe_freset(struct device_node *dn, unsigned int *freset) 819 + { 820 + struct pci_dev *dev; 821 + dn = eeh_find_device_pe(dn); 822 + 823 + /* Back up one, since config addrs might be shared */ 824 + if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) 825 + dn = dn->parent; 826 + 827 + dev = of_node_to_eeh_dev(dn)->pdev; 828 + if (dev) 829 + *freset |= dev->needs_freset; 830 + 831 + __eeh_set_pe_freset(dn, freset); 832 + } 833 + 834 + /** 835 + * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second 836 + * @edev: pci device node to be reset. 837 + * 838 + * Assert the PCI #RST line for 1/4 second. 839 + */ 840 + static void eeh_reset_pe_once(struct eeh_dev *edev) 599 841 { 600 842 unsigned int freset = 0; 843 + struct device_node *dn = eeh_dev_to_of_node(edev); 601 844 602 845 /* Determine type of EEH reset required for 603 846 * Partitionable Endpoint, a hot-reset (1) ··· 658 795 * A fundamental reset required by any device under 659 796 * Partitionable Endpoint trumps hot-reset. 660 797 */ 661 - eeh_set_pe_freset(pdn->node, &freset); 798 + eeh_set_pe_freset(dn, &freset); 662 799 663 800 if (freset) 664 - rtas_pci_slot_reset(pdn, 3); 801 + eeh_ops->reset(dn, EEH_RESET_FUNDAMENTAL); 665 802 else 666 - rtas_pci_slot_reset(pdn, 1); 803 + eeh_ops->reset(dn, EEH_RESET_HOT); 667 804 668 805 /* The PCI bus requires that the reset be held high for at least 669 - * a 100 milliseconds. We wait a bit longer 'just in case'. */ 670 - 806 + * a 100 milliseconds. We wait a bit longer 'just in case'. 807 + */ 671 808 #define PCI_BUS_RST_HOLD_TIME_MSEC 250 672 - msleep (PCI_BUS_RST_HOLD_TIME_MSEC); 809 + msleep(PCI_BUS_RST_HOLD_TIME_MSEC); 673 810 674 811 /* We might get hit with another EEH freeze as soon as the 675 812 * pci slot reset line is dropped. Make sure we don't miss 676 - * these, and clear the flag now. */ 677 - eeh_clear_slot (pdn->node, EEH_MODE_ISOLATED); 813 + * these, and clear the flag now. 
814 + */ 815 + eeh_clear_slot(dn, EEH_MODE_ISOLATED); 678 816 679 - rtas_pci_slot_reset (pdn, 0); 817 + eeh_ops->reset(dn, EEH_RESET_DEACTIVATE); 680 818 681 819 /* After a PCI slot has been reset, the PCI Express spec requires 682 820 * a 1.5 second idle time for the bus to stabilize, before starting 683 - * up traffic. */ 821 + * up traffic. 822 + */ 684 823 #define PCI_BUS_SETTLE_TIME_MSEC 1800 685 - msleep (PCI_BUS_SETTLE_TIME_MSEC); 824 + msleep(PCI_BUS_SETTLE_TIME_MSEC); 686 825 } 687 826 688 - int rtas_set_slot_reset(struct pci_dn *pdn) 827 + /** 828 + * eeh_reset_pe - Reset the indicated PE 829 + * @edev: PCI device associated EEH device 830 + * 831 + * This routine should be called to reset indicated device, including 832 + * PE. A PE might include multiple PCI devices and sometimes PCI bridges 833 + * might be involved as well. 834 + */ 835 + int eeh_reset_pe(struct eeh_dev *edev) 689 836 { 690 837 int i, rc; 838 + struct device_node *dn = eeh_dev_to_of_node(edev); 691 839 692 840 /* Take three shots at resetting the bus */ 693 841 for (i=0; i<3; i++) { 694 - __rtas_set_slot_reset(pdn); 842 + eeh_reset_pe_once(edev); 695 843 696 - rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC); 697 - if (rc == 0) 844 + rc = eeh_ops->wait_state(dn, PCI_BUS_RESET_WAIT_MSEC); 845 + if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) 698 846 return 0; 699 847 700 848 if (rc < 0) { 701 849 printk(KERN_ERR "EEH: unrecoverable slot failure %s\n", 702 - pdn->node->full_name); 850 + dn->full_name); 703 851 return -1; 704 852 } 705 853 printk(KERN_ERR "EEH: bus reset %d failed on slot %s, rc=%d\n", 706 - i+1, pdn->node->full_name, rc); 854 + i+1, dn->full_name, rc); 707 855 } 708 856 709 857 return -1; 710 858 } 711 859 712 - /* ------------------------------------------------------- */ 713 860 /** Save and restore of PCI BARs 714 861 * 715 862 * Although firmware will set up BARs during boot, it doesn't ··· 729 856 */ 730 857 731 858 /** 732 - * __restore_bars - 
Restore the Base Address Registers 733 - * @pdn: pci device node 859 + * eeh_restore_one_device_bars - Restore the Base Address Registers for one device 860 + * @edev: PCI device associated EEH device 734 861 * 735 862 * Loads the PCI configuration space base address registers, 736 863 * the expansion ROM base address, the latency timer, and etc. 737 864 * from the saved values in the device node. 738 865 */ 739 - static inline void __restore_bars (struct pci_dn *pdn) 866 + static inline void eeh_restore_one_device_bars(struct eeh_dev *edev) 740 867 { 741 868 int i; 742 869 u32 cmd; 870 + struct device_node *dn = eeh_dev_to_of_node(edev); 743 871 744 - if (NULL==pdn->phb) return; 872 + if (!edev->phb) 873 + return; 874 + 745 875 for (i=4; i<10; i++) { 746 - rtas_write_config(pdn, i*4, 4, pdn->config_space[i]); 876 + eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]); 747 877 } 748 878 749 879 /* 12 == Expansion ROM Address */ 750 - rtas_write_config(pdn, 12*4, 4, pdn->config_space[12]); 880 + eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]); 751 881 752 882 #define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF)) 753 - #define SAVED_BYTE(OFF) (((u8 *)(pdn->config_space))[BYTE_SWAP(OFF)]) 883 + #define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)]) 754 884 755 - rtas_write_config (pdn, PCI_CACHE_LINE_SIZE, 1, 885 + eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1, 756 886 SAVED_BYTE(PCI_CACHE_LINE_SIZE)); 757 887 758 - rtas_write_config (pdn, PCI_LATENCY_TIMER, 1, 888 + eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1, 759 889 SAVED_BYTE(PCI_LATENCY_TIMER)); 760 890 761 891 /* max latency, min grant, interrupt pin and line */ 762 - rtas_write_config(pdn, 15*4, 4, pdn->config_space[15]); 892 + eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]); 763 893 764 894 /* Restore PERR & SERR bits, some devices require it, 765 - don't touch the other command bits */ 766 - rtas_read_config(pdn, PCI_COMMAND, 4, &cmd); 767 - if (pdn->config_space[1] & 
PCI_COMMAND_PARITY) 895 + * don't touch the other command bits 896 + */ 897 + eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd); 898 + if (edev->config_space[1] & PCI_COMMAND_PARITY) 768 899 cmd |= PCI_COMMAND_PARITY; 769 900 else 770 901 cmd &= ~PCI_COMMAND_PARITY; 771 - if (pdn->config_space[1] & PCI_COMMAND_SERR) 902 + if (edev->config_space[1] & PCI_COMMAND_SERR) 772 903 cmd |= PCI_COMMAND_SERR; 773 904 else 774 905 cmd &= ~PCI_COMMAND_SERR; 775 - rtas_write_config(pdn, PCI_COMMAND, 4, cmd); 906 + eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd); 776 907 } 777 908 778 909 /** 779 - * eeh_restore_bars - restore the PCI config space info 910 + * eeh_restore_bars - Restore the PCI config space info 911 + * @edev: EEH device 780 912 * 781 913 * This routine performs a recursive walk to the children 782 914 * of this device as well. 783 915 */ 784 - void eeh_restore_bars(struct pci_dn *pdn) 916 + void eeh_restore_bars(struct eeh_dev *edev) 785 917 { 786 918 struct device_node *dn; 787 - if (!pdn) 919 + if (!edev) 788 920 return; 789 921 790 - if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code)) 791 - __restore_bars (pdn); 922 + if ((edev->mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(edev->class_code)) 923 + eeh_restore_one_device_bars(edev); 792 924 793 - for_each_child_of_node(pdn->node, dn) 794 - eeh_restore_bars (PCI_DN(dn)); 925 + for_each_child_of_node(eeh_dev_to_of_node(edev), dn) 926 + eeh_restore_bars(of_node_to_eeh_dev(dn)); 795 927 } 796 928 797 929 /** 798 - * eeh_save_bars - save device bars 930 + * eeh_save_bars - Save device bars 931 + * @edev: PCI device associated EEH device 799 932 * 800 933 * Save the values of the device bars. Unlike the restore 801 934 * routine, this routine is *not* recursive. This is because 802 935 * PCI devices are added individually; but, for the restore, 803 936 * an entire slot is reset at a time. 
804 937 */ 805 - static void eeh_save_bars(struct pci_dn *pdn) 938 + static void eeh_save_bars(struct eeh_dev *edev) 806 939 { 807 940 int i; 941 + struct device_node *dn; 808 942 809 - if (!pdn ) 943 + if (!edev) 810 944 return; 945 + dn = eeh_dev_to_of_node(edev); 811 946 812 947 for (i = 0; i < 16; i++) 813 - rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]); 948 + eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]); 814 949 } 815 950 816 - void 817 - rtas_configure_bridge(struct pci_dn *pdn) 818 - { 819 - int config_addr; 820 - int rc; 821 - int token; 822 - 823 - /* Use PE configuration address, if present */ 824 - config_addr = pdn->eeh_config_addr; 825 - if (pdn->eeh_pe_config_addr) 826 - config_addr = pdn->eeh_pe_config_addr; 827 - 828 - /* Use new configure-pe function, if supported */ 829 - if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) 830 - token = ibm_configure_pe; 831 - else 832 - token = ibm_configure_bridge; 833 - 834 - rc = rtas_call(token, 3, 1, NULL, 835 - config_addr, 836 - BUID_HI(pdn->phb->buid), 837 - BUID_LO(pdn->phb->buid)); 838 - if (rc) { 839 - printk (KERN_WARNING "EEH: Unable to configure device bridge (%d) for %s\n", 840 - rc, pdn->node->full_name); 841 - } 842 - } 843 - 844 - /* ------------------------------------------------------------- */ 845 - /* The code below deals with enabling EEH for devices during the 846 - * early boot sequence. EEH must be enabled before any PCI probing 847 - * can be done. 951 + /** 952 + * eeh_early_enable - Early enable EEH on the indicated device 953 + * @dn: device node 954 + * @data: BUID 955 + * 956 + * Enable EEH functionality on the specified PCI device. The function 957 + * is expected to be called before real PCI probing is done. However, 958 + * the PHBs have been initialized at this point. 
848 959 */ 849 - 850 - #define EEH_ENABLE 1 851 - 852 - struct eeh_early_enable_info { 853 - unsigned int buid_hi; 854 - unsigned int buid_lo; 855 - }; 856 - 857 - static int get_pe_addr (int config_addr, 858 - struct eeh_early_enable_info *info) 960 + static void *eeh_early_enable(struct device_node *dn, void *data) 859 961 { 860 - unsigned int rets[3]; 861 - int ret; 862 - 863 - /* Use latest config-addr token on power6 */ 864 - if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { 865 - /* Make sure we have a PE in hand */ 866 - ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets, 867 - config_addr, info->buid_hi, info->buid_lo, 1); 868 - if (ret || (rets[0]==0)) 869 - return 0; 870 - 871 - ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets, 872 - config_addr, info->buid_hi, info->buid_lo, 0); 873 - if (ret) 874 - return 0; 875 - return rets[0]; 876 - } 877 - 878 - /* Use older config-addr token on power5 */ 879 - if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { 880 - ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets, 881 - config_addr, info->buid_hi, info->buid_lo, 0); 882 - if (ret) 883 - return 0; 884 - return rets[0]; 885 - } 886 - return 0; 887 - } 888 - 889 - /* Enable eeh for the given device node. 
*/ 890 - static void *early_enable_eeh(struct device_node *dn, void *data) 891 - { 892 - unsigned int rets[3]; 893 - struct eeh_early_enable_info *info = data; 894 962 int ret; 895 963 const u32 *class_code = of_get_property(dn, "class-code", NULL); 896 964 const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL); 897 965 const u32 *device_id = of_get_property(dn, "device-id", NULL); 898 966 const u32 *regs; 899 967 int enable; 900 - struct pci_dn *pdn = PCI_DN(dn); 968 + struct eeh_dev *edev = of_node_to_eeh_dev(dn); 901 969 902 - pdn->class_code = 0; 903 - pdn->eeh_mode = 0; 904 - pdn->eeh_check_count = 0; 905 - pdn->eeh_freeze_count = 0; 906 - pdn->eeh_false_positives = 0; 970 + edev->class_code = 0; 971 + edev->mode = 0; 972 + edev->check_count = 0; 973 + edev->freeze_count = 0; 974 + edev->false_positives = 0; 907 975 908 976 if (!of_device_is_available(dn)) 909 977 return NULL; ··· 855 1041 856 1042 /* There is nothing to check on PCI to ISA bridges */ 857 1043 if (dn->type && !strcmp(dn->type, "isa")) { 858 - pdn->eeh_mode |= EEH_MODE_NOCHECK; 1044 + edev->mode |= EEH_MODE_NOCHECK; 859 1045 return NULL; 860 1046 } 861 - pdn->class_code = *class_code; 1047 + edev->class_code = *class_code; 862 1048 863 1049 /* Ok... see if this device supports EEH. Some do, some don't, 864 - * and the only way to find out is to check each and every one. */ 1050 + * and the only way to find out is to check each and every one. 
1051 + */ 865 1052 regs = of_get_property(dn, "reg", NULL); 866 1053 if (regs) { 867 1054 /* First register entry is addr (00BBSS00) */ 868 1055 /* Try to enable eeh */ 869 - ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, 870 - regs[0], info->buid_hi, info->buid_lo, 871 - EEH_ENABLE); 1056 + ret = eeh_ops->set_option(dn, EEH_OPT_ENABLE); 872 1057 873 1058 enable = 0; 874 1059 if (ret == 0) { 875 - pdn->eeh_config_addr = regs[0]; 1060 + edev->config_addr = regs[0]; 876 1061 877 1062 /* If the newer, better, ibm,get-config-addr-info is supported, 878 - * then use that instead. */ 879 - pdn->eeh_pe_config_addr = get_pe_addr(pdn->eeh_config_addr, info); 1063 + * then use that instead. 1064 + */ 1065 + edev->pe_config_addr = eeh_ops->get_pe_addr(dn); 880 1066 881 1067 /* Some older systems (Power4) allow the 882 1068 * ibm,set-eeh-option call to succeed even on nodes 883 1069 * where EEH is not supported. Verify support 884 - * explicitly. */ 885 - ret = read_slot_reset_state(pdn, rets); 886 - if ((ret == 0) && (rets[1] == 1)) 1070 + * explicitly. 1071 + */ 1072 + ret = eeh_ops->get_state(dn, NULL); 1073 + if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT) 887 1074 enable = 1; 888 1075 } 889 1076 890 1077 if (enable) { 891 1078 eeh_subsystem_enabled = 1; 892 - pdn->eeh_mode |= EEH_MODE_SUPPORTED; 1079 + edev->mode |= EEH_MODE_SUPPORTED; 893 1080 894 1081 pr_debug("EEH: %s: eeh enabled, config=%x pe_config=%x\n", 895 - dn->full_name, pdn->eeh_config_addr, 896 - pdn->eeh_pe_config_addr); 1082 + dn->full_name, edev->config_addr, 1083 + edev->pe_config_addr); 897 1084 } else { 898 1085 899 1086 /* This device doesn't support EEH, but it may have an 900 - * EEH parent, in which case we mark it as supported. */ 901 - if (dn->parent && PCI_DN(dn->parent) 902 - && (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) { 1087 + * EEH parent, in which case we mark it as supported. 
1088 + */ 1089 + if (dn->parent && of_node_to_eeh_dev(dn->parent) && 1090 + (of_node_to_eeh_dev(dn->parent)->mode & EEH_MODE_SUPPORTED)) { 903 1091 /* Parent supports EEH. */ 904 - pdn->eeh_mode |= EEH_MODE_SUPPORTED; 905 - pdn->eeh_config_addr = PCI_DN(dn->parent)->eeh_config_addr; 1092 + edev->mode |= EEH_MODE_SUPPORTED; 1093 + edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr; 906 1094 return NULL; 907 1095 } 908 1096 } ··· 913 1097 dn->full_name); 914 1098 } 915 1099 916 - eeh_save_bars(pdn); 1100 + eeh_save_bars(edev); 917 1101 return NULL; 918 1102 } 919 1103 920 - /* 1104 + /** 1105 + * eeh_ops_register - Register platform dependent EEH operations 1106 + * @ops: platform dependent EEH operations 1107 + * 1108 + * Register the platform dependent EEH operation callback 1109 + * functions. The platform should call this function before 1110 + * any other EEH operations. 1111 + */ 1112 + int __init eeh_ops_register(struct eeh_ops *ops) 1113 + { 1114 + if (!ops->name) { 1115 + pr_warning("%s: Invalid EEH ops name for %p\n", 1116 + __func__, ops); 1117 + return -EINVAL; 1118 + } 1119 + 1120 + if (eeh_ops && eeh_ops != ops) { 1121 + pr_warning("%s: EEH ops of platform %s already existing (%s)\n", 1122 + __func__, eeh_ops->name, ops->name); 1123 + return -EEXIST; 1124 + } 1125 + 1126 + eeh_ops = ops; 1127 + 1128 + return 0; 1129 + } 1130 + 1131 + /** 1132 + * eeh_ops_unregister - Unreigster platform dependent EEH operations 1133 + * @name: name of EEH platform operations 1134 + * 1135 + * Unregister the platform dependent EEH operation callback 1136 + * functions. 
1137 + */ 1138 + int __exit eeh_ops_unregister(const char *name) 1139 + { 1140 + if (!name || !strlen(name)) { 1141 + pr_warning("%s: Invalid EEH ops name\n", 1142 + __func__); 1143 + return -EINVAL; 1144 + } 1145 + 1146 + if (eeh_ops && !strcmp(eeh_ops->name, name)) { 1147 + eeh_ops = NULL; 1148 + return 0; 1149 + } 1150 + 1151 + return -EEXIST; 1152 + } 1153 + 1154 + /** 1155 + * eeh_init - EEH initialization 1156 + * 921 1157 * Initialize EEH by trying to enable it for all of the adapters in the system. 922 1158 * As a side effect we can determine here if eeh is supported at all. 923 1159 * Note that we leave EEH on so failed config cycles won't cause a machine ··· 985 1117 void __init eeh_init(void) 986 1118 { 987 1119 struct device_node *phb, *np; 988 - struct eeh_early_enable_info info; 1120 + int ret; 1121 + 1122 + /* call platform initialization function */ 1123 + if (!eeh_ops) { 1124 + pr_warning("%s: Platform EEH operation not found\n", 1125 + __func__); 1126 + return; 1127 + } else if ((ret = eeh_ops->init())) { 1128 + pr_warning("%s: Failed to call platform init function (%d)\n", 1129 + __func__, ret); 1130 + return; 1131 + } 989 1132 990 1133 raw_spin_lock_init(&confirm_error_lock); 991 - spin_lock_init(&slot_errbuf_lock); 992 1134 993 1135 np = of_find_node_by_path("/rtas"); 994 1136 if (np == NULL) 995 1137 return; 996 - 997 - ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); 998 - ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); 999 - ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); 1000 - ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); 1001 - ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); 1002 - ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); 1003 - ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); 1004 - ibm_configure_bridge = rtas_token ("ibm,configure-bridge"); 1005 - ibm_configure_pe = rtas_token("ibm,configure-pe"); 1006 - 1007 - if 
(ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) 1008 - return; 1009 - 1010 - eeh_error_buf_size = rtas_token("rtas-error-log-max"); 1011 - if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { 1012 - eeh_error_buf_size = 1024; 1013 - } 1014 - if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { 1015 - printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated " 1016 - "buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX); 1017 - eeh_error_buf_size = RTAS_ERROR_LOG_MAX; 1018 - } 1019 1138 1020 1139 /* Enable EEH for all adapters. Note that eeh requires buid's */ 1021 1140 for (phb = of_find_node_by_name(NULL, "pci"); phb; ··· 1010 1155 unsigned long buid; 1011 1156 1012 1157 buid = get_phb_buid(phb); 1013 - if (buid == 0 || PCI_DN(phb) == NULL) 1158 + if (buid == 0 || !of_node_to_eeh_dev(phb)) 1014 1159 continue; 1015 1160 1016 - info.buid_lo = BUID_LO(buid); 1017 - info.buid_hi = BUID_HI(buid); 1018 - traverse_pci_devices(phb, early_enable_eeh, &info); 1161 + traverse_pci_devices(phb, eeh_early_enable, NULL); 1019 1162 } 1020 1163 1021 1164 if (eeh_subsystem_enabled) ··· 1023 1170 } 1024 1171 1025 1172 /** 1026 - * eeh_add_device_early - enable EEH for the indicated device_node 1173 + * eeh_add_device_early - Enable EEH for the indicated device_node 1027 1174 * @dn: device node for which to set up EEH 1028 1175 * 1029 1176 * This routine must be used to perform EEH initialization for PCI ··· 1037 1184 static void eeh_add_device_early(struct device_node *dn) 1038 1185 { 1039 1186 struct pci_controller *phb; 1040 - struct eeh_early_enable_info info; 1041 1187 1042 - if (!dn || !PCI_DN(dn)) 1188 + if (!dn || !of_node_to_eeh_dev(dn)) 1043 1189 return; 1044 - phb = PCI_DN(dn)->phb; 1190 + phb = of_node_to_eeh_dev(dn)->phb; 1045 1191 1046 1192 /* USB Bus children of PCI devices will not have BUID's */ 1047 1193 if (NULL == phb || 0 == phb->buid) 1048 1194 return; 1049 1195 1050 - info.buid_hi = BUID_HI(phb->buid); 1051 - info.buid_lo = BUID_LO(phb->buid); 1052 - 
early_enable_eeh(dn, &info); 1196 + eeh_early_enable(dn, NULL); 1053 1197 } 1054 1198 1199 + /** 1200 + * eeh_add_device_tree_early - Enable EEH for the indicated device 1201 + * @dn: device node 1202 + * 1203 + * This routine must be used to perform EEH initialization for the 1204 + * indicated PCI device that was added after system boot (e.g. 1205 + * hotplug, dlpar). 1206 + */ 1055 1207 void eeh_add_device_tree_early(struct device_node *dn) 1056 1208 { 1057 1209 struct device_node *sib; ··· 1068 1210 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); 1069 1211 1070 1212 /** 1071 - * eeh_add_device_late - perform EEH initialization for the indicated pci device 1213 + * eeh_add_device_late - Perform EEH initialization for the indicated pci device 1072 1214 * @dev: pci device for which to set up EEH 1073 1215 * 1074 1216 * This routine must be used to complete EEH initialization for PCI ··· 1077 1219 static void eeh_add_device_late(struct pci_dev *dev) 1078 1220 { 1079 1221 struct device_node *dn; 1080 - struct pci_dn *pdn; 1222 + struct eeh_dev *edev; 1081 1223 1082 1224 if (!dev || !eeh_subsystem_enabled) 1083 1225 return; ··· 1085 1227 pr_debug("EEH: Adding device %s\n", pci_name(dev)); 1086 1228 1087 1229 dn = pci_device_to_OF_node(dev); 1088 - pdn = PCI_DN(dn); 1089 - if (pdn->pcidev == dev) { 1230 + edev = pci_dev_to_eeh_dev(dev); 1231 + if (edev->pdev == dev) { 1090 1232 pr_debug("EEH: Already referenced !\n"); 1091 1233 return; 1092 1234 } 1093 - WARN_ON(pdn->pcidev); 1235 + WARN_ON(edev->pdev); 1094 1236 1095 - pci_dev_get (dev); 1096 - pdn->pcidev = dev; 1237 + pci_dev_get(dev); 1238 + edev->pdev = dev; 1239 + dev->dev.archdata.edev = edev; 1097 1240 1098 1241 pci_addr_cache_insert_device(dev); 1099 1242 eeh_sysfs_add_device(dev); 1100 1243 } 1101 1244 1245 + /** 1246 + * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus 1247 + * @bus: PCI bus 1248 + * 1249 + * This routine must be used to perform EEH initialization for PCI 1250 + 
* devices which are attached to the indicated PCI bus. The PCI bus 1251 + * is added after system boot through hotplug or dlpar. 1252 + */ 1102 1253 void eeh_add_device_tree_late(struct pci_bus *bus) 1103 1254 { 1104 1255 struct pci_dev *dev; ··· 1124 1257 EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); 1125 1258 1126 1259 /** 1127 - * eeh_remove_device - undo EEH setup for the indicated pci device 1260 + * eeh_remove_device - Undo EEH setup for the indicated pci device 1128 1261 * @dev: pci device to be removed 1129 1262 * 1130 1263 * This routine should be called when a device is removed from ··· 1135 1268 */ 1136 1269 static void eeh_remove_device(struct pci_dev *dev) 1137 1270 { 1138 - struct device_node *dn; 1271 + struct eeh_dev *edev; 1272 + 1139 1273 if (!dev || !eeh_subsystem_enabled) 1140 1274 return; 1275 + edev = pci_dev_to_eeh_dev(dev); 1141 1276 1142 1277 /* Unregister the device with the EEH/PCI address search system */ 1143 1278 pr_debug("EEH: Removing device %s\n", pci_name(dev)); 1144 1279 1145 - dn = pci_device_to_OF_node(dev); 1146 - if (PCI_DN(dn)->pcidev == NULL) { 1280 + if (!edev || !edev->pdev) { 1147 1281 pr_debug("EEH: Not referenced !\n"); 1148 1282 return; 1149 1283 } 1150 - PCI_DN(dn)->pcidev = NULL; 1151 - pci_dev_put (dev); 1284 + edev->pdev = NULL; 1285 + dev->dev.archdata.edev = NULL; 1286 + pci_dev_put(dev); 1152 1287 1153 1288 pci_addr_cache_remove_device(dev); 1154 1289 eeh_sysfs_remove_device(dev); 1155 1290 } 1156 1291 1292 + /** 1293 + * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device 1294 + * @dev: PCI device 1295 + * 1296 + * This routine must be called when a device is removed from the 1297 + * running system through hotplug or dlpar. The corresponding 1298 + * PCI address cache will be removed. 
1299 + */ 1157 1300 void eeh_remove_bus_device(struct pci_dev *dev) 1158 1301 { 1159 1302 struct pci_bus *bus = dev->subordinate; ··· 1182 1305 { 1183 1306 if (0 == eeh_subsystem_enabled) { 1184 1307 seq_printf(m, "EEH Subsystem is globally disabled\n"); 1185 - seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs); 1308 + seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); 1186 1309 } else { 1187 1310 seq_printf(m, "EEH Subsystem is enabled\n"); 1188 1311 seq_printf(m, 1189 - "no device=%ld\n" 1190 - "no device node=%ld\n" 1191 - "no config address=%ld\n" 1192 - "check not wanted=%ld\n" 1193 - "eeh_total_mmio_ffs=%ld\n" 1194 - "eeh_false_positives=%ld\n" 1195 - "eeh_slot_resets=%ld\n", 1196 - no_device, no_dn, no_cfg_addr, 1197 - ignored_check, total_mmio_ffs, 1198 - false_positives, 1199 - slot_resets); 1312 + "no device=%llu\n" 1313 + "no device node=%llu\n" 1314 + "no config address=%llu\n" 1315 + "check not wanted=%llu\n" 1316 + "eeh_total_mmio_ffs=%llu\n" 1317 + "eeh_false_positives=%llu\n" 1318 + "eeh_slot_resets=%llu\n", 1319 + eeh_stats.no_device, 1320 + eeh_stats.no_dn, 1321 + eeh_stats.no_cfg_addr, 1322 + eeh_stats.ignored_check, 1323 + eeh_stats.total_mmio_ffs, 1324 + eeh_stats.false_positives, 1325 + eeh_stats.slot_resets); 1200 1326 } 1201 1327 1202 1328 return 0;
+27 -17
arch/powerpc/platforms/pseries/eeh_cache.c
··· 1 1 /* 2 - * eeh_cache.c 3 2 * PCI address cache; allows the lookup of PCI devices based on I/O address 4 3 * 5 4 * Copyright IBM Corporation 2004 ··· 46 47 * than any hash algo I could think of for this problem, even 47 48 * with the penalty of slow pointer chases for d-cache misses). 48 49 */ 49 - struct pci_io_addr_range 50 - { 50 + struct pci_io_addr_range { 51 51 struct rb_node rb_node; 52 52 unsigned long addr_lo; 53 53 unsigned long addr_hi; ··· 54 56 unsigned int flags; 55 57 }; 56 58 57 - static struct pci_io_addr_cache 58 - { 59 + static struct pci_io_addr_cache { 59 60 struct rb_root rb_root; 60 61 spinlock_t piar_lock; 61 62 } pci_io_addr_cache_root; 62 63 63 - static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr) 64 + static inline struct pci_dev *__pci_addr_cache_get_device(unsigned long addr) 64 65 { 65 66 struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node; 66 67 ··· 83 86 } 84 87 85 88 /** 86 - * pci_get_device_by_addr - Get device, given only address 89 + * pci_addr_cache_get_device - Get device, given only address 87 90 * @addr: mmio (PIO) phys address or i/o port number 88 91 * 89 92 * Given an mmio phys address, or a port number, find a pci device ··· 92 95 * from zero (that is, they do *not* have pci_io_addr added in). 93 96 * It is safe to call this function within an interrupt. 
94 97 */ 95 - struct pci_dev *pci_get_device_by_addr(unsigned long addr) 98 + struct pci_dev *pci_addr_cache_get_device(unsigned long addr) 96 99 { 97 100 struct pci_dev *dev; 98 101 unsigned long flags; 99 102 100 103 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags); 101 - dev = __pci_get_device_by_addr(addr); 104 + dev = __pci_addr_cache_get_device(addr); 102 105 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags); 103 106 return dev; 104 107 } ··· 163 166 164 167 #ifdef DEBUG 165 168 printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n", 166 - alo, ahi, pci_name (dev)); 169 + alo, ahi, pci_name(dev)); 167 170 #endif 168 171 169 172 rb_link_node(&piar->rb_node, parent, p); ··· 175 178 static void __pci_addr_cache_insert_device(struct pci_dev *dev) 176 179 { 177 180 struct device_node *dn; 178 - struct pci_dn *pdn; 181 + struct eeh_dev *edev; 179 182 int i; 180 183 181 184 dn = pci_device_to_OF_node(dev); ··· 184 187 return; 185 188 } 186 189 190 + edev = of_node_to_eeh_dev(dn); 191 + if (!edev) { 192 + pr_warning("PCI: no EEH dev found for dn=%s\n", 193 + dn->full_name); 194 + return; 195 + } 196 + 187 197 /* Skip any devices for which EEH is not enabled. 
*/ 188 - pdn = PCI_DN(dn); 189 - if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) || 190 - pdn->eeh_mode & EEH_MODE_NOCHECK) { 198 + if (!(edev->mode & EEH_MODE_SUPPORTED) || 199 + edev->mode & EEH_MODE_NOCHECK) { 191 200 #ifdef DEBUG 192 - printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n", 193 - pci_name(dev), pdn->node->full_name); 201 + pr_info("PCI: skip building address cache for=%s - %s\n", 202 + pci_name(dev), dn->full_name); 194 203 #endif 195 204 return; 196 205 } ··· 287 284 void __init pci_addr_cache_build(void) 288 285 { 289 286 struct device_node *dn; 287 + struct eeh_dev *edev; 290 288 struct pci_dev *dev = NULL; 291 289 292 290 spin_lock_init(&pci_io_addr_cache_root.piar_lock); ··· 298 294 dn = pci_device_to_OF_node(dev); 299 295 if (!dn) 300 296 continue; 297 + 298 + edev = of_node_to_eeh_dev(dn); 299 + if (!edev) 300 + continue; 301 + 301 302 pci_dev_get(dev); /* matching put is in eeh_remove_device() */ 302 - PCI_DN(dn)->pcidev = dev; 303 + dev->dev.archdata.edev = edev; 304 + edev->pdev = dev; 303 305 304 306 eeh_sysfs_add_device(dev); 305 307 }
+102
arch/powerpc/platforms/pseries/eeh_dev.c
··· 1 + /* 2 + * The file intends to implement dynamic creation of EEH device, which will 3 + * be bound with OF node and PCI device simutaneously. The EEH devices would 4 + * be foundamental information for EEH core components to work proerly. Besides, 5 + * We have to support multiple situations where dynamic creation of EEH device 6 + * is required: 7 + * 8 + * 1) Before PCI emunation starts, we need create EEH devices according to the 9 + * PCI sensitive OF nodes. 10 + * 2) When PCI emunation is done, we need do the binding between PCI device and 11 + * the associated EEH device. 12 + * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device 13 + * will be created while PCI sensitive OF node is detected from DR. 14 + * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If 15 + * PHB is newly inserted, we also need create EEH devices accordingly. 16 + * 17 + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012. 18 + * 19 + * This program is free software; you can redistribute it and/or modify 20 + * it under the terms of the GNU General Public License as published by 21 + * the Free Software Foundation; either version 2 of the License, or 22 + * (at your option) any later version. 23 + * 24 + * This program is distributed in the hope that it will be useful, 25 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 26 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 27 + * GNU General Public License for more details. 
28 + * 29 + * You should have received a copy of the GNU General Public License 30 + * along with this program; if not, write to the Free Software 31 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 32 + */ 33 + 34 + #include <linux/export.h> 35 + #include <linux/gfp.h> 36 + #include <linux/init.h> 37 + #include <linux/kernel.h> 38 + #include <linux/pci.h> 39 + #include <linux/string.h> 40 + 41 + #include <asm/pci-bridge.h> 42 + #include <asm/ppc-pci.h> 43 + 44 + /** 45 + * eeh_dev_init - Create EEH device according to OF node 46 + * @dn: device node 47 + * @data: PHB 48 + * 49 + * It will create EEH device according to the given OF node. The function 50 + * might be called by PCI emunation, DR, PHB hotplug. 51 + */ 52 + void * __devinit eeh_dev_init(struct device_node *dn, void *data) 53 + { 54 + struct pci_controller *phb = data; 55 + struct eeh_dev *edev; 56 + 57 + /* Allocate EEH device */ 58 + edev = zalloc_maybe_bootmem(sizeof(*edev), GFP_KERNEL); 59 + if (!edev) { 60 + pr_warning("%s: out of memory\n", __func__); 61 + return NULL; 62 + } 63 + 64 + /* Associate EEH device with OF node */ 65 + dn->edev = edev; 66 + edev->dn = dn; 67 + edev->phb = phb; 68 + 69 + return NULL; 70 + } 71 + 72 + /** 73 + * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB 74 + * @phb: PHB 75 + * 76 + * Scan the PHB OF node and its child association, then create the 77 + * EEH devices accordingly 78 + */ 79 + void __devinit eeh_dev_phb_init_dynamic(struct pci_controller *phb) 80 + { 81 + struct device_node *dn = phb->dn; 82 + 83 + /* EEH device for PHB */ 84 + eeh_dev_init(dn, phb); 85 + 86 + /* EEH devices for children OF nodes */ 87 + traverse_pci_devices(dn, eeh_dev_init, phb); 88 + } 89 + 90 + /** 91 + * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs 92 + * 93 + * Scan all the existing PHBs and create EEH devices for their OF 94 + * nodes and their children OF nodes 95 + */ 96 + void __init 
eeh_dev_phb_init(void) 97 + { 98 + struct pci_controller *phb, *tmp; 99 + 100 + list_for_each_entry_safe(phb, tmp, &hose_list, list_node) 101 + eeh_dev_phb_init_dynamic(phb); 102 + }
+120 -93
arch/powerpc/platforms/pseries/eeh_driver.c
··· 33 33 #include <asm/prom.h> 34 34 #include <asm/rtas.h> 35 35 36 - 37 - static inline const char * pcid_name (struct pci_dev *pdev) 36 + /** 37 + * eeh_pcid_name - Retrieve name of PCI device driver 38 + * @pdev: PCI device 39 + * 40 + * This routine is used to retrieve the name of PCI device driver 41 + * if that's valid. 42 + */ 43 + static inline const char *eeh_pcid_name(struct pci_dev *pdev) 38 44 { 39 45 if (pdev && pdev->dev.driver) 40 46 return pdev->dev.driver->name; ··· 70 64 #endif 71 65 72 66 /** 73 - * eeh_disable_irq - disable interrupt for the recovering device 67 + * eeh_disable_irq - Disable interrupt for the recovering device 68 + * @dev: PCI device 69 + * 70 + * This routine must be called when reporting temporary or permanent 71 + * error to the particular PCI device to disable interrupt of that 72 + * device. If the device has enabled MSI or MSI-X interrupt, we needn't 73 + * do real work because EEH should freeze DMA transfers for those PCI 74 + * devices encountering EEH errors, which includes MSI or MSI-X. 74 75 */ 75 76 static void eeh_disable_irq(struct pci_dev *dev) 76 77 { 77 - struct device_node *dn = pci_device_to_OF_node(dev); 78 + struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 78 79 79 80 /* Don't disable MSI and MSI-X interrupts. They are 80 81 * effectively disabled by the DMA Stopped state 81 82 * when an EEH error occurs. 82 - */ 83 + */ 83 84 if (dev->msi_enabled || dev->msix_enabled) 84 85 return; 85 86 86 87 if (!irq_has_action(dev->irq)) 87 88 return; 88 89 89 - PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; 90 + edev->mode |= EEH_MODE_IRQ_DISABLED; 90 91 disable_irq_nosync(dev->irq); 91 92 } 92 93 93 94 /** 94 - * eeh_enable_irq - enable interrupt for the recovering device 95 + * eeh_enable_irq - Enable interrupt for the recovering device 96 + * @dev: PCI device 97 + * 98 + * This routine must be called to enable interrupt while failed 99 + * device could be resumed. 
95 100 */ 96 101 static void eeh_enable_irq(struct pci_dev *dev) 97 102 { 98 - struct device_node *dn = pci_device_to_OF_node(dev); 103 + struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 99 104 100 - if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) { 101 - PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED; 105 + if ((edev->mode) & EEH_MODE_IRQ_DISABLED) { 106 + edev->mode &= ~EEH_MODE_IRQ_DISABLED; 102 107 enable_irq(dev->irq); 103 108 } 104 109 } 105 110 106 - /* ------------------------------------------------------- */ 107 111 /** 108 - * eeh_report_error - report pci error to each device driver 112 + * eeh_report_error - Report pci error to each device driver 113 + * @dev: PCI device 114 + * @userdata: return value 109 115 * 110 116 * Report an EEH error to each device driver, collect up and 111 117 * merge the device driver responses. Cumulative response 112 118 * passed back in "userdata". 113 119 */ 114 - 115 120 static int eeh_report_error(struct pci_dev *dev, void *userdata) 116 121 { 117 122 enum pci_ers_result rc, *res = userdata; ··· 139 122 !driver->err_handler->error_detected) 140 123 return 0; 141 124 142 - rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen); 125 + rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen); 143 126 144 127 /* A driver that needs a reset trumps all others */ 145 128 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; ··· 149 132 } 150 133 151 134 /** 152 - * eeh_report_mmio_enabled - tell drivers that MMIO has been enabled 135 + * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled 136 + * @dev: PCI device 137 + * @userdata: return value 153 138 * 154 139 * Tells each device driver that IO ports, MMIO and config space I/O 155 140 * are now enabled. Collects up and merges the device driver responses. 156 141 * Cumulative response passed back in "userdata". 
157 142 */ 158 - 159 143 static int eeh_report_mmio_enabled(struct pci_dev *dev, void *userdata) 160 144 { 161 145 enum pci_ers_result rc, *res = userdata; ··· 167 149 !driver->err_handler->mmio_enabled) 168 150 return 0; 169 151 170 - rc = driver->err_handler->mmio_enabled (dev); 152 + rc = driver->err_handler->mmio_enabled(dev); 171 153 172 154 /* A driver that needs a reset trumps all others */ 173 155 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc; ··· 177 159 } 178 160 179 161 /** 180 - * eeh_report_reset - tell device that slot has been reset 162 + * eeh_report_reset - Tell device that slot has been reset 163 + * @dev: PCI device 164 + * @userdata: return value 165 + * 166 + * This routine must be called while EEH tries to reset particular 167 + * PCI device so that the associated PCI device driver could take 168 + * some actions, usually to save data the driver needs so that the 169 + * driver can work again while the device is recovered. 181 170 */ 182 - 183 171 static int eeh_report_reset(struct pci_dev *dev, void *userdata) 184 172 { 185 173 enum pci_ers_result rc, *res = userdata; ··· 212 188 } 213 189 214 190 /** 215 - * eeh_report_resume - tell device to resume normal operations 191 + * eeh_report_resume - Tell device to resume normal operations 192 + * @dev: PCI device 193 + * @userdata: return value 194 + * 195 + * This routine must be called to notify the device driver that it 196 + * could resume so that the device driver can do some initialization 197 + * to make the recovered device work again. 216 198 */ 217 - 218 199 static int eeh_report_resume(struct pci_dev *dev, void *userdata) 219 200 { 220 201 struct pci_driver *driver = dev->driver; ··· 241 212 } 242 213 243 214 /** 244 - * eeh_report_failure - tell device driver that device is dead. 215 + * eeh_report_failure - Tell device driver that device is dead. 
216 + * @dev: PCI device 217 + * @userdata: return value 245 218 * 246 219 * This informs the device driver that the device is permanently 247 220 * dead, and that no further recovery attempts will be made on it. 248 221 */ 249 - 250 222 static int eeh_report_failure(struct pci_dev *dev, void *userdata) 251 223 { 252 224 struct pci_driver *driver = dev->driver; ··· 268 238 return 0; 269 239 } 270 240 271 - /* ------------------------------------------------------- */ 272 241 /** 273 - * handle_eeh_events -- reset a PCI device after hard lockup. 242 + * eeh_reset_device - Perform actual reset of a pci slot 243 + * @edev: PE associated EEH device 244 + * @bus: PCI bus corresponding to the isolcated slot 274 245 * 275 - * pSeries systems will isolate a PCI slot if the PCI-Host 276 - * bridge detects address or data parity errors, DMA's 277 - * occurring to wild addresses (which usually happen due to 278 - * bugs in device drivers or in PCI adapter firmware). 279 - * Slot isolations also occur if #SERR, #PERR or other misc 280 - * PCI-related errors are detected. 281 - * 282 - * Recovery process consists of unplugging the device driver 283 - * (which generated hotplug events to userspace), then issuing 284 - * a PCI #RST to the device, then reconfiguring the PCI config 285 - * space for all bridges & devices under this slot, and then 286 - * finally restarting the device drivers (which cause a second 287 - * set of hotplug events to go out to userspace). 246 + * This routine must be called to do reset on the indicated PE. 247 + * During the reset, udev might be invoked because those affected 248 + * PCI devices will be removed and then added. 288 249 */ 289 - 290 - /** 291 - * eeh_reset_device() -- perform actual reset of a pci slot 292 - * @bus: pointer to the pci bus structure corresponding 293 - * to the isolated slot. A non-null value will 294 - * cause all devices under the bus to be removed 295 - * and then re-added. 
296 - * @pe_dn: pointer to a "Partionable Endpoint" device node. 297 - * This is the top-level structure on which pci 298 - * bus resets can be performed. 299 - */ 300 - 301 - static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus) 250 + static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus) 302 251 { 303 252 struct device_node *dn; 304 253 int cnt, rc; 305 254 306 255 /* pcibios will clear the counter; save the value */ 307 - cnt = pe_dn->eeh_freeze_count; 256 + cnt = edev->freeze_count; 308 257 309 258 if (bus) 310 259 pcibios_remove_pci_devices(bus); 311 260 312 261 /* Reset the pci controller. (Asserts RST#; resets config space). 313 262 * Reconfigure bridges and devices. Don't try to bring the system 314 - * up if the reset failed for some reason. */ 315 - rc = rtas_set_slot_reset(pe_dn); 263 + * up if the reset failed for some reason. 264 + */ 265 + rc = eeh_reset_pe(edev); 316 266 if (rc) 317 267 return rc; 318 268 319 - /* Walk over all functions on this device. */ 320 - dn = pe_dn->node; 321 - if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) 269 + /* Walk over all functions on this device. */ 270 + dn = eeh_dev_to_of_node(edev); 271 + if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent)) 322 272 dn = dn->parent->child; 323 273 324 274 while (dn) { 325 - struct pci_dn *ppe = PCI_DN(dn); 275 + struct eeh_dev *pedev = of_node_to_eeh_dev(dn); 276 + 326 277 /* On Power4, always true because eeh_pe_config_addr=0 */ 327 - if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) { 328 - rtas_configure_bridge(ppe); 329 - eeh_restore_bars(ppe); 278 + if (edev->pe_config_addr == pedev->pe_config_addr) { 279 + eeh_ops->configure_bridge(dn); 280 + eeh_restore_bars(pedev); 330 281 } 331 282 dn = dn->sibling; 332 283 } ··· 319 308 * potentially weird things happen. 
320 309 */ 321 310 if (bus) { 322 - ssleep (5); 311 + ssleep(5); 323 312 pcibios_add_pci_devices(bus); 324 313 } 325 - pe_dn->eeh_freeze_count = cnt; 314 + edev->freeze_count = cnt; 326 315 327 316 return 0; 328 317 } ··· 332 321 */ 333 322 #define MAX_WAIT_FOR_RECOVERY 150 334 323 335 - struct pci_dn * handle_eeh_events (struct eeh_event *event) 324 + /** 325 + * eeh_handle_event - Reset a PCI device after hard lockup. 326 + * @event: EEH event 327 + * 328 + * While PHB detects address or data parity errors on particular PCI 329 + * slot, the associated PE will be frozen. Besides, DMA's occurring 330 + * to wild addresses (which usually happen due to bugs in device 331 + * drivers or in PCI adapter firmware) can cause EEH error. #SERR, 332 + * #PERR or other misc PCI-related errors also can trigger EEH errors. 333 + * 334 + * Recovery process consists of unplugging the device driver (which 335 + * generated hotplug events to userspace), then issuing a PCI #RST to 336 + * the device, then reconfiguring the PCI config space for all bridges 337 + * & devices under this slot, and then finally restarting the device 338 + * drivers (which cause a second set of hotplug events to go out to 339 + * userspace). 340 + */ 341 + struct eeh_dev *handle_eeh_events(struct eeh_event *event) 336 342 { 337 343 struct device_node *frozen_dn; 338 - struct pci_dn *frozen_pdn; 344 + struct eeh_dev *frozen_edev; 339 345 struct pci_bus *frozen_bus; 340 346 int rc = 0; 341 347 enum pci_ers_result result = PCI_ERS_RESULT_NONE; 342 348 const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str; 343 349 344 - frozen_dn = find_device_pe(event->dn); 350 + frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev)); 345 351 if (!frozen_dn) { 346 - 347 - location = of_get_property(event->dn, "ibm,loc-code", NULL); 352 + location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL); 348 353 location = location ? 
location : "unknown"; 349 354 printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " 350 355 "for location=%s pci addr=%s\n", 351 - location, eeh_pci_name(event->dev)); 356 + location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev))); 352 357 return NULL; 353 358 } 354 359 ··· 377 350 * which was always an EADS pci bridge. In the new style, 378 351 * there might not be any EADS bridges, and even when there are, 379 352 * the firmware marks them as "EEH incapable". So another 380 - * two-step is needed to find the pci bus.. */ 353 + * two-step is needed to find the pci bus.. 354 + */ 381 355 if (!frozen_bus) 382 - frozen_bus = pcibios_find_pci_bus (frozen_dn->parent); 356 + frozen_bus = pcibios_find_pci_bus(frozen_dn->parent); 383 357 384 358 if (!frozen_bus) { 385 359 printk(KERN_ERR "EEH: Cannot find PCI bus " ··· 389 361 return NULL; 390 362 } 391 363 392 - frozen_pdn = PCI_DN(frozen_dn); 393 - frozen_pdn->eeh_freeze_count++; 364 + frozen_edev = of_node_to_eeh_dev(frozen_dn); 365 + frozen_edev->freeze_count++; 366 + pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev)); 367 + drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev)); 394 368 395 - pci_str = eeh_pci_name(event->dev); 396 - drv_str = pcid_name(event->dev); 397 - 398 - if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES) 369 + if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES) 399 370 goto excess_failures; 400 371 401 372 printk(KERN_WARNING 402 373 "EEH: This PCI device has failed %d times in the last hour:\n", 403 - frozen_pdn->eeh_freeze_count); 374 + frozen_edev->freeze_count); 404 375 405 - if (frozen_pdn->pcidev) { 406 - bus_pci_str = pci_name(frozen_pdn->pcidev); 407 - bus_drv_str = pcid_name(frozen_pdn->pcidev); 376 + if (frozen_edev->pdev) { 377 + bus_pci_str = pci_name(frozen_edev->pdev); 378 + bus_drv_str = eeh_pcid_name(frozen_edev->pdev); 408 379 printk(KERN_WARNING 409 380 "EEH: Bus location=%s driver=%s pci addr=%s\n", 410 381 location, bus_drv_str, bus_pci_str); 
··· 422 395 pci_walk_bus(frozen_bus, eeh_report_error, &result); 423 396 424 397 /* Get the current PCI slot state. This can take a long time, 425 - * sometimes over 3 seconds for certain systems. */ 426 - rc = eeh_wait_for_slot_status (frozen_pdn, MAX_WAIT_FOR_RECOVERY*1000); 427 - if (rc < 0) { 398 + * sometimes over 3 seconds for certain systems. 399 + */ 400 + rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000); 401 + if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { 428 402 printk(KERN_WARNING "EEH: Permanent failure\n"); 429 403 goto hard_fail; 430 404 } ··· 434 406 * don't post the error log until after all dev drivers 435 407 * have been informed. 436 408 */ 437 - eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP_FAILURE); 409 + eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP); 438 410 439 411 /* If all device drivers were EEH-unaware, then shut 440 412 * down all of the device drivers, and hope they 441 413 * go down willingly, without panicing the system. 
442 414 */ 443 415 if (result == PCI_ERS_RESULT_NONE) { 444 - rc = eeh_reset_device(frozen_pdn, frozen_bus); 416 + rc = eeh_reset_device(frozen_edev, frozen_bus); 445 417 if (rc) { 446 418 printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); 447 419 goto hard_fail; ··· 450 422 451 423 /* If all devices reported they can proceed, then re-enable MMIO */ 452 424 if (result == PCI_ERS_RESULT_CAN_RECOVER) { 453 - rc = rtas_pci_enable(frozen_pdn, EEH_THAW_MMIO); 425 + rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO); 454 426 455 427 if (rc < 0) 456 428 goto hard_fail; ··· 464 436 465 437 /* If all devices reported they can proceed, then re-enable DMA */ 466 438 if (result == PCI_ERS_RESULT_CAN_RECOVER) { 467 - rc = rtas_pci_enable(frozen_pdn, EEH_THAW_DMA); 439 + rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA); 468 440 469 441 if (rc < 0) 470 442 goto hard_fail; ··· 482 454 483 455 /* If any device called out for a reset, then reset the slot */ 484 456 if (result == PCI_ERS_RESULT_NEED_RESET) { 485 - rc = eeh_reset_device(frozen_pdn, NULL); 457 + rc = eeh_reset_device(frozen_edev, NULL); 486 458 if (rc) { 487 459 printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); 488 460 goto hard_fail; ··· 501 473 /* Tell all device drivers that they can resume operations */ 502 474 pci_walk_bus(frozen_bus, eeh_report_resume, NULL); 503 475 504 - return frozen_pdn; 476 + return frozen_edev; 505 477 506 478 excess_failures: 507 479 /* ··· 514 486 "has failed %d times in the last hour " 515 487 "and has been permanently disabled.\n" 516 488 "Please try reseating this device or replacing it.\n", 517 - location, drv_str, pci_str, frozen_pdn->eeh_freeze_count); 489 + location, drv_str, pci_str, frozen_edev->freeze_count); 518 490 goto perm_error; 519 491 520 492 hard_fail: ··· 525 497 location, drv_str, pci_str); 526 498 527 499 perm_error: 528 - eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM_FAILURE); 500 + eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM); 529 501 530 502 /* 
Notify all devices that they're about to go down. */ 531 503 pci_walk_bus(frozen_bus, eeh_report_failure, NULL); ··· 536 508 return NULL; 537 509 } 538 510 539 - /* ---------- end of file ---------- */
+28 -27
arch/powerpc/platforms/pseries/eeh_event.c
··· 1 1 /* 2 - * eeh_event.c 3 - * 4 2 * This program is free software; you can redistribute it and/or modify 5 3 * it under the terms of the GNU General Public License as published by 6 4 * the Free Software Foundation; either version 2 of the License, or ··· 44 46 DEFINE_MUTEX(eeh_event_mutex); 45 47 46 48 /** 47 - * eeh_event_handler - dispatch EEH events. 49 + * eeh_event_handler - Dispatch EEH events. 48 50 * @dummy - unused 49 51 * 50 52 * The detection of a frozen slot can occur inside an interrupt, ··· 56 58 static int eeh_event_handler(void * dummy) 57 59 { 58 60 unsigned long flags; 59 - struct eeh_event *event; 60 - struct pci_dn *pdn; 61 + struct eeh_event *event; 62 + struct eeh_dev *edev; 61 63 62 - daemonize ("eehd"); 64 + daemonize("eehd"); 63 65 set_current_state(TASK_INTERRUPTIBLE); 64 66 65 67 spin_lock_irqsave(&eeh_eventlist_lock, flags); ··· 77 79 78 80 /* Serialize processing of EEH events */ 79 81 mutex_lock(&eeh_event_mutex); 80 - eeh_mark_slot(event->dn, EEH_MODE_RECOVERING); 82 + edev = event->edev; 83 + eeh_mark_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING); 81 84 82 85 printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", 83 - eeh_pci_name(event->dev)); 86 + eeh_pci_name(edev->pdev)); 84 87 85 - pdn = handle_eeh_events(event); 88 + edev = handle_eeh_events(event); 86 89 87 - eeh_clear_slot(event->dn, EEH_MODE_RECOVERING); 88 - pci_dev_put(event->dev); 90 + eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING); 91 + pci_dev_put(edev->pdev); 92 + 89 93 kfree(event); 90 94 mutex_unlock(&eeh_event_mutex); 91 95 92 96 /* If there are no new errors after an hour, clear the counter. 
*/ 93 - if (pdn && pdn->eeh_freeze_count>0) { 94 - msleep_interruptible (3600*1000); 95 - if (pdn->eeh_freeze_count>0) 96 - pdn->eeh_freeze_count--; 97 + if (edev && edev->freeze_count>0) { 98 + msleep_interruptible(3600*1000); 99 + if (edev->freeze_count>0) 100 + edev->freeze_count--; 101 + 97 102 } 98 103 99 104 return 0; 100 105 } 101 106 102 107 /** 103 - * eeh_thread_launcher 108 + * eeh_thread_launcher - Start kernel thread to handle EEH events 104 109 * @dummy - unused 110 + * 111 + * This routine is called to start the kernel thread for processing 112 + * EEH event. 105 113 */ 106 114 static void eeh_thread_launcher(struct work_struct *dummy) 107 115 { ··· 116 112 } 117 113 118 114 /** 119 - * eeh_send_failure_event - generate a PCI error event 120 - * @dev pci device 115 + * eeh_send_failure_event - Generate a PCI error event 116 + * @edev: EEH device 121 117 * 122 118 * This routine can be called within an interrupt context; 123 119 * the actual event will be delivered in a normal context 124 120 * (from a workqueue). 
125 121 */ 126 - int eeh_send_failure_event (struct device_node *dn, 127 - struct pci_dev *dev) 122 + int eeh_send_failure_event(struct eeh_dev *edev) 128 123 { 129 124 unsigned long flags; 130 125 struct eeh_event *event; 126 + struct device_node *dn = eeh_dev_to_of_node(edev); 131 127 const char *location; 132 128 133 129 if (!mem_init_done) { ··· 139 135 } 140 136 event = kmalloc(sizeof(*event), GFP_ATOMIC); 141 137 if (event == NULL) { 142 - printk (KERN_ERR "EEH: out of memory, event not handled\n"); 138 + printk(KERN_ERR "EEH: out of memory, event not handled\n"); 143 139 return 1; 144 140 } 145 141 146 - if (dev) 147 - pci_dev_get(dev); 142 + if (edev->pdev) 143 + pci_dev_get(edev->pdev); 148 144 149 - event->dn = dn; 150 - event->dev = dev; 145 + event->edev = edev; 151 146 152 147 /* We may or may not be called in an interrupt context */ 153 148 spin_lock_irqsave(&eeh_eventlist_lock, flags); ··· 157 154 158 155 return 0; 159 156 } 160 - 161 - /********************** END OF FILE ******************************/
+565
arch/powerpc/platforms/pseries/eeh_pseries.c
··· 1 + /* 2 + * The file intends to implement the platform dependent EEH operations on pseries. 3 + * Actually, the pseries platform is built based on RTAS heavily. That means the 4 + * pseries platform dependent EEH operations will be built on RTAS calls. The functions 5 + * are devired from arch/powerpc/platforms/pseries/eeh.c and necessary cleanup has 6 + * been done. 7 + * 8 + * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2011. 9 + * Copyright IBM Corporation 2001, 2005, 2006 10 + * Copyright Dave Engebretsen & Todd Inglett 2001 11 + * Copyright Linas Vepstas 2005, 2006 12 + * 13 + * This program is free software; you can redistribute it and/or modify 14 + * it under the terms of the GNU General Public License as published by 15 + * the Free Software Foundation; either version 2 of the License, or 16 + * (at your option) any later version. 17 + * 18 + * This program is distributed in the hope that it will be useful, 19 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 20 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 21 + * GNU General Public License for more details. 
22 + * 23 + * You should have received a copy of the GNU General Public License 24 + * along with this program; if not, write to the Free Software 25 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 26 + */ 27 + 28 + #include <linux/atomic.h> 29 + #include <linux/delay.h> 30 + #include <linux/export.h> 31 + #include <linux/init.h> 32 + #include <linux/list.h> 33 + #include <linux/of.h> 34 + #include <linux/pci.h> 35 + #include <linux/proc_fs.h> 36 + #include <linux/rbtree.h> 37 + #include <linux/sched.h> 38 + #include <linux/seq_file.h> 39 + #include <linux/spinlock.h> 40 + 41 + #include <asm/eeh.h> 42 + #include <asm/eeh_event.h> 43 + #include <asm/io.h> 44 + #include <asm/machdep.h> 45 + #include <asm/ppc-pci.h> 46 + #include <asm/rtas.h> 47 + 48 + /* RTAS tokens */ 49 + static int ibm_set_eeh_option; 50 + static int ibm_set_slot_reset; 51 + static int ibm_read_slot_reset_state; 52 + static int ibm_read_slot_reset_state2; 53 + static int ibm_slot_error_detail; 54 + static int ibm_get_config_addr_info; 55 + static int ibm_get_config_addr_info2; 56 + static int ibm_configure_bridge; 57 + static int ibm_configure_pe; 58 + 59 + /* 60 + * Buffer for reporting slot-error-detail rtas calls. Its here 61 + * in BSS, and not dynamically alloced, so that it ends up in 62 + * RMO where RTAS can access it. 63 + */ 64 + static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX]; 65 + static DEFINE_SPINLOCK(slot_errbuf_lock); 66 + static int eeh_error_buf_size; 67 + 68 + /** 69 + * pseries_eeh_init - EEH platform dependent initialization 70 + * 71 + * EEH platform dependent initialization on pseries. 
72 + */ 73 + static int pseries_eeh_init(void) 74 + { 75 + /* figure out EEH RTAS function call tokens */ 76 + ibm_set_eeh_option = rtas_token("ibm,set-eeh-option"); 77 + ibm_set_slot_reset = rtas_token("ibm,set-slot-reset"); 78 + ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2"); 79 + ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); 80 + ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); 81 + ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); 82 + ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); 83 + ibm_configure_pe = rtas_token("ibm,configure-pe"); 84 + ibm_configure_bridge = rtas_token ("ibm,configure-bridge"); 85 + 86 + /* necessary sanity check */ 87 + if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) { 88 + pr_warning("%s: RTAS service <ibm,set-eeh-option> invalid\n", 89 + __func__); 90 + return -EINVAL; 91 + } else if (ibm_set_slot_reset == RTAS_UNKNOWN_SERVICE) { 92 + pr_warning("%s: RTAS service <ibm, set-slot-reset> invalid\n", 93 + __func__); 94 + return -EINVAL; 95 + } else if (ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE && 96 + ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) { 97 + pr_warning("%s: RTAS service <ibm,read-slot-reset-state2> and " 98 + "<ibm,read-slot-reset-state> invalid\n", 99 + __func__); 100 + return -EINVAL; 101 + } else if (ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE) { 102 + pr_warning("%s: RTAS service <ibm,slot-error-detail> invalid\n", 103 + __func__); 104 + return -EINVAL; 105 + } else if (ibm_get_config_addr_info2 == RTAS_UNKNOWN_SERVICE && 106 + ibm_get_config_addr_info == RTAS_UNKNOWN_SERVICE) { 107 + pr_warning("%s: RTAS service <ibm,get-config-addr-info2> and " 108 + "<ibm,get-config-addr-info> invalid\n", 109 + __func__); 110 + return -EINVAL; 111 + } else if (ibm_configure_pe == RTAS_UNKNOWN_SERVICE && 112 + ibm_configure_bridge == RTAS_UNKNOWN_SERVICE) { 113 + pr_warning("%s: RTAS service <ibm,configure-pe> and " 114 + 
"<ibm,configure-bridge> invalid\n", 115 + __func__); 116 + return -EINVAL; 117 + } 118 + 119 + /* Initialize error log lock and size */ 120 + spin_lock_init(&slot_errbuf_lock); 121 + eeh_error_buf_size = rtas_token("rtas-error-log-max"); 122 + if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) { 123 + pr_warning("%s: unknown EEH error log size\n", 124 + __func__); 125 + eeh_error_buf_size = 1024; 126 + } else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) { 127 + pr_warning("%s: EEH error log size %d exceeds the maximal %d\n", 128 + __func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX); 129 + eeh_error_buf_size = RTAS_ERROR_LOG_MAX; 130 + } 131 + 132 + return 0; 133 + } 134 + 135 + /** 136 + * pseries_eeh_set_option - Initialize EEH or MMIO/DMA reenable 137 + * @dn: device node 138 + * @option: operation to be issued 139 + * 140 + * The function is used to control the EEH functionality globally. 141 + * Currently, the following options are supported according to PAPR: 142 + * Enable EEH, Disable EEH, Enable MMIO and Enable DMA 143 + */ 144 + static int pseries_eeh_set_option(struct device_node *dn, int option) 145 + { 146 + int ret = 0; 147 + struct eeh_dev *edev; 148 + const u32 *reg; 149 + int config_addr; 150 + 151 + edev = of_node_to_eeh_dev(dn); 152 + 153 + /* 154 + * When we're enabling or disabling EEH functionality on 155 + * the particular PE, the PE config address is possibly 156 + * unavailable. Therefore, we have to figure it out from 157 + * the FDT node. 
158 + */ 159 + switch (option) { 160 + case EEH_OPT_DISABLE: 161 + case EEH_OPT_ENABLE: 162 + reg = of_get_property(dn, "reg", NULL); 163 + config_addr = reg[0]; 164 + break; 165 + 166 + case EEH_OPT_THAW_MMIO: 167 + case EEH_OPT_THAW_DMA: 168 + config_addr = edev->config_addr; 169 + if (edev->pe_config_addr) 170 + config_addr = edev->pe_config_addr; 171 + break; 172 + 173 + default: 174 + pr_err("%s: Invalid option %d\n", 175 + __func__, option); 176 + return -EINVAL; 177 + } 178 + 179 + ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, 180 + config_addr, BUID_HI(edev->phb->buid), 181 + BUID_LO(edev->phb->buid), option); 182 + 183 + return ret; 184 + } 185 + 186 + /** 187 + * pseries_eeh_get_pe_addr - Retrieve PE address 188 + * @dn: device node 189 + * 190 + * Retrieve the associated PE address. Actually, there're 2 RTAS 191 + * function calls dedicated for the purpose. We need to implement 192 + * it through the new function and then the old one. Besides, 193 + * you should make sure the config address is figured out from 194 + * FDT node before calling the function. 195 + * 196 + * It's notable that zero'ed return value means invalid PE config 197 + * address. 198 + */ 199 + static int pseries_eeh_get_pe_addr(struct device_node *dn) 200 + { 201 + struct eeh_dev *edev; 202 + int ret = 0; 203 + int rets[3]; 204 + 205 + edev = of_node_to_eeh_dev(dn); 206 + 207 + if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { 208 + /* 209 + * First of all, we need to make sure there is one PE 210 + * associated with the device. Otherwise, PE address is 211 + * meaningless. 
212 + */ 213 + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, 214 + edev->config_addr, BUID_HI(edev->phb->buid), 215 + BUID_LO(edev->phb->buid), 1); 216 + if (ret || (rets[0] == 0)) 217 + return 0; 218 + 219 + /* Retrieve the associated PE config address */ 220 + ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets, 221 + edev->config_addr, BUID_HI(edev->phb->buid), 222 + BUID_LO(edev->phb->buid), 0); 223 + if (ret) { 224 + pr_warning("%s: Failed to get PE address for %s\n", 225 + __func__, dn->full_name); 226 + return 0; 227 + } 228 + 229 + return rets[0]; 230 + } 231 + 232 + if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { 233 + ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets, 234 + edev->config_addr, BUID_HI(edev->phb->buid), 235 + BUID_LO(edev->phb->buid), 0); 236 + if (ret) { 237 + pr_warning("%s: Failed to get PE address for %s\n", 238 + __func__, dn->full_name); 239 + return 0; 240 + } 241 + 242 + return rets[0]; 243 + } 244 + 245 + return ret; 246 + } 247 + 248 + /** 249 + * pseries_eeh_get_state - Retrieve PE state 250 + * @dn: PE associated device node 251 + * @state: return value 252 + * 253 + * Retrieve the state of the specified PE. On RTAS compliant 254 + * pseries platform, there already has one dedicated RTAS function 255 + * for the purpose. It's notable that the associated PE config address 256 + * might be ready when calling the function. Therefore, endeavour to 257 + * use the PE config address if possible. Further more, there're 2 258 + * RTAS calls for the purpose, we need to try the new one and back 259 + * to the old one if the new one couldn't work properly. 
260 + */ 261 + static int pseries_eeh_get_state(struct device_node *dn, int *state) 262 + { 263 + struct eeh_dev *edev; 264 + int config_addr; 265 + int ret; 266 + int rets[4]; 267 + int result; 268 + 269 + /* Figure out PE config address if possible */ 270 + edev = of_node_to_eeh_dev(dn); 271 + config_addr = edev->config_addr; 272 + if (edev->pe_config_addr) 273 + config_addr = edev->pe_config_addr; 274 + 275 + if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) { 276 + ret = rtas_call(ibm_read_slot_reset_state2, 3, 4, rets, 277 + config_addr, BUID_HI(edev->phb->buid), 278 + BUID_LO(edev->phb->buid)); 279 + } else if (ibm_read_slot_reset_state != RTAS_UNKNOWN_SERVICE) { 280 + /* Fake PE unavailable info */ 281 + rets[2] = 0; 282 + ret = rtas_call(ibm_read_slot_reset_state, 3, 3, rets, 283 + config_addr, BUID_HI(edev->phb->buid), 284 + BUID_LO(edev->phb->buid)); 285 + } else { 286 + return EEH_STATE_NOT_SUPPORT; 287 + } 288 + 289 + if (ret) 290 + return ret; 291 + 292 + /* Parse the result out */ 293 + result = 0; 294 + if (rets[1]) { 295 + switch(rets[0]) { 296 + case 0: 297 + result &= ~EEH_STATE_RESET_ACTIVE; 298 + result |= EEH_STATE_MMIO_ACTIVE; 299 + result |= EEH_STATE_DMA_ACTIVE; 300 + break; 301 + case 1: 302 + result |= EEH_STATE_RESET_ACTIVE; 303 + result |= EEH_STATE_MMIO_ACTIVE; 304 + result |= EEH_STATE_DMA_ACTIVE; 305 + break; 306 + case 2: 307 + result &= ~EEH_STATE_RESET_ACTIVE; 308 + result &= ~EEH_STATE_MMIO_ACTIVE; 309 + result &= ~EEH_STATE_DMA_ACTIVE; 310 + break; 311 + case 4: 312 + result &= ~EEH_STATE_RESET_ACTIVE; 313 + result &= ~EEH_STATE_MMIO_ACTIVE; 314 + result &= ~EEH_STATE_DMA_ACTIVE; 315 + result |= EEH_STATE_MMIO_ENABLED; 316 + break; 317 + case 5: 318 + if (rets[2]) { 319 + if (state) *state = rets[2]; 320 + result = EEH_STATE_UNAVAILABLE; 321 + } else { 322 + result = EEH_STATE_NOT_SUPPORT; 323 + } 324 + default: 325 + result = EEH_STATE_NOT_SUPPORT; 326 + } 327 + } else { 328 + result = EEH_STATE_NOT_SUPPORT; 329 + } 330 + 
331 + return result; 332 + } 333 + 334 + /** 335 + * pseries_eeh_reset - Reset the specified PE 336 + * @dn: PE associated device node 337 + * @option: reset option 338 + * 339 + * Reset the specified PE 340 + */ 341 + static int pseries_eeh_reset(struct device_node *dn, int option) 342 + { 343 + struct eeh_dev *edev; 344 + int config_addr; 345 + int ret; 346 + 347 + /* Figure out PE address */ 348 + edev = of_node_to_eeh_dev(dn); 349 + config_addr = edev->config_addr; 350 + if (edev->pe_config_addr) 351 + config_addr = edev->pe_config_addr; 352 + 353 + /* Reset PE through RTAS call */ 354 + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, 355 + config_addr, BUID_HI(edev->phb->buid), 356 + BUID_LO(edev->phb->buid), option); 357 + 358 + /* If fundamental-reset not supported, try hot-reset */ 359 + if (option == EEH_RESET_FUNDAMENTAL && 360 + ret == -8) { 361 + ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL, 362 + config_addr, BUID_HI(edev->phb->buid), 363 + BUID_LO(edev->phb->buid), EEH_RESET_HOT); 364 + } 365 + 366 + return ret; 367 + } 368 + 369 + /** 370 + * pseries_eeh_wait_state - Wait for PE state 371 + * @dn: PE associated device node 372 + * @max_wait: maximal period in microsecond 373 + * 374 + * Wait for the state of associated PE. It might take some time 375 + * to retrieve the PE's state. 376 + */ 377 + static int pseries_eeh_wait_state(struct device_node *dn, int max_wait) 378 + { 379 + int ret; 380 + int mwait; 381 + 382 + /* 383 + * According to PAPR, the state of PE might be temporarily 384 + * unavailable. Under the circumstance, we have to wait 385 + * for indicated time determined by firmware. The maximal 386 + * wait time is 5 minutes, which is acquired from the original 387 + * EEH implementation. Also, the original implementation 388 + * also defined the minimal wait time as 1 second. 
389 + */ 390 + #define EEH_STATE_MIN_WAIT_TIME (1000) 391 + #define EEH_STATE_MAX_WAIT_TIME (300 * 1000) 392 + 393 + while (1) { 394 + ret = pseries_eeh_get_state(dn, &mwait); 395 + 396 + /* 397 + * If the PE's state is temporarily unavailable, 398 + * we have to wait for the specified time. Otherwise, 399 + * the PE's state will be returned immediately. 400 + */ 401 + if (ret != EEH_STATE_UNAVAILABLE) 402 + return ret; 403 + 404 + if (max_wait <= 0) { 405 + pr_warning("%s: Timeout when getting PE's state (%d)\n", 406 + __func__, max_wait); 407 + return EEH_STATE_NOT_SUPPORT; 408 + } 409 + 410 + if (mwait <= 0) { 411 + pr_warning("%s: Firmware returned bad wait value %d\n", 412 + __func__, mwait); 413 + mwait = EEH_STATE_MIN_WAIT_TIME; 414 + } else if (mwait > EEH_STATE_MAX_WAIT_TIME) { 415 + pr_warning("%s: Firmware returned too long wait value %d\n", 416 + __func__, mwait); 417 + mwait = EEH_STATE_MAX_WAIT_TIME; 418 + } 419 + 420 + max_wait -= mwait; 421 + msleep(mwait); 422 + } 423 + 424 + return EEH_STATE_NOT_SUPPORT; 425 + } 426 + 427 + /** 428 + * pseries_eeh_get_log - Retrieve error log 429 + * @dn: device node 430 + * @severity: temporary or permanent error log 431 + * @drv_log: driver log to be combined with retrieved error log 432 + * @len: length of driver log 433 + * 434 + * Retrieve the temporary or permanent error from the PE. 435 + * Actually, the error will be retrieved through the dedicated 436 + * RTAS call. 
437 + */ 438 + static int pseries_eeh_get_log(struct device_node *dn, int severity, char *drv_log, unsigned long len) 439 + { 440 + struct eeh_dev *edev; 441 + int config_addr; 442 + unsigned long flags; 443 + int ret; 444 + 445 + edev = of_node_to_eeh_dev(dn); 446 + spin_lock_irqsave(&slot_errbuf_lock, flags); 447 + memset(slot_errbuf, 0, eeh_error_buf_size); 448 + 449 + /* Figure out the PE address */ 450 + config_addr = edev->config_addr; 451 + if (edev->pe_config_addr) 452 + config_addr = edev->pe_config_addr; 453 + 454 + ret = rtas_call(ibm_slot_error_detail, 8, 1, NULL, config_addr, 455 + BUID_HI(edev->phb->buid), BUID_LO(edev->phb->buid), 456 + virt_to_phys(drv_log), len, 457 + virt_to_phys(slot_errbuf), eeh_error_buf_size, 458 + severity); 459 + if (!ret) 460 + log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0); 461 + spin_unlock_irqrestore(&slot_errbuf_lock, flags); 462 + 463 + return ret; 464 + } 465 + 466 + /** 467 + * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE 468 + * @dn: PE associated device node 469 + * 470 + * The function will be called to reconfigure the bridges included 471 + * in the specified PE so that the malfunctioning PE would be recovered 472 + * again. 
473 + */ 474 + static int pseries_eeh_configure_bridge(struct device_node *dn) 475 + { 476 + struct eeh_dev *edev; 477 + int config_addr; 478 + int ret; 479 + 480 + /* Figure out the PE address */ 481 + edev = of_node_to_eeh_dev(dn); 482 + config_addr = edev->config_addr; 483 + if (edev->pe_config_addr) 484 + config_addr = edev->pe_config_addr; 485 + 486 + /* Use new configure-pe function, if supported */ 487 + if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) { 488 + ret = rtas_call(ibm_configure_pe, 3, 1, NULL, 489 + config_addr, BUID_HI(edev->phb->buid), 490 + BUID_LO(edev->phb->buid)); 491 + } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) { 492 + ret = rtas_call(ibm_configure_bridge, 3, 1, NULL, 493 + config_addr, BUID_HI(edev->phb->buid), 494 + BUID_LO(edev->phb->buid)); 495 + } else { 496 + return -EFAULT; 497 + } 498 + 499 + if (ret) 500 + pr_warning("%s: Unable to configure bridge %d for %s\n", 501 + __func__, ret, dn->full_name); 502 + 503 + return ret; 504 + } 505 + 506 + /** 507 + * pseries_eeh_read_config - Read PCI config space 508 + * @dn: device node 509 + * @where: PCI address 510 + * @size: size to read 511 + * @val: return value 512 + * 513 + * Read config space from the speicifed device 514 + */ 515 + static int pseries_eeh_read_config(struct device_node *dn, int where, int size, u32 *val) 516 + { 517 + struct pci_dn *pdn; 518 + 519 + pdn = PCI_DN(dn); 520 + 521 + return rtas_read_config(pdn, where, size, val); 522 + } 523 + 524 + /** 525 + * pseries_eeh_write_config - Write PCI config space 526 + * @dn: device node 527 + * @where: PCI address 528 + * @size: size to write 529 + * @val: value to be written 530 + * 531 + * Write config space to the specified device 532 + */ 533 + static int pseries_eeh_write_config(struct device_node *dn, int where, int size, u32 val) 534 + { 535 + struct pci_dn *pdn; 536 + 537 + pdn = PCI_DN(dn); 538 + 539 + return rtas_write_config(pdn, where, size, val); 540 + } 541 + 542 + static struct eeh_ops 
pseries_eeh_ops = { 543 + .name = "pseries", 544 + .init = pseries_eeh_init, 545 + .set_option = pseries_eeh_set_option, 546 + .get_pe_addr = pseries_eeh_get_pe_addr, 547 + .get_state = pseries_eeh_get_state, 548 + .reset = pseries_eeh_reset, 549 + .wait_state = pseries_eeh_wait_state, 550 + .get_log = pseries_eeh_get_log, 551 + .configure_bridge = pseries_eeh_configure_bridge, 552 + .read_config = pseries_eeh_read_config, 553 + .write_config = pseries_eeh_write_config 554 + }; 555 + 556 + /** 557 + * eeh_pseries_init - Register platform dependent EEH operations 558 + * 559 + * EEH initialization on pseries platform. This function should be 560 + * called before any EEH related functions. 561 + */ 562 + int __init eeh_pseries_init(void) 563 + { 564 + return eeh_ops_register(&pseries_eeh_ops); 565 + }
+11 -14
arch/powerpc/platforms/pseries/eeh_sysfs.c
··· 28 28 #include <asm/pci-bridge.h> 29 29 30 30 /** 31 - * EEH_SHOW_ATTR -- create sysfs entry for eeh statistic 31 + * EEH_SHOW_ATTR -- Create sysfs entry for eeh statistic 32 32 * @_name: name of file in sysfs directory 33 33 * @_memb: name of member in struct pci_dn to access 34 34 * @_format: printf format for display ··· 41 41 struct device_attribute *attr, char *buf) \ 42 42 { \ 43 43 struct pci_dev *pdev = to_pci_dev(dev); \ 44 - struct device_node *dn = pci_device_to_OF_node(pdev); \ 45 - struct pci_dn *pdn; \ 44 + struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); \ 46 45 \ 47 - if (!dn || PCI_DN(dn) == NULL) \ 48 - return 0; \ 46 + if (!edev) \ 47 + return 0; \ 49 48 \ 50 - pdn = PCI_DN(dn); \ 51 - return sprintf(buf, _format "\n", pdn->_memb); \ 49 + return sprintf(buf, _format "\n", edev->_memb); \ 52 50 } \ 53 51 static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL); 54 52 55 - 56 - EEH_SHOW_ATTR(eeh_mode, eeh_mode, "0x%x"); 57 - EEH_SHOW_ATTR(eeh_config_addr, eeh_config_addr, "0x%x"); 58 - EEH_SHOW_ATTR(eeh_pe_config_addr, eeh_pe_config_addr, "0x%x"); 59 - EEH_SHOW_ATTR(eeh_check_count, eeh_check_count, "%d"); 60 - EEH_SHOW_ATTR(eeh_freeze_count, eeh_freeze_count, "%d"); 61 - EEH_SHOW_ATTR(eeh_false_positives, eeh_false_positives, "%d"); 53 + EEH_SHOW_ATTR(eeh_mode, mode, "0x%x"); 54 + EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x"); 55 + EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x"); 56 + EEH_SHOW_ATTR(eeh_check_count, check_count, "%d" ); 57 + EEH_SHOW_ATTR(eeh_freeze_count, freeze_count, "%d" ); 58 + EEH_SHOW_ATTR(eeh_false_positives, false_positives, "%d" ); 62 59 63 60 void eeh_sysfs_add_device(struct pci_dev *pdev) 64 61 {
+1 -1
arch/powerpc/platforms/pseries/msi.c
··· 217 217 if (!dn) 218 218 return NULL; 219 219 220 - dn = find_device_pe(dn); 220 + dn = eeh_find_device_pe(dn); 221 221 if (!dn) 222 222 return NULL; 223 223
+3
arch/powerpc/platforms/pseries/pci_dlpar.c
··· 147 147 148 148 pci_devs_phb_init_dynamic(phb); 149 149 150 + /* Create EEH devices for the PHB */ 151 + eeh_dev_phb_init_dynamic(phb); 152 + 150 153 if (dn->child) 151 154 eeh_add_device_tree_early(dn); 152 155
+6 -1
arch/powerpc/platforms/pseries/setup.c
··· 260 260 switch (action) { 261 261 case PSERIES_RECONFIG_ADD: 262 262 pci = np->parent->data; 263 - if (pci) 263 + if (pci) { 264 264 update_dn_pci_info(np, pci->phb); 265 + 266 + /* Create EEH device for the OF node */ 267 + eeh_dev_init(np, pci->phb); 268 + } 265 269 break; 266 270 default: 267 271 err = NOTIFY_DONE; ··· 385 381 386 382 /* Find and initialize PCI host bridges */ 387 383 init_pci_config_tokens(); 384 + eeh_pseries_init(); 388 385 find_and_init_phbs(); 389 386 pSeries_reconfig_notifier_register(&pci_dn_reconfig_nb); 390 387 eeh_init();
+10
include/linux/of.h
··· 58 58 struct kref kref; 59 59 unsigned long _flags; 60 60 void *data; 61 + #if defined(CONFIG_EEH) 62 + struct eeh_dev *edev; 63 + #endif 61 64 #if defined(CONFIG_SPARC) 62 65 char *path_component_name; 63 66 unsigned int unique_id; ··· 74 71 int args_count; 75 72 uint32_t args[MAX_PHANDLE_ARGS]; 76 73 }; 74 + 75 + #if defined(CONFIG_EEH) 76 + static inline struct eeh_dev *of_node_to_eeh_dev(struct device_node *dn) 77 + { 78 + return dn->edev; 79 + } 80 + #endif 77 81 78 82 #if defined(CONFIG_SPARC) || !defined(CONFIG_OF) 79 83 /* Dummy ref counting routines - to be implemented later */
+7
include/linux/pci.h
··· 1647 1647 static inline void pci_release_bus_of_node(struct pci_bus *bus) { } 1648 1648 #endif /* CONFIG_OF */ 1649 1649 1650 + #ifdef CONFIG_EEH 1651 + static inline struct eeh_dev *pci_dev_to_eeh_dev(struct pci_dev *pdev) 1652 + { 1653 + return pdev->dev.archdata.edev; 1654 + } 1655 + #endif 1656 + 1650 1657 /** 1651 1658 * pci_find_upstream_pcie_bridge - find upstream PCIe-to-PCI bridge of a device 1652 1659 * @pdev: the PCI device