···244244extern long mol_trampoline;245245EXPORT_SYMBOL(mol_trampoline); /* For MOL */246246EXPORT_SYMBOL(flush_hash_pages); /* For MOL */247247-EXPORT_SYMBOL_GPL(__handle_mm_fault); /* For MOL */248247#ifdef CONFIG_SMP249248extern int mmu_hash_lock;250249EXPORT_SYMBOL(mmu_hash_lock); /* For MOL */
+1-1
arch/powerpc/kernel/rtas_pci.c
···7272 return 0;7373}74747575-static int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)7575+int rtas_read_config(struct pci_dn *pdn, int where, int size, u32 *val)7676{7777 int returnval = -1;7878 unsigned long buid, addr;
-2
arch/powerpc/kernel/setup_32.c
···299299 if (ppc_md.init_early)300300 ppc_md.init_early();301301302302-#ifdef CONFIG_SERIAL_8250303302 find_legacy_serial_ports();304304-#endif305303 finish_device_tree();306304307305 smp_setup_cpu_maps();
-2
arch/powerpc/kernel/setup_64.c
···472472 * hash table management for us, thus ioremap works. We do that early473473 * so that further code can be debugged474474 */475475-#ifdef CONFIG_SERIAL_8250476475 find_legacy_serial_ports();477477-#endif478476479477 /*480478 * "Finish" the device-tree, that is do the actual parsing of
-24
arch/powerpc/kernel/sys_ppc32.c
···552552 return ret;553553}554554555555-asmlinkage int compat_sys_pciconfig_read(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf)556556-{557557- return sys_pciconfig_read((unsigned long) bus,558558- (unsigned long) dfn,559559- (unsigned long) off,560560- (unsigned long) len,561561- compat_ptr(ubuf));562562-}563563-564564-asmlinkage int compat_sys_pciconfig_write(u32 bus, u32 dfn, u32 off, u32 len, u32 ubuf)565565-{566566- return sys_pciconfig_write((unsigned long) bus,567567- (unsigned long) dfn,568568- (unsigned long) off,569569- (unsigned long) len,570570- compat_ptr(ubuf));571571-}572572-573573-asmlinkage int compat_sys_pciconfig_iobase(u32 which, u32 in_bus, u32 in_devfn)574574-{575575- return sys_pciconfig_iobase(which, in_bus, in_devfn);576576-}577577-578578-579555/* Note: it is necessary to treat mode as an unsigned int,580556 * with the corresponding cast to a signed int to insure that the 581557 * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode)
···7676 */7777#define EEH_MAX_FAILS 10000078787979-/* Misc forward declaraions */8080-static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn);8181-8279/* RTAS tokens */8380static int ibm_set_eeh_option;8481static int ibm_set_slot_reset;8582static int ibm_read_slot_reset_state;8683static int ibm_read_slot_reset_state2;8784static int ibm_slot_error_detail;8585+static int ibm_get_config_addr_info;8686+static int ibm_configure_bridge;88878988int eeh_subsystem_enabled;9089EXPORT_SYMBOL(eeh_subsystem_enabled);···9798static int eeh_error_buf_size;989999100/* System monitoring statistics */100100-static DEFINE_PER_CPU(unsigned long, no_device);101101-static DEFINE_PER_CPU(unsigned long, no_dn);102102-static DEFINE_PER_CPU(unsigned long, no_cfg_addr);103103-static DEFINE_PER_CPU(unsigned long, ignored_check);104104-static DEFINE_PER_CPU(unsigned long, total_mmio_ffs);105105-static DEFINE_PER_CPU(unsigned long, false_positives);106106-static DEFINE_PER_CPU(unsigned long, ignored_failures);107107-static DEFINE_PER_CPU(unsigned long, slot_resets);101101+static unsigned long no_device;102102+static unsigned long no_dn;103103+static unsigned long no_cfg_addr;104104+static unsigned long ignored_check;105105+static unsigned long total_mmio_ffs;106106+static unsigned long false_positives;107107+static unsigned long ignored_failures;108108+static unsigned long slot_resets;108109109109-/**110110- * The pci address cache subsystem. This subsystem places111111- * PCI device address resources into a red-black tree, sorted112112- * according to the address range, so that given only an i/o113113- * address, the corresponding PCI device can be **quickly**114114- * found. 
It is safe to perform an address lookup in an interrupt115115- * context; this ability is an important feature.116116- *117117- * Currently, the only customer of this code is the EEH subsystem;118118- * thus, this code has been somewhat tailored to suit EEH better.119119- * In particular, the cache does *not* hold the addresses of devices120120- * for which EEH is not enabled.121121- *122122- * (Implementation Note: The RB tree seems to be better/faster123123- * than any hash algo I could think of for this problem, even124124- * with the penalty of slow pointer chases for d-cache misses).125125- */126126-struct pci_io_addr_range127127-{128128- struct rb_node rb_node;129129- unsigned long addr_lo;130130- unsigned long addr_hi;131131- struct pci_dev *pcidev;132132- unsigned int flags;133133-};134134-135135-static struct pci_io_addr_cache136136-{137137- struct rb_root rb_root;138138- spinlock_t piar_lock;139139-} pci_io_addr_cache_root;140140-141141-static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)142142-{143143- struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;144144-145145- while (n) {146146- struct pci_io_addr_range *piar;147147- piar = rb_entry(n, struct pci_io_addr_range, rb_node);148148-149149- if (addr < piar->addr_lo) {150150- n = n->rb_left;151151- } else {152152- if (addr > piar->addr_hi) {153153- n = n->rb_right;154154- } else {155155- pci_dev_get(piar->pcidev);156156- return piar->pcidev;157157- }158158- }159159- }160160-161161- return NULL;162162-}163163-164164-/**165165- * pci_get_device_by_addr - Get device, given only address166166- * @addr: mmio (PIO) phys address or i/o port number167167- *168168- * Given an mmio phys address, or a port number, find a pci device169169- * that implements this address. Be sure to pci_dev_put the device170170- * when finished. 
I/O port numbers are assumed to be offset171171- * from zero (that is, they do *not* have pci_io_addr added in).172172- * It is safe to call this function within an interrupt.173173- */174174-static struct pci_dev *pci_get_device_by_addr(unsigned long addr)175175-{176176- struct pci_dev *dev;177177- unsigned long flags;178178-179179- spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);180180- dev = __pci_get_device_by_addr(addr);181181- spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);182182- return dev;183183-}184184-185185-#ifdef DEBUG186186-/*187187- * Handy-dandy debug print routine, does nothing more188188- * than print out the contents of our addr cache.189189- */190190-static void pci_addr_cache_print(struct pci_io_addr_cache *cache)191191-{192192- struct rb_node *n;193193- int cnt = 0;194194-195195- n = rb_first(&cache->rb_root);196196- while (n) {197197- struct pci_io_addr_range *piar;198198- piar = rb_entry(n, struct pci_io_addr_range, rb_node);199199- printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",200200- (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,201201- piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));202202- cnt++;203203- n = rb_next(n);204204- }205205-}206206-#endif207207-208208-/* Insert address range into the rb tree. 
*/209209-static struct pci_io_addr_range *210210-pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,211211- unsigned long ahi, unsigned int flags)212212-{213213- struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;214214- struct rb_node *parent = NULL;215215- struct pci_io_addr_range *piar;216216-217217- /* Walk tree, find a place to insert into tree */218218- while (*p) {219219- parent = *p;220220- piar = rb_entry(parent, struct pci_io_addr_range, rb_node);221221- if (ahi < piar->addr_lo) {222222- p = &parent->rb_left;223223- } else if (alo > piar->addr_hi) {224224- p = &parent->rb_right;225225- } else {226226- if (dev != piar->pcidev ||227227- alo != piar->addr_lo || ahi != piar->addr_hi) {228228- printk(KERN_WARNING "PIAR: overlapping address range\n");229229- }230230- return piar;231231- }232232- }233233- piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);234234- if (!piar)235235- return NULL;236236-237237- piar->addr_lo = alo;238238- piar->addr_hi = ahi;239239- piar->pcidev = dev;240240- piar->flags = flags;241241-242242-#ifdef DEBUG243243- printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",244244- alo, ahi, pci_name (dev));245245-#endif246246-247247- rb_link_node(&piar->rb_node, parent, p);248248- rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);249249-250250- return piar;251251-}252252-253253-static void __pci_addr_cache_insert_device(struct pci_dev *dev)254254-{255255- struct device_node *dn;256256- struct pci_dn *pdn;257257- int i;258258- int inserted = 0;259259-260260- dn = pci_device_to_OF_node(dev);261261- if (!dn) {262262- printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev));263263- return;264264- }265265-266266- /* Skip any devices for which EEH is not enabled. 
*/267267- pdn = PCI_DN(dn);268268- if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||269269- pdn->eeh_mode & EEH_MODE_NOCHECK) {270270-#ifdef DEBUG271271- printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n",272272- pci_name(dev), pdn->node->full_name);273273-#endif274274- return;275275- }276276-277277- /* The cache holds a reference to the device... */278278- pci_dev_get(dev);279279-280280- /* Walk resources on this device, poke them into the tree */281281- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {282282- unsigned long start = pci_resource_start(dev,i);283283- unsigned long end = pci_resource_end(dev,i);284284- unsigned int flags = pci_resource_flags(dev,i);285285-286286- /* We are interested only bus addresses, not dma or other stuff */287287- if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))288288- continue;289289- if (start == 0 || ~start == 0 || end == 0 || ~end == 0)290290- continue;291291- pci_addr_cache_insert(dev, start, end, flags);292292- inserted = 1;293293- }294294-295295- /* If there was nothing to add, the cache has no reference... 
*/296296- if (!inserted)297297- pci_dev_put(dev);298298-}299299-300300-/**301301- * pci_addr_cache_insert_device - Add a device to the address cache302302- * @dev: PCI device whose I/O addresses we are interested in.303303- *304304- * In order to support the fast lookup of devices based on addresses,305305- * we maintain a cache of devices that can be quickly searched.306306- * This routine adds a device to that cache.307307- */308308-static void pci_addr_cache_insert_device(struct pci_dev *dev)309309-{310310- unsigned long flags;311311-312312- spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);313313- __pci_addr_cache_insert_device(dev);314314- spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);315315-}316316-317317-static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)318318-{319319- struct rb_node *n;320320- int removed = 0;321321-322322-restart:323323- n = rb_first(&pci_io_addr_cache_root.rb_root);324324- while (n) {325325- struct pci_io_addr_range *piar;326326- piar = rb_entry(n, struct pci_io_addr_range, rb_node);327327-328328- if (piar->pcidev == dev) {329329- rb_erase(n, &pci_io_addr_cache_root.rb_root);330330- removed = 1;331331- kfree(piar);332332- goto restart;333333- }334334- n = rb_next(n);335335- }336336-337337- /* The cache no longer holds its reference to this device... 
*/338338- if (removed)339339- pci_dev_put(dev);340340-}341341-342342-/**343343- * pci_addr_cache_remove_device - remove pci device from addr cache344344- * @dev: device to remove345345- *346346- * Remove a device from the addr-cache tree.347347- * This is potentially expensive, since it will walk348348- * the tree multiple times (once per resource).349349- * But so what; device removal doesn't need to be that fast.350350- */351351-static void pci_addr_cache_remove_device(struct pci_dev *dev)352352-{353353- unsigned long flags;354354-355355- spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);356356- __pci_addr_cache_remove_device(dev);357357- spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);358358-}359359-360360-/**361361- * pci_addr_cache_build - Build a cache of I/O addresses362362- *363363- * Build a cache of pci i/o addresses. This cache will be used to364364- * find the pci device that corresponds to a given address.365365- * This routine scans all pci busses to build the cache.366366- * Must be run late in boot process, after the pci controllers367367- * have been scaned for devices (after all device resources are known).368368- */369369-void __init pci_addr_cache_build(void)370370-{371371- struct device_node *dn;372372- struct pci_dev *dev = NULL;373373-374374- if (!eeh_subsystem_enabled)375375- return;376376-377377- spin_lock_init(&pci_io_addr_cache_root.piar_lock);378378-379379- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {380380- /* Ignore PCI bridges ( XXX why ??) */381381- if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) {382382- continue;383383- }384384- pci_addr_cache_insert_device(dev);385385-386386- /* Save the BAR's; firmware doesn't restore these after EEH reset */387387- dn = pci_device_to_OF_node(dev);388388- eeh_save_bars(dev, PCI_DN(dn));389389- }390390-391391-#ifdef DEBUG392392- /* Verify tree built up above, echo back the list of addrs. 
*/393393- pci_addr_cache_print(&pci_io_addr_cache_root);394394-#endif395395-}110110+#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)396111397112/* --------------------------------------------------------------- */398398-/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */113113+/* Below lies the EEH event infrastructure */399114400115void eeh_slot_error_detail (struct pci_dn *pdn, int severity)401116{117117+ int config_addr;402118 unsigned long flags;403119 int rc;404120···121407 spin_lock_irqsave(&slot_errbuf_lock, flags);122408 memset(slot_errbuf, 0, eeh_error_buf_size);123409410410+ /* Use PE configuration address, if present */411411+ config_addr = pdn->eeh_config_addr;412412+ if (pdn->eeh_pe_config_addr)413413+ config_addr = pdn->eeh_pe_config_addr;414414+124415 rc = rtas_call(ibm_slot_error_detail,125125- 8, 1, NULL, pdn->eeh_config_addr,416416+ 8, 1, NULL, config_addr,126417 BUID_HI(pdn->phb->buid),127418 BUID_LO(pdn->phb->buid), NULL, 0,128419 virt_to_phys(slot_errbuf),···147428static int read_slot_reset_state(struct pci_dn *pdn, int rets[])148429{149430 int token, outputs;431431+ int config_addr;150432151433 if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {152434 token = ibm_read_slot_reset_state2;···158438 outputs = 3;159439 }160440161161- return rtas_call(token, 3, outputs, rets, pdn->eeh_config_addr,441441+ /* Use PE configuration address, if present */442442+ config_addr = pdn->eeh_config_addr;443443+ if (pdn->eeh_pe_config_addr)444444+ config_addr = pdn->eeh_pe_config_addr;445445+446446+ return rtas_call(token, 3, outputs, rets, config_addr,162447 BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid));163448}164449···187462/** 188463 * Return the "partitionable endpoint" (pe) under which this device lies189464 */190190-static struct device_node * find_device_pe(struct device_node *dn)465465+struct device_node * find_device_pe(struct device_node *dn)191466{192467 while ((dn->parent) && 
PCI_DN(dn->parent) &&193468 (PCI_DN(dn->parent)->eeh_mode & EEH_MODE_SUPPORTED)) {···210485 if (PCI_DN(dn)) {211486 PCI_DN(dn)->eeh_mode |= mode_flag;212487488488+ /* Mark the pci device driver too */489489+ struct pci_dev *dev = PCI_DN(dn)->pcidev;490490+ if (dev && dev->driver)491491+ dev->error_state = pci_channel_io_frozen;492492+213493 if (dn->child)214494 __eeh_mark_slot (dn->child, mode_flag);215495 }···225495void eeh_mark_slot (struct device_node *dn, int mode_flag)226496{227497 dn = find_device_pe (dn);498498+499499+ /* Back up one, since config addrs might be shared */500500+ if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr)501501+ dn = dn->parent;502502+228503 PCI_DN(dn)->eeh_mode |= mode_flag;229504 __eeh_mark_slot (dn->child, mode_flag);230505}···251516{252517 unsigned long flags;253518 spin_lock_irqsave(&confirm_error_lock, flags);519519+254520 dn = find_device_pe (dn);521521+522522+ /* Back up one, since config addrs might be shared */523523+ if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr)524524+ dn = dn->parent;525525+255526 PCI_DN(dn)->eeh_mode &= ~mode_flag;256527 PCI_DN(dn)->eeh_check_count = 0;257528 __eeh_clear_slot (dn->child, mode_flag);···285544 int rets[3];286545 unsigned long flags;287546 struct pci_dn *pdn;547547+ enum pci_channel_state state;288548 int rc = 0;289549290290- __get_cpu_var(total_mmio_ffs)++;550550+ total_mmio_ffs++;291551292552 if (!eeh_subsystem_enabled)293553 return 0;294554295555 if (!dn) {296296- __get_cpu_var(no_dn)++;556556+ no_dn++;297557 return 0;298558 }299559 pdn = PCI_DN(dn);···302560 /* Access to IO BARs might get this far and still not want checking. 
*/303561 if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||304562 pdn->eeh_mode & EEH_MODE_NOCHECK) {305305- __get_cpu_var(ignored_check)++;563563+ ignored_check++;306564#ifdef DEBUG307565 printk ("EEH:ignored check (%x) for %s %s\n", 308566 pdn->eeh_mode, pci_name (dev), dn->full_name);···310568 return 0;311569 }312570313313- if (!pdn->eeh_config_addr) {314314- __get_cpu_var(no_cfg_addr)++;571571+ if (!pdn->eeh_config_addr && !pdn->eeh_pe_config_addr) {572572+ no_cfg_addr++;315573 return 0;316574 }317575···353611 if (ret != 0) {354612 printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",355613 ret, dn->full_name);356356- __get_cpu_var(false_positives)++;614614+ false_positives++;357615 rc = 0;358616 goto dn_unlock;359617 }···362620 if (rets[1] != 1) {363621 printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",364622 ret, dn->full_name);365365- __get_cpu_var(false_positives)++;623623+ false_positives++;366624 rc = 0;367625 goto dn_unlock;368626 }369627370628 /* If not the kind of error we know about, punt. */371629 if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {372372- __get_cpu_var(false_positives)++;630630+ false_positives++;373631 rc = 0;374632 goto dn_unlock;375633 }···377635 /* Note that config-io to empty slots may fail;378636 * we recognize empty because they don't have children. 
*/379637 if ((rets[0] == 5) && (dn->child == NULL)) {380380- __get_cpu_var(false_positives)++;638638+ false_positives++;381639 rc = 0;382640 goto dn_unlock;383641 }384642385385- __get_cpu_var(slot_resets)++;643643+ slot_resets++;386644387645 /* Avoid repeated reports of this failure, including problems388646 * with other functions on this device, and functions under···390648 eeh_mark_slot (dn, EEH_MODE_ISOLATED);391649 spin_unlock_irqrestore(&confirm_error_lock, flags);392650393393- eeh_send_failure_event (dn, dev, rets[0], rets[2]);394394-651651+ state = pci_channel_io_normal;652652+ if ((rets[0] == 2) || (rets[0] == 4))653653+ state = pci_channel_io_frozen;654654+ if (rets[0] == 5)655655+ state = pci_channel_io_perm_failure;656656+ eeh_send_failure_event (dn, dev, state, rets[2]);657657+395658 /* Most EEH events are due to device driver bugs. Having396659 * a stack trace will help the device-driver authors figure397660 * out what happened. So print that out. */···432685 addr = eeh_token_to_phys((unsigned long __force) token);433686 dev = pci_get_device_by_addr(addr);434687 if (!dev) {435435- __get_cpu_var(no_device)++;688688+ no_device++;436689 return val;437690 }438691···463716 if (rc) return rc;464717465718 if (rets[1] == 0) return -1; /* EEH is not supported */466466- if (rets[0] == 0) return 0; /* Oll Korrect */719719+ if (rets[0] == 0) return 0; /* Oll Korrect */467720 if (rets[0] == 5) {468721 if (rets[2] == 0) return -1; /* permanently unavailable */469722 return rets[2]; /* number of millisecs to wait */470723 }724724+ if (rets[0] == 1)725725+ return 250;726726+727727+ printk (KERN_ERR "EEH: Slot unavailable: rc=%d, rets=%d %d %d\n",728728+ rc, rets[0], rets[1], rets[2]);471729 return -1;472730}473731···489737static void490738rtas_pci_slot_reset(struct pci_dn *pdn, int state)491739{740740+ int config_addr;492741 int rc;493742494743 BUG_ON (pdn==NULL); ···500747 return;501748 }502749750750+ /* Use PE configuration address, if present */751751+ config_addr 
= pdn->eeh_config_addr;752752+ if (pdn->eeh_pe_config_addr)753753+ config_addr = pdn->eeh_pe_config_addr;754754+503755 rc = rtas_call(ibm_set_slot_reset,4,1, NULL,504504- pdn->eeh_config_addr,756756+ config_addr,505757 BUID_HI(pdn->phb->buid),506758 BUID_LO(pdn->phb->buid),507759 state);···519761520762/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second521763 * dn -- device node to be reset.764764+ *765765+ * Return 0 if success, else a non-zero value.522766 */523767524524-void768768+int525769rtas_set_slot_reset(struct pci_dn *pdn)526770{527771 int i, rc;···553793 * ready to be used; if not, wait for recovery. */554794 for (i=0; i<10; i++) {555795 rc = eeh_slot_availability (pdn);556556- if (rc <= 0) break;796796+ if (rc < 0)797797+ printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n", rc, pdn->node->full_name);798798+ if (rc == 0)799799+ return 0;800800+ if (rc < 0)801801+ return -1;557802558803 msleep (rc+100);559804 }805805+806806+ rc = eeh_slot_availability (pdn);807807+ if (rc)808808+ printk (KERN_ERR "EEH: timeout resetting slot %s\n", pdn->node->full_name);809809+810810+ return rc;560811}561812562813/* ------------------------------------------------------- */···622851 if (!pdn) 623852 return;624853625625- if (! 
pdn->eeh_is_bridge)854854+ if ((pdn->eeh_mode & EEH_MODE_SUPPORTED) && !IS_BRIDGE(pdn->class_code))626855 __restore_bars (pdn);627856628857 dn = pdn->node->child;···640869 * PCI devices are added individuallly; but, for the restore,641870 * an entire slot is reset at a time.642871 */643643-static void eeh_save_bars(struct pci_dev * pdev, struct pci_dn *pdn)872872+static void eeh_save_bars(struct pci_dn *pdn)644873{645874 int i;646875647647- if (!pdev || !pdn )876876+ if (!pdn )648877 return;649878650879 for (i = 0; i < 16; i++)651651- pci_read_config_dword(pdev, i * 4, &pdn->config_space[i]);652652-653653- if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)654654- pdn->eeh_is_bridge = 1;880880+ rtas_read_config(pdn, i * 4, 4, &pdn->config_space[i]);655881}656882657883void658884rtas_configure_bridge(struct pci_dn *pdn)659885{660660- int token = rtas_token ("ibm,configure-bridge");886886+ int config_addr;661887 int rc;662888663663- if (token == RTAS_UNKNOWN_SERVICE)664664- return;665665- rc = rtas_call(token,3,1, NULL,666666- pdn->eeh_config_addr,889889+ /* Use PE configuration address, if present */890890+ config_addr = pdn->eeh_config_addr;891891+ if (pdn->eeh_pe_config_addr)892892+ config_addr = pdn->eeh_pe_config_addr;893893+894894+ rc = rtas_call(ibm_configure_bridge,3,1, NULL,895895+ config_addr,667896 BUID_HI(pdn->phb->buid),668897 BUID_LO(pdn->phb->buid));669898 if (rc) {···698927 int enable;699928 struct pci_dn *pdn = PCI_DN(dn);700929930930+ pdn->class_code = 0;701931 pdn->eeh_mode = 0;702932 pdn->eeh_check_count = 0;703933 pdn->eeh_freeze_count = 0;···715943 pdn->eeh_mode |= EEH_MODE_NOCHECK;716944 return NULL;717945 }946946+ pdn->class_code = *class_code;718947719948 /*720949 * Now decide if we are going to "Disable" EEH checking···726953 * But there are a few cases like display devices that make sense.727954 */728955 enable = 1; /* i.e. 
we will do checking */956956+#if 0729957 if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)730958 enable = 0;959959+#endif731960732961 if (!enable)733962 pdn->eeh_mode |= EEH_MODE_NOCHECK;···748973 eeh_subsystem_enabled = 1;749974 pdn->eeh_mode |= EEH_MODE_SUPPORTED;750975 pdn->eeh_config_addr = regs[0];976976+977977+ /* If the newer, better, ibm,get-config-addr-info is supported, 978978+ * then use that instead. */979979+ pdn->eeh_pe_config_addr = 0;980980+ if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {981981+ unsigned int rets[2];982982+ ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets, 983983+ pdn->eeh_config_addr, 984984+ info->buid_hi, info->buid_lo,985985+ 0);986986+ if (ret == 0)987987+ pdn->eeh_pe_config_addr = rets[0];988988+ }751989#ifdef DEBUG752752- printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name);990990+ printk(KERN_DEBUG "EEH: %s: eeh enabled, config=%x pe_config=%x\n",991991+ dn->full_name, pdn->eeh_config_addr, pdn->eeh_pe_config_addr);753992#endif754993 } else {755994···782993 dn->full_name);783994 }784995996996+ eeh_save_bars(pdn);785997 return NULL;786998}787999···8161026 ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");8171027 ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");8181028 ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");10291029+ ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");10301030+ ibm_configure_bridge = rtas_token ("ibm,configure-bridge");81910318201032 if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)8211033 return;···8721080 if (!dn || !PCI_DN(dn))8731081 return;8741082 phb = PCI_DN(dn)->phb;875875- if (NULL == phb || 0 == phb->buid) {876876- printk(KERN_WARNING "EEH: Expected buid but found none for %s\n",877877- dn->full_name);878878- dump_stack();10831083+10841084+ /* USB Bus children of PCI devices will not have BUID's */10851085+ if (NULL == phb || 0 == phb->buid)8791086 return;880880- }88110878821088 info.buid_hi = 
BUID_HI(phb->buid);8831089 info.buid_lo = BUID_LO(phb->buid);···9171127 pdn->pcidev = dev;91811289191129 pci_addr_cache_insert_device (dev);920920- eeh_save_bars(dev, pdn);9211130}9221131EXPORT_SYMBOL_GPL(eeh_add_device_late);9231132···96411759651176static int proc_eeh_show(struct seq_file *m, void *v)9661177{967967- unsigned int cpu;968968- unsigned long ffs = 0, positives = 0, failures = 0;969969- unsigned long resets = 0;970970- unsigned long no_dev = 0, no_dn = 0, no_cfg = 0, no_check = 0;971971-972972- for_each_cpu(cpu) {973973- ffs += per_cpu(total_mmio_ffs, cpu);974974- positives += per_cpu(false_positives, cpu);975975- failures += per_cpu(ignored_failures, cpu);976976- resets += per_cpu(slot_resets, cpu);977977- no_dev += per_cpu(no_device, cpu);978978- no_dn += per_cpu(no_dn, cpu);979979- no_cfg += per_cpu(no_cfg_addr, cpu);980980- no_check += per_cpu(ignored_check, cpu);981981- }982982-9831178 if (0 == eeh_subsystem_enabled) {9841179 seq_printf(m, "EEH Subsystem is globally disabled\n");985985- seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);11801180+ seq_printf(m, "eeh_total_mmio_ffs=%ld\n", total_mmio_ffs);9861181 } else {9871182 seq_printf(m, "EEH Subsystem is enabled\n");9881183 seq_printf(m,···9781205 "eeh_false_positives=%ld\n"9791206 "eeh_ignored_failures=%ld\n"9801207 "eeh_slot_resets=%ld\n",981981- no_dev, no_dn, no_cfg, no_check,982982- ffs, positives, failures, resets);12081208+ no_device, no_dn, no_cfg_addr, 12091209+ ignored_check, total_mmio_ffs, 12101210+ false_positives, ignored_failures, 12111211+ slot_resets);9831212 }98412139851214 return 0;
+316
arch/powerpc/platforms/pseries/eeh_cache.c
···11+/*22+ * eeh_cache.c33+ * PCI address cache; allows the lookup of PCI devices based on I/O address44+ *55+ * Copyright (C) 2004 Linas Vepstas <linas@austin.ibm.com> IBM Corporation66+ *77+ * This program is free software; you can redistribute it and/or modify88+ * it under the terms of the GNU General Public License as published by99+ * the Free Software Foundation; either version 2 of the License, or1010+ * (at your option) any later version.1111+ *1212+ * This program is distributed in the hope that it will be useful,1313+ * but WITHOUT ANY WARRANTY; without even the implied warranty of1414+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the1515+ * GNU General Public License for more details.1616+ *1717+ * You should have received a copy of the GNU General Public License1818+ * along with this program; if not, write to the Free Software1919+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA2020+ */2121+2222+#include <linux/list.h>2323+#include <linux/pci.h>2424+#include <linux/rbtree.h>2525+#include <linux/spinlock.h>2626+#include <asm/atomic.h>2727+#include <asm/pci-bridge.h>2828+#include <asm/ppc-pci.h>2929+3030+#undef DEBUG3131+3232+/**3333+ * The pci address cache subsystem. This subsystem places3434+ * PCI device address resources into a red-black tree, sorted3535+ * according to the address range, so that given only an i/o3636+ * address, the corresponding PCI device can be **quickly**3737+ * found. 
It is safe to perform an address lookup in an interrupt3838+ * context; this ability is an important feature.3939+ *4040+ * Currently, the only customer of this code is the EEH subsystem;4141+ * thus, this code has been somewhat tailored to suit EEH better.4242+ * In particular, the cache does *not* hold the addresses of devices4343+ * for which EEH is not enabled.4444+ *4545+ * (Implementation Note: The RB tree seems to be better/faster4646+ * than any hash algo I could think of for this problem, even4747+ * with the penalty of slow pointer chases for d-cache misses).4848+ */4949+struct pci_io_addr_range5050+{5151+ struct rb_node rb_node;5252+ unsigned long addr_lo;5353+ unsigned long addr_hi;5454+ struct pci_dev *pcidev;5555+ unsigned int flags;5656+};5757+5858+static struct pci_io_addr_cache5959+{6060+ struct rb_root rb_root;6161+ spinlock_t piar_lock;6262+} pci_io_addr_cache_root;6363+6464+static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)6565+{6666+ struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;6767+6868+ while (n) {6969+ struct pci_io_addr_range *piar;7070+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);7171+7272+ if (addr < piar->addr_lo) {7373+ n = n->rb_left;7474+ } else {7575+ if (addr > piar->addr_hi) {7676+ n = n->rb_right;7777+ } else {7878+ pci_dev_get(piar->pcidev);7979+ return piar->pcidev;8080+ }8181+ }8282+ }8383+8484+ return NULL;8585+}8686+8787+/**8888+ * pci_get_device_by_addr - Get device, given only address8989+ * @addr: mmio (PIO) phys address or i/o port number9090+ *9191+ * Given an mmio phys address, or a port number, find a pci device9292+ * that implements this address. Be sure to pci_dev_put the device9393+ * when finished. 
I/O port numbers are assumed to be offset9494+ * from zero (that is, they do *not* have pci_io_addr added in).9595+ * It is safe to call this function within an interrupt.9696+ */9797+struct pci_dev *pci_get_device_by_addr(unsigned long addr)9898+{9999+ struct pci_dev *dev;100100+ unsigned long flags;101101+102102+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);103103+ dev = __pci_get_device_by_addr(addr);104104+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);105105+ return dev;106106+}107107+108108+#ifdef DEBUG109109+/*110110+ * Handy-dandy debug print routine, does nothing more111111+ * than print out the contents of our addr cache.112112+ */113113+static void pci_addr_cache_print(struct pci_io_addr_cache *cache)114114+{115115+ struct rb_node *n;116116+ int cnt = 0;117117+118118+ n = rb_first(&cache->rb_root);119119+ while (n) {120120+ struct pci_io_addr_range *piar;121121+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);122122+ printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n",123123+ (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,124124+ piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));125125+ cnt++;126126+ n = rb_next(n);127127+ }128128+}129129+#endif130130+131131+/* Insert address range into the rb tree. 
*/132132+static struct pci_io_addr_range *133133+pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,134134+ unsigned long ahi, unsigned int flags)135135+{136136+ struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;137137+ struct rb_node *parent = NULL;138138+ struct pci_io_addr_range *piar;139139+140140+ /* Walk tree, find a place to insert into tree */141141+ while (*p) {142142+ parent = *p;143143+ piar = rb_entry(parent, struct pci_io_addr_range, rb_node);144144+ if (ahi < piar->addr_lo) {145145+ p = &parent->rb_left;146146+ } else if (alo > piar->addr_hi) {147147+ p = &parent->rb_right;148148+ } else {149149+ if (dev != piar->pcidev ||150150+ alo != piar->addr_lo || ahi != piar->addr_hi) {151151+ printk(KERN_WARNING "PIAR: overlapping address range\n");152152+ }153153+ return piar;154154+ }155155+ }156156+ piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);157157+ if (!piar)158158+ return NULL;159159+160160+ piar->addr_lo = alo;161161+ piar->addr_hi = ahi;162162+ piar->pcidev = dev;163163+ piar->flags = flags;164164+165165+#ifdef DEBUG166166+ printk(KERN_DEBUG "PIAR: insert range=[%lx:%lx] dev=%s\n",167167+ alo, ahi, pci_name (dev));168168+#endif169169+170170+ rb_link_node(&piar->rb_node, parent, p);171171+ rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);172172+173173+ return piar;174174+}175175+176176+static void __pci_addr_cache_insert_device(struct pci_dev *dev)177177+{178178+ struct device_node *dn;179179+ struct pci_dn *pdn;180180+ int i;181181+ int inserted = 0;182182+183183+ dn = pci_device_to_OF_node(dev);184184+ if (!dn) {185185+ printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", pci_name(dev));186186+ return;187187+ }188188+189189+ /* Skip any devices for which EEH is not enabled. 
*/190190+ pdn = PCI_DN(dn);191191+ if (!(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||192192+ pdn->eeh_mode & EEH_MODE_NOCHECK) {193193+#ifdef DEBUG194194+ printk(KERN_INFO "PCI: skip building address cache for=%s - %s\n",195195+ pci_name(dev), pdn->node->full_name);196196+#endif197197+ return;198198+ }199199+200200+ /* The cache holds a reference to the device... */201201+ pci_dev_get(dev);202202+203203+ /* Walk resources on this device, poke them into the tree */204204+ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {205205+ unsigned long start = pci_resource_start(dev,i);206206+ unsigned long end = pci_resource_end(dev,i);207207+ unsigned int flags = pci_resource_flags(dev,i);208208+209209+ /* We are interested only bus addresses, not dma or other stuff */210210+ if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))211211+ continue;212212+ if (start == 0 || ~start == 0 || end == 0 || ~end == 0)213213+ continue;214214+ pci_addr_cache_insert(dev, start, end, flags);215215+ inserted = 1;216216+ }217217+218218+ /* If there was nothing to add, the cache has no reference... 
*/219219+ if (!inserted)220220+ pci_dev_put(dev);221221+}222222+223223+/**224224+ * pci_addr_cache_insert_device - Add a device to the address cache225225+ * @dev: PCI device whose I/O addresses we are interested in.226226+ *227227+ * In order to support the fast lookup of devices based on addresses,228228+ * we maintain a cache of devices that can be quickly searched.229229+ * This routine adds a device to that cache.230230+ */231231+void pci_addr_cache_insert_device(struct pci_dev *dev)232232+{233233+ unsigned long flags;234234+235235+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);236236+ __pci_addr_cache_insert_device(dev);237237+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);238238+}239239+240240+static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)241241+{242242+ struct rb_node *n;243243+ int removed = 0;244244+245245+restart:246246+ n = rb_first(&pci_io_addr_cache_root.rb_root);247247+ while (n) {248248+ struct pci_io_addr_range *piar;249249+ piar = rb_entry(n, struct pci_io_addr_range, rb_node);250250+251251+ if (piar->pcidev == dev) {252252+ rb_erase(n, &pci_io_addr_cache_root.rb_root);253253+ removed = 1;254254+ kfree(piar);255255+ goto restart;256256+ }257257+ n = rb_next(n);258258+ }259259+260260+ /* The cache no longer holds its reference to this device... 
*/261261+ if (removed)262262+ pci_dev_put(dev);263263+}264264+265265+/**266266+ * pci_addr_cache_remove_device - remove pci device from addr cache267267+ * @dev: device to remove268268+ *269269+ * Remove a device from the addr-cache tree.270270+ * This is potentially expensive, since it will walk271271+ * the tree multiple times (once per resource).272272+ * But so what; device removal doesn't need to be that fast.273273+ */274274+void pci_addr_cache_remove_device(struct pci_dev *dev)275275+{276276+ unsigned long flags;277277+278278+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);279279+ __pci_addr_cache_remove_device(dev);280280+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);281281+}282282+283283+/**284284+ * pci_addr_cache_build - Build a cache of I/O addresses285285+ *286286+ * Build a cache of pci i/o addresses. This cache will be used to287287+ * find the pci device that corresponds to a given address.288288+ * This routine scans all pci busses to build the cache.289289+ * Must be run late in boot process, after the pci controllers290290+ * have been scaned for devices (after all device resources are known).291291+ */292292+void __init pci_addr_cache_build(void)293293+{294294+ struct device_node *dn;295295+ struct pci_dev *dev = NULL;296296+297297+ spin_lock_init(&pci_io_addr_cache_root.piar_lock);298298+299299+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {300300+ /* Ignore PCI bridges */301301+ if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)302302+ continue;303303+304304+ pci_addr_cache_insert_device(dev);305305+306306+ dn = pci_device_to_OF_node(dev);307307+ pci_dev_get (dev); /* matching put is in eeh_remove_device() */308308+ PCI_DN(dn)->pcidev = dev;309309+ }310310+311311+#ifdef DEBUG312312+ /* Verify tree built up above, echo back the list of addrs. */313313+ pci_addr_cache_print(&pci_io_addr_cache_root);314314+#endif315315+}316316+
+376
arch/powerpc/platforms/pseries/eeh_driver.c
···11+/*22+ * PCI Error Recovery Driver for RPA-compliant PPC64 platform.33+ * Copyright (C) 2004, 2005 Linas Vepstas <linas@linas.org>44+ *55+ * All rights reserved.66+ *77+ * This program is free software; you can redistribute it and/or modify88+ * it under the terms of the GNU General Public License as published by99+ * the Free Software Foundation; either version 2 of the License, or (at1010+ * your option) any later version.1111+ *1212+ * This program is distributed in the hope that it will be useful, but1313+ * WITHOUT ANY WARRANTY; without even the implied warranty of1414+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or1515+ * NON INFRINGEMENT. See the GNU General Public License for more1616+ * details.1717+ *1818+ * You should have received a copy of the GNU General Public License1919+ * along with this program; if not, write to the Free Software2020+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.2121+ *2222+ * Send feedback to <linas@us.ibm.com>2323+ *2424+ */2525+#include <linux/delay.h>2626+#include <linux/irq.h>2727+#include <linux/interrupt.h>2828+#include <linux/notifier.h>2929+#include <linux/pci.h>3030+#include <asm/eeh.h>3131+#include <asm/eeh_event.h>3232+#include <asm/ppc-pci.h>3333+#include <asm/pci-bridge.h>3434+#include <asm/prom.h>3535+#include <asm/rtas.h>3636+3737+3838+static inline const char * pcid_name (struct pci_dev *pdev)3939+{4040+ if (pdev->dev.driver)4141+ return pdev->dev.driver->name;4242+ return "";4343+}4444+4545+#ifdef DEBUG4646+static void print_device_node_tree (struct pci_dn *pdn, int dent)4747+{4848+ int i;4949+ if (!pdn) return;5050+ for (i=0;i<dent; i++)5151+ printk(" ");5252+ printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",5353+ pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,5454+ pdn->eeh_pe_config_addr, pdn->node->full_name);5555+ dent += 3;5656+ struct device_node *pc = pdn->node->child;5757+ while (pc) {5858+ print_device_node_tree(PCI_DN(pc), dent);5959+ pc = 
pc->sibling;6060+ }6161+}6262+#endif6363+6464+/** 6565+ * irq_in_use - return true if this irq is being used 6666+ */6767+static int irq_in_use(unsigned int irq)6868+{6969+ int rc = 0;7070+ unsigned long flags;7171+ struct irq_desc *desc = irq_desc + irq;7272+7373+ spin_lock_irqsave(&desc->lock, flags);7474+ if (desc->action)7575+ rc = 1;7676+ spin_unlock_irqrestore(&desc->lock, flags);7777+ return rc;7878+}7979+8080+/* ------------------------------------------------------- */8181+/** eeh_report_error - report an EEH error to each device,8282+ * collect up and merge the device responses.8383+ */8484+8585+static void eeh_report_error(struct pci_dev *dev, void *userdata)8686+{8787+ enum pci_ers_result rc, *res = userdata;8888+ struct pci_driver *driver = dev->driver;8989+9090+ dev->error_state = pci_channel_io_frozen;9191+9292+ if (!driver)9393+ return;9494+9595+ if (irq_in_use (dev->irq)) {9696+ struct device_node *dn = pci_device_to_OF_node(dev);9797+ PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;9898+ disable_irq_nosync(dev->irq);9999+ }100100+ if (!driver->err_handler)101101+ return;102102+ if (!driver->err_handler->error_detected)103103+ return;104104+105105+ rc = driver->err_handler->error_detected (dev, pci_channel_io_frozen);106106+ if (*res == PCI_ERS_RESULT_NONE) *res = rc;107107+ if (*res == PCI_ERS_RESULT_NEED_RESET) return;108108+ if (*res == PCI_ERS_RESULT_DISCONNECT &&109109+ rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;110110+}111111+112112+/** eeh_report_reset -- tell this device that the pci slot113113+ * has been reset.114114+ */115115+116116+static void eeh_report_reset(struct pci_dev *dev, void *userdata)117117+{118118+ struct pci_driver *driver = dev->driver;119119+ struct device_node *dn = pci_device_to_OF_node(dev);120120+121121+ if (!driver)122122+ return;123123+124124+ if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) {125125+ PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED;126126+ enable_irq(dev->irq);127127+ }128128+ if 
(!driver->err_handler)129129+ return;130130+ if (!driver->err_handler->slot_reset)131131+ return;132132+133133+ driver->err_handler->slot_reset(dev);134134+}135135+136136+static void eeh_report_resume(struct pci_dev *dev, void *userdata)137137+{138138+ struct pci_driver *driver = dev->driver;139139+140140+ dev->error_state = pci_channel_io_normal;141141+142142+ if (!driver)143143+ return;144144+ if (!driver->err_handler)145145+ return;146146+ if (!driver->err_handler->resume)147147+ return;148148+149149+ driver->err_handler->resume(dev);150150+}151151+152152+static void eeh_report_failure(struct pci_dev *dev, void *userdata)153153+{154154+ struct pci_driver *driver = dev->driver;155155+156156+ dev->error_state = pci_channel_io_perm_failure;157157+158158+ if (!driver)159159+ return;160160+161161+ if (irq_in_use (dev->irq)) {162162+ struct device_node *dn = pci_device_to_OF_node(dev);163163+ PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;164164+ disable_irq_nosync(dev->irq);165165+ }166166+ if (!driver->err_handler)167167+ return;168168+ if (!driver->err_handler->error_detected)169169+ return;170170+ driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);171171+}172172+173173+/* ------------------------------------------------------- */174174+/**175175+ * handle_eeh_events -- reset a PCI device after hard lockup.176176+ *177177+ * pSeries systems will isolate a PCI slot if the PCI-Host178178+ * bridge detects address or data parity errors, DMA's179179+ * occuring to wild addresses (which usually happen due to180180+ * bugs in device drivers or in PCI adapter firmware).181181+ * Slot isolations also occur if #SERR, #PERR or other misc182182+ * PCI-related errors are detected.183183+ *184184+ * Recovery process consists of unplugging the device driver185185+ * (which generated hotplug events to userspace), then issuing186186+ * a PCI #RST to the device, then reconfiguring the PCI config187187+ * space for all bridges & devices under this slot, and 
then188188+ * finally restarting the device drivers (which cause a second189189+ * set of hotplug events to go out to userspace).190190+ */191191+192192+/**193193+ * eeh_reset_device() -- perform actual reset of a pci slot194194+ * Args: bus: pointer to the pci bus structure corresponding195195+ * to the isolated slot. A non-null value will196196+ * cause all devices under the bus to be removed197197+ * and then re-added.198198+ * pe_dn: pointer to a "Partionable Endpoint" device node.199199+ * This is the top-level structure on which pci200200+ * bus resets can be performed.201201+ */202202+203203+static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)204204+{205205+ int rc;206206+ if (bus)207207+ pcibios_remove_pci_devices(bus);208208+209209+ /* Reset the pci controller. (Asserts RST#; resets config space).210210+ * Reconfigure bridges and devices. Don't try to bring the system211211+ * up if the reset failed for some reason. */212212+ rc = rtas_set_slot_reset(pe_dn);213213+ if (rc)214214+ return rc;215215+216216+ /* New-style config addrs might be shared across multiple devices,217217+ * Walk over all functions on this device */218218+ if (pe_dn->eeh_pe_config_addr) {219219+ struct device_node *pe = pe_dn->node;220220+ pe = pe->parent->child;221221+ while (pe) {222222+ struct pci_dn *ppe = PCI_DN(pe);223223+ if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) {224224+ rtas_configure_bridge(ppe);225225+ eeh_restore_bars(ppe);226226+ }227227+ pe = pe->sibling;228228+ }229229+ } else {230230+ rtas_configure_bridge(pe_dn);231231+ eeh_restore_bars(pe_dn);232232+ }233233+234234+ /* Give the system 5 seconds to finish running the user-space235235+ * hotplug shutdown scripts, e.g. ifdown for ethernet. 
Yes, 236236+ * this is a hack, but if we don't do this, and try to bring 237237+ * the device up before the scripts have taken it down, 238238+ * potentially weird things happen.239239+ */240240+ if (bus) {241241+ ssleep (5);242242+ pcibios_add_pci_devices(bus);243243+ }244244+245245+ return 0;246246+}247247+248248+/* The longest amount of time to wait for a pci device249249+ * to come back on line, in seconds.250250+ */251251+#define MAX_WAIT_FOR_RECOVERY 15252252+253253+void handle_eeh_events (struct eeh_event *event)254254+{255255+ struct device_node *frozen_dn;256256+ struct pci_dn *frozen_pdn;257257+ struct pci_bus *frozen_bus;258258+ int rc = 0;259259+ enum pci_ers_result result = PCI_ERS_RESULT_NONE;260260+261261+ frozen_dn = find_device_pe(event->dn);262262+ frozen_bus = pcibios_find_pci_bus(frozen_dn);263263+264264+ if (!frozen_dn) {265265+ printk(KERN_ERR "EEH: Error: Cannot find partition endpoint for %s\n",266266+ pci_name(event->dev));267267+ return;268268+ }269269+270270+ /* There are two different styles for coming up with the PE.271271+ * In the old style, it was the highest EEH-capable device272272+ * which was always an EADS pci bridge. In the new style,273273+ * there might not be any EADS bridges, and even when there are,274274+ * the firmware marks them as "EEH incapable". So another275275+ * two-step is needed to find the pci bus.. */276276+ if (!frozen_bus)277277+ frozen_bus = pcibios_find_pci_bus (frozen_dn->parent);278278+279279+ if (!frozen_bus) {280280+ printk(KERN_ERR "EEH: Cannot find PCI bus for %s\n",281281+ frozen_dn->full_name);282282+ return;283283+ }284284+285285+#if 0286286+ /* We may get "permanent failure" messages on empty slots.287287+ * These are false alarms. Empty slots have no child dn. 
*/288288+ if ((event->state == pci_channel_io_perm_failure) && (frozen_device == NULL))289289+ return;290290+#endif291291+292292+ frozen_pdn = PCI_DN(frozen_dn);293293+ frozen_pdn->eeh_freeze_count++;294294+295295+ if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)296296+ goto hard_fail;297297+298298+ /* If the reset state is a '5' and the time to reset is 0 (infinity)299299+ * or is more then 15 seconds, then mark this as a permanent failure.300300+ */301301+ if ((event->state == pci_channel_io_perm_failure) &&302302+ ((event->time_unavail <= 0) ||303303+ (event->time_unavail > MAX_WAIT_FOR_RECOVERY*1000)))304304+ goto hard_fail;305305+306306+ eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */);307307+ printk(KERN_WARNING308308+ "EEH: This PCI device has failed %d times since last reboot: %s - %s\n",309309+ frozen_pdn->eeh_freeze_count,310310+ pci_name (frozen_pdn->pcidev), 311311+ pcid_name(frozen_pdn->pcidev));312312+313313+ /* Walk the various device drivers attached to this slot through314314+ * a reset sequence, giving each an opportunity to do what it needs315315+ * to accomplish the reset. Each child gets a report of the316316+ * status ... 
if any child can't handle the reset, then the entire317317+ * slot is dlpar removed and added.318318+ */319319+ pci_walk_bus(frozen_bus, eeh_report_error, &result);320320+321321+ /* If all device drivers were EEH-unaware, then shut322322+ * down all of the device drivers, and hope they323323+ * go down willingly, without panicing the system.324324+ */325325+ if (result == PCI_ERS_RESULT_NONE) {326326+ rc = eeh_reset_device(frozen_pdn, frozen_bus);327327+ if (rc)328328+ goto hard_fail;329329+ }330330+331331+ /* If any device called out for a reset, then reset the slot */332332+ if (result == PCI_ERS_RESULT_NEED_RESET) {333333+ rc = eeh_reset_device(frozen_pdn, NULL);334334+ if (rc)335335+ goto hard_fail;336336+ pci_walk_bus(frozen_bus, eeh_report_reset, 0);337337+ }338338+339339+ /* If all devices reported they can proceed, the re-enable PIO */340340+ if (result == PCI_ERS_RESULT_CAN_RECOVER) {341341+ /* XXX Not supported; we brute-force reset the device */342342+ rc = eeh_reset_device(frozen_pdn, NULL);343343+ if (rc)344344+ goto hard_fail;345345+ pci_walk_bus(frozen_bus, eeh_report_reset, 0);346346+ }347347+348348+ /* Tell all device drivers that they can resume operations */349349+ pci_walk_bus(frozen_bus, eeh_report_resume, 0);350350+351351+ return;352352+353353+hard_fail:354354+ /*355355+ * About 90% of all real-life EEH failures in the field356356+ * are due to poorly seated PCI cards. Only 10% or so are357357+ * due to actual, failed cards.358358+ */359359+ printk(KERN_ERR360360+ "EEH: PCI device %s - %s has failed %d times \n"361361+ "and has been permanently disabled. Please try reseating\n"362362+ "this device or replacing it.\n",363363+ pci_name (frozen_pdn->pcidev), 364364+ pcid_name(frozen_pdn->pcidev), 365365+ frozen_pdn->eeh_freeze_count);366366+367367+ eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */);368368+369369+ /* Notify all devices that they're about to go down. 
*/370370+ pci_walk_bus(frozen_bus, eeh_report_failure, 0);371371+372372+ /* Shut down the device drivers for good. */373373+ pcibios_remove_pci_devices(frozen_bus);374374+}375375+376376+/* ---------- end of file ---------- */
+12-27
arch/powerpc/platforms/pseries/eeh_event.c
···2121#include <linux/list.h>2222#include <linux/pci.h>2323#include <asm/eeh_event.h>2424+#include <asm/ppc-pci.h>24252526/** Overview:2627 * EEH error states may be detected within exception handlers;···3635LIST_HEAD(eeh_eventlist);3736static void eeh_thread_launcher(void *);3837DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL);3939-4040-/**4141- * eeh_panic - call panic() for an eeh event that cannot be handled.4242- * The philosophy of this routine is that it is better to panic and4343- * halt the OS than it is to risk possible data corruption by4444- * oblivious device drivers that don't know better.4545- *4646- * @dev pci device that had an eeh event4747- * @reset_state current reset state of the device slot4848- */4949-static void eeh_panic(struct pci_dev *dev, int reset_state)5050-{5151- /*5252- * Since the panic_on_oops sysctl is used to halt the system5353- * in light of potential corruption, we can use it here.5454- */5555- if (panic_on_oops) {5656- panic("EEH: MMIO failure (%d) on device:%s\n", reset_state,5757- pci_name(dev));5858- }5959- else {6060- printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n",6161- reset_state, pci_name(dev));6262- }6363-}64386539/**6640 * eeh_event_handler - dispatch EEH events. The detection of a frozen···58825983 spin_lock_irqsave(&eeh_eventlist_lock, flags);6084 event = NULL;8585+8686+ /* Unqueue the event, get ready to process. 
*/6187 if (!list_empty(&eeh_eventlist)) {6288 event = list_entry(eeh_eventlist.next, struct eeh_event, list);6389 list_del(&event->list);6490 }9191+9292+ if (event)9393+ eeh_mark_slot(event->dn, EEH_MODE_RECOVERING);9494+6595 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);6696 if (event == NULL)6797 break;···7593 printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",7694 pci_name(event->dev));77957878- eeh_panic (event->dev, event->state);9696+ handle_eeh_events(event);79979898+ eeh_clear_slot(event->dn, EEH_MODE_RECOVERING);9999+100100+ pci_dev_put(event->dev);80101 kfree(event);81102 }82103···107122 */108123int eeh_send_failure_event (struct device_node *dn,109124 struct pci_dev *dev,110110- int state,125125+ enum pci_channel_state state,111126 int time_unavail)112127{113128 unsigned long flags;
···3434extern int eeh_subsystem_enabled;35353636/* Values for eeh_mode bits in device_node */3737-#define EEH_MODE_SUPPORTED (1<<0)3838-#define EEH_MODE_NOCHECK (1<<1)3939-#define EEH_MODE_ISOLATED (1<<2)3737+#define EEH_MODE_SUPPORTED (1<<0)3838+#define EEH_MODE_NOCHECK (1<<1)3939+#define EEH_MODE_ISOLATED (1<<2)4040+#define EEH_MODE_RECOVERING (1<<3)4141+#define EEH_MODE_IRQ_DISABLED (1<<4)40424143/* Max number of EEH freezes allowed before we consider the device4244 * to be permanently disabled. */
+5-2
include/asm-powerpc/eeh_event.h
···3030 struct list_head list;3131 struct device_node *dn; /* struct device node */3232 struct pci_dev *dev; /* affected device */3333- int state;3333+ enum pci_channel_state state; /* PCI bus state for the affected device */3434 int time_unavail; /* milliseconds until device might be available */3535};3636···4747 */4848int eeh_send_failure_event (struct device_node *dn,4949 struct pci_dev *dev,5050- int reset_state,5050+ enum pci_channel_state state,5151 int time_unavail);5252+5353+/* Main recovery function */5454+void handle_eeh_events (struct eeh_event *);52555356#endif /* __KERNEL__ */5457#endif /* ASM_PPC64_EEH_EVENT_H */
+5-4
include/asm-powerpc/pci-bridge.h
···6161struct iommu_table;62626363struct pci_dn {6464- int busno; /* for pci devices */6565- int bussubno; /* for pci devices */6666- int devfn; /* for pci devices */6464+ int busno; /* pci bus number */6565+ int bussubno; /* pci subordinate bus number */6666+ int devfn; /* pci device and function number */6767+ int class_code; /* pci device class */67686869#ifdef CONFIG_PPC_PSERIES6970 int eeh_mode; /* See eeh.h for possible EEH_MODEs */7071 int eeh_config_addr;7272+ int eeh_pe_config_addr; /* new-style partition endpoint address */7173 int eeh_check_count; /* # times driver ignored error */7274 int eeh_freeze_count; /* # times this device froze up. */7373- int eeh_is_bridge; /* device is pci-to-pci bridge */7475#endif7576 int pci_ext_config_space; /* for pci devices */7677 struct pci_controller *phb; /* for pci devices */
+22-1
include/asm-powerpc/ppc-pci.h
···52525353/* ---- EEH internal-use-only related routines ---- */5454#ifdef CONFIG_EEH5555+5656+void pci_addr_cache_insert_device(struct pci_dev *dev);5757+void pci_addr_cache_remove_device(struct pci_dev *dev);5858+void pci_addr_cache_build(void);5959+struct pci_dev *pci_get_device_by_addr(unsigned long addr);6060+6161+/**6262+ * eeh_slot_error_detail -- record an EEH error condition to the log6363+ * @severity: 1 if temporary, 2 if permanent failure.6464+ *6565+ * Obtains the EEH error details from the RTAS subsystem,6666+ * and then logs these details with the RTAS error log system.6767+ */6868+void eeh_slot_error_detail (struct pci_dn *pdn, int severity);6969+5570/**5671 * rtas_set_slot_reset -- unfreeze a frozen slot5772 *···7459 * does this by asserting the PCI #RST line for 1/8th of7560 * a second; this routine will sleep while the adapter is7661 * being reset.6262+ *6363+ * Returns a non-zero value if the reset failed.7764 */7878-void rtas_set_slot_reset (struct pci_dn *);6565+int rtas_set_slot_reset (struct pci_dn *);79668067/** 8168 * eeh_restore_bars - Restore device configuration info.···10184void rtas_configure_bridge(struct pci_dn *);1028510386int rtas_write_config(struct pci_dn *, int where, int size, u32 val);8787+int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);1048810589/**10690 * mark and clear slots: find "partition endpoint" PE and set or ···10991 */11092void eeh_mark_slot (struct device_node *dn, int mode_flag);11193void eeh_clear_slot (struct device_node *dn, int mode_flag);9494+9595+/* Find the associated "Partitionable Endpoint" PE */9696+struct device_node * find_device_pe(struct device_node *dn);1129711398#endif11499
+4
include/asm-powerpc/serial.h
···1515/* Default baud base if not found in device-tree */1616#define BASE_BAUD ( 1843200 / 16 )17171818+#ifdef CONFIG_PPC_UDBG_165501819extern void find_legacy_serial_ports(void);2020+#else2121+#define find_legacy_serial_ports() do { } while (0)2222+#endif19232024#endif /* _PPC64_SERIAL_H */