Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[POWERPC] EEH: wait for slot status

Modify the routine that returns PCI slot status so that it waits for the
slot status to become available. This is needed because slots located in
a remote card cage may go offline for extended periods of time. New users
of this routine are added in following patches.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>

authored by

Linas Vepstas and committed by
Paul Mackerras
9c547768 90375f53

+61 -52
+59 -51
arch/powerpc/platforms/pseries/eeh.c
··· 76 76 */ 77 77 #define EEH_MAX_FAILS 2100000 78 78 79 + /* Time to wait for a PCI slot to report status, in milliseconds */ 80 + #define PCI_BUS_RESET_WAIT_MSEC (60*1000) 81 + 79 82 /* RTAS tokens */ 80 83 static int ibm_set_eeh_option; 81 84 static int ibm_set_slot_reset; ··· 169 166 170 167 return rtas_call(token, 3, outputs, rets, config_addr, 171 168 BUID_HI(pdn->phb->buid), BUID_LO(pdn->phb->buid)); 169 + } 170 + 171 + /** 172 + * eeh_wait_for_slot_status - returns error status of slot 173 + * @pdn pci device node 174 + * @max_wait_msecs maximum number of millisecs to wait 175 + * 176 + * Return negative value if a permanent error, else return 177 + * Partition Endpoint (PE) status value. 178 + * 179 + * If @max_wait_msecs is positive, then this routine will 180 + * sleep until a valid status can be obtained, or until 181 + * the max allowed wait time is exceeded, in which case 182 + * a -2 is returned. 183 + */ 184 + int 185 + eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs) 186 + { 187 + int rc; 188 + int rets[3]; 189 + int mwait; 190 + 191 + while (1) { 192 + rc = read_slot_reset_state(pdn, rets); 193 + if (rc) return rc; 194 + if (rets[1] == 0) return -1; /* EEH is not supported */ 195 + 196 + if (rets[0] != 5) return rets[0]; /* return actual status */ 197 + 198 + if (rets[2] == 0) return -1; /* permanently unavailable */ 199 + 200 + if (max_wait_msecs <= 0) return -1; 201 + 202 + mwait = rets[2]; 203 + if (mwait <= 0) { 204 + printk (KERN_WARNING 205 + "EEH: Firmware returned bad wait value=%d\n", mwait); 206 + mwait = 1000; 207 + } else if (mwait > 300*1000) { 208 + printk (KERN_WARNING 209 + "EEH: Firmware is taking too long, time=%d\n", mwait); 210 + mwait = 300*1000; 211 + } 212 + max_wait_msecs -= mwait; 213 + msleep (mwait); 214 + } 215 + 216 + printk(KERN_WARNING "EEH: Timed out waiting for slot status\n"); 217 + return -2; 172 218 } 173 219 174 220 /** ··· 511 459 /* The code below deals with error recovery */ 512 460 513 461 
/** 514 - * eeh_slot_availability - returns error status of slot 515 - * @pdn pci device node 516 - * 517 - * Return negative value if a permanent error, else return 518 - * a number of milliseconds to wait until the PCI slot is 519 - * ready to be used. 520 - */ 521 - static int 522 - eeh_slot_availability(struct pci_dn *pdn) 523 - { 524 - int rc; 525 - int rets[3]; 526 - 527 - rc = read_slot_reset_state(pdn, rets); 528 - 529 - if (rc) return rc; 530 - 531 - if (rets[1] == 0) return -1; /* EEH is not supported */ 532 - if (rets[0] == 0) return 0; /* Oll Korrect */ 533 - if (rets[0] == 5) { 534 - if (rets[2] == 0) return -1; /* permanently unavailable */ 535 - return rets[2]; /* number of millisecs to wait */ 536 - } 537 - if (rets[0] == 1) 538 - return 250; 539 - 540 - printk (KERN_ERR "EEH: Slot unavailable: rc=%d, rets=%d %d %d\n", 541 - rc, rets[0], rets[1], rets[2]); 542 - return -2; 543 - } 544 - 545 - /** 546 462 * rtas_pci_enable - enable MMIO or DMA transfers for this slot 547 463 * @pdn pci device node 548 464 */ ··· 616 596 { 617 597 int i, rc; 618 598 619 - __rtas_set_slot_reset(pdn); 599 + /* Take three shots at resetting the bus */ 600 + for (i=0; i<3; i++) { 601 + __rtas_set_slot_reset(pdn); 620 602 621 - /* Now double check with the firmware to make sure the device is 622 - * ready to be used; if not, wait for recovery. 
*/ 623 - for (i=0; i<10; i++) { 624 - rc = eeh_slot_availability (pdn); 603 + rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC); 625 604 if (rc == 0) 626 605 return 0; 627 - 628 - if (rc == -2) { 629 - printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n", 630 - i, pdn->node->full_name); 631 - __rtas_set_slot_reset(pdn); 632 - continue; 633 - } 634 606 635 607 if (rc < 0) { 636 608 printk (KERN_ERR "EEH: unrecoverable slot failure %s\n", 637 609 pdn->node->full_name); 638 610 return -1; 639 611 } 640 - 641 - msleep (rc+100); 612 + printk (KERN_ERR "EEH: bus reset %d failed on slot %s\n", 613 + i+1, pdn->node->full_name); 642 614 } 643 615 644 - rc = eeh_slot_availability (pdn); 645 - if (rc) 646 - printk (KERN_ERR "EEH: timeout resetting slot %s\n", pdn->node->full_name); 647 - 648 - return rc; 616 + return -1; 649 617 } 650 618 651 619 /* ------------------------------------------------------- */
+2 -1
include/asm-powerpc/ppc-pci.h
··· 70 70 void eeh_slot_error_detail (struct pci_dn *pdn, int severity); 71 71 72 72 /** 73 - * rtas_pci_enableo - enable IO transfers for this slot 73 + * rtas_pci_enable - enable IO transfers for this slot 74 74 * @pdn: pci device node 75 75 * @function: either EEH_THAW_MMIO or EEH_THAW_DMA 76 76 * ··· 91 91 * Returns a non-zero value if the reset failed. 92 92 */ 93 93 int rtas_set_slot_reset (struct pci_dn *); 94 + int eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs); 94 95 95 96 /** 96 97 * eeh_restore_bars - Restore device configuration info.