Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

s2io: add PCI error recovery support

This patch adds PCI error recovery support to the
s2io 10-Gigabit ethernet device driver. Third revision,
blocks interrupts and the watchdog.

Tested, seems to work well.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Acked-by: Ramkrishna Vepa <Ramkrishna.Vepa@neterion.com>
Cc: Raghavendra Koushik <raghavendra.koushik@neterion.com>
Cc: Wen Xiong <wenxiong@us.ibm.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>

Authored by Linas Vepstas and committed by Jeff Garzik
d796fdb7 bd5824f1

+116 -5
+111 -5
drivers/net/s2io.c
··· 469 469 470 470 MODULE_DEVICE_TABLE(pci, s2io_tbl); 471 471 472 + static struct pci_error_handlers s2io_err_handler = { 473 + .error_detected = s2io_io_error_detected, 474 + .slot_reset = s2io_io_slot_reset, 475 + .resume = s2io_io_resume, 476 + }; 477 + 472 478 static struct pci_driver s2io_driver = { 473 479 .name = "S2IO", 474 480 .id_table = s2io_tbl, 475 481 .probe = s2io_init_nic, 476 482 .remove = __devexit_p(s2io_rem_nic), 483 + .err_handler = &s2io_err_handler, 477 484 }; 478 485 479 486 /* A simplifier macro used both by init and free shared_mem Fns(). */ ··· 2696 2689 u64 val64 = 0xFFFFFFFFFFFFFFFFULL; 2697 2690 int i; 2698 2691 2692 + if (pci_channel_offline(nic->pdev)) 2693 + return; 2694 + 2699 2695 disable_irq(dev->irq); 2700 2696 2701 2697 atomic_inc(&nic->isr_cnt); ··· 3224 3214 u64 cnt; 3225 3215 int i; 3226 3216 if (atomic_read(&nic->card_state) == CARD_DOWN) 3217 + return; 3218 + if (pci_channel_offline(nic->pdev)) 3227 3219 return; 3228 3220 nic->mac_control.stats_info->sw_stat.ring_full_cnt = 0; 3229 3221 /* Handling the XPAK counters update */ ··· 4325 4313 u64 reason = 0; 4326 4314 struct mac_info *mac_control; 4327 4315 struct config_param *config; 4316 + 4317 + /* Pretend we handled any irq's from a disconnected card */ 4318 + if (pci_channel_offline(sp->pdev)) 4319 + return IRQ_NONE; 4328 4320 4329 4321 atomic_inc(&sp->isr_cnt); 4330 4322 mac_control = &sp->mac_control; ··· 6585 6569 } while(cnt < 5); 6586 6570 } 6587 6571 6588 - static void s2io_card_down(struct s2io_nic * sp) 6572 + static void do_s2io_card_down(struct s2io_nic * sp, int do_io) 6589 6573 { 6590 6574 int cnt = 0; 6591 6575 struct XENA_dev_config __iomem *bar0 = sp->bar0; ··· 6600 6584 atomic_set(&sp->card_state, CARD_DOWN); 6601 6585 6602 6586 /* disable Tx and Rx traffic on the NIC */ 6603 - stop_nic(sp); 6587 + if (do_io) 6588 + stop_nic(sp); 6604 6589 6605 6590 s2io_rem_isr(sp); 6606 6591 ··· 6609 6592 tasklet_kill(&sp->task); 6610 6593 6611 6594 /* Check if the 
device is Quiescent and then Reset the NIC */ 6612 - do { 6595 + while(do_io) { 6613 6596 /* As per the HW requirement we need to replenish the 6614 6597 * receive buffer to avoid the ring bump. Since there is 6615 6598 * no intention of processing the Rx frame at this pointwe are ··· 6634 6617 (unsigned long long) val64); 6635 6618 break; 6636 6619 } 6637 - } while (1); 6638 - s2io_reset(sp); 6620 + } 6621 + if (do_io) 6622 + s2io_reset(sp); 6639 6623 6640 6624 spin_lock_irqsave(&sp->tx_lock, flags); 6641 6625 /* Free all Tx buffers */ ··· 6649 6631 spin_unlock_irqrestore(&sp->rx_lock, flags); 6650 6632 6651 6633 clear_bit(0, &(sp->link_state)); 6634 + } 6635 + 6636 + static void s2io_card_down(struct s2io_nic * sp) 6637 + { 6638 + do_s2io_card_down(sp, 1); 6652 6639 } 6653 6640 6654 6641 static int s2io_card_up(struct s2io_nic * sp) ··· 8032 8009 lro->last_frag = skb; 8033 8010 sp->mac_control.stats_info->sw_stat.clubbed_frms_cnt++; 8034 8011 return; 8012 + } 8013 + 8014 + /** 8015 + * s2io_io_error_detected - called when PCI error is detected 8016 + * @pdev: Pointer to PCI device 8017 + * @state: The current pci conneection state 8018 + * 8019 + * This function is called after a PCI bus error affecting 8020 + * this device has been detected. 8021 + */ 8022 + static pci_ers_result_t s2io_io_error_detected(struct pci_dev *pdev, 8023 + pci_channel_state_t state) 8024 + { 8025 + struct net_device *netdev = pci_get_drvdata(pdev); 8026 + struct s2io_nic *sp = netdev->priv; 8027 + 8028 + netif_device_detach(netdev); 8029 + 8030 + if (netif_running(netdev)) { 8031 + /* Bring down the card, while avoiding PCI I/O */ 8032 + do_s2io_card_down(sp, 0); 8033 + sp->device_close_flag = TRUE; /* Device is shut down. */ 8034 + } 8035 + pci_disable_device(pdev); 8036 + 8037 + return PCI_ERS_RESULT_NEED_RESET; 8038 + } 8039 + 8040 + /** 8041 + * s2io_io_slot_reset - called after the pci bus has been reset. 
8042 + * @pdev: Pointer to PCI device 8043 + * 8044 + * Restart the card from scratch, as if from a cold-boot. 8045 + * At this point, the card has exprienced a hard reset, 8046 + * followed by fixups by BIOS, and has its config space 8047 + * set up identically to what it was at cold boot. 8048 + */ 8049 + static pci_ers_result_t s2io_io_slot_reset(struct pci_dev *pdev) 8050 + { 8051 + struct net_device *netdev = pci_get_drvdata(pdev); 8052 + struct s2io_nic *sp = netdev->priv; 8053 + 8054 + if (pci_enable_device(pdev)) { 8055 + printk(KERN_ERR "s2io: " 8056 + "Cannot re-enable PCI device after reset.\n"); 8057 + return PCI_ERS_RESULT_DISCONNECT; 8058 + } 8059 + 8060 + pci_set_master(pdev); 8061 + s2io_reset(sp); 8062 + 8063 + return PCI_ERS_RESULT_RECOVERED; 8064 + } 8065 + 8066 + /** 8067 + * s2io_io_resume - called when traffic can start flowing again. 8068 + * @pdev: Pointer to PCI device 8069 + * 8070 + * This callback is called when the error recovery driver tells 8071 + * us that its OK to resume normal operation. 8072 + */ 8073 + static void s2io_io_resume(struct pci_dev *pdev) 8074 + { 8075 + struct net_device *netdev = pci_get_drvdata(pdev); 8076 + struct s2io_nic *sp = netdev->priv; 8077 + 8078 + if (netif_running(netdev)) { 8079 + if (s2io_card_up(sp)) { 8080 + printk(KERN_ERR "s2io: " 8081 + "Can't bring device back up after reset.\n"); 8082 + return; 8083 + } 8084 + 8085 + if (s2io_set_mac_addr(netdev, netdev->dev_addr) == FAILURE) { 8086 + s2io_card_down(sp); 8087 + printk(KERN_ERR "s2io: " 8088 + "Can't resetore mac addr after reset.\n"); 8089 + return; 8090 + } 8091 + } 8092 + 8093 + netif_device_attach(netdev); 8094 + netif_wake_queue(netdev); 8035 8095 }
+5
drivers/net/s2io.h
··· 1052 1052 struct sk_buff *skb, u32 tcp_len); 1053 1053 static int rts_ds_steer(struct s2io_nic *nic, u8 ds_codepoint, u8 ring); 1054 1054 1055 + static pci_ers_result_t s2io_io_error_detected(struct pci_dev *pdev, 1056 + pci_channel_state_t state); 1057 + static pci_ers_result_t s2io_io_slot_reset(struct pci_dev *pdev); 1058 + static void s2io_io_resume(struct pci_dev *pdev); 1059 + 1055 1060 #define s2io_tcp_mss(skb) skb_shinfo(skb)->gso_size 1056 1061 #define s2io_udp_mss(skb) skb_shinfo(skb)->gso_size 1057 1062 #define s2io_offload_type(skb) skb_shinfo(skb)->gso_type