bnx2: Add workaround to handle missed MSI.

The bnx2 chips do not support per-vector MSI masking. On 5706/5708, a new MSI
address/data pair is stored only when the MSI enable bit is toggled. As a result,
SMP affinity no longer works in the latest kernel. A more serious problem is
that the driver will no longer receive interrupts when the MSI-receiving CPU
goes offline.

The workaround in this patch only addresses the problem of the CPU going offline.
When that happens, the driver's timer function will detect that it is making
no forward progress on pending interrupt events and will recover by toggling
the MSI enable bit and then calling the MSI handler directly.

Eric Dumazet reported the problem.

We also found that if an interrupt is internally asserted while MSI and INTA
are disabled, the chip will end up in the same state after MSI is re-enabled.
The same workaround is needed for this problem.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Tested-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Michael Chan and committed by
David S. Miller
efba0180 d5654efd

+38 -3
+32 -3
drivers/net/bnx2.c
··· 3144 return 0; 3145 } 3146 3147 static void bnx2_poll_link(struct bnx2 *bp, struct bnx2_napi *bnapi) 3148 { 3149 struct status_block *sblk = bnapi->status_blk.msi; ··· 3240 3241 work_done = bnx2_poll_work(bp, bnapi, work_done, budget); 3242 3243 - if (unlikely(work_done >= budget)) 3244 - break; 3245 - 3246 /* bnapi->last_status_idx is used below to tell the hw how 3247 * much work has been processed, so we must read it before 3248 * checking for more work. 3249 */ 3250 bnapi->last_status_idx = sblk->status_idx; 3251 rmb(); 3252 if (likely(!bnx2_has_work(bnapi))) { 3253 netif_rx_complete(bp->dev, napi); ··· 4593 for (i = 0; i < BNX2_MAX_MSIX_VEC; i++) 4594 bp->bnx2_napi[i].last_status_idx = 0; 4595 4596 bp->rx_mode = BNX2_EMAC_RX_MODE_SORT_MODE; 4597 4598 /* Set up how to generate a link change interrupt. */ ··· 5742 5743 if (atomic_read(&bp->intr_sem) != 0) 5744 goto bnx2_restart_timer; 5745 5746 bnx2_send_heart_beat(bp); 5747
··· 3144 return 0; 3145 } 3146 3147 + static void 3148 + bnx2_chk_missed_msi(struct bnx2 *bp) 3149 + { 3150 + struct bnx2_napi *bnapi = &bp->bnx2_napi[0]; 3151 + u32 msi_ctrl; 3152 + 3153 + if (bnx2_has_work(bnapi)) { 3154 + msi_ctrl = REG_RD(bp, BNX2_PCICFG_MSI_CONTROL); 3155 + if (!(msi_ctrl & BNX2_PCICFG_MSI_CONTROL_ENABLE)) 3156 + return; 3157 + 3158 + if (bnapi->last_status_idx == bp->idle_chk_status_idx) { 3159 + REG_WR(bp, BNX2_PCICFG_MSI_CONTROL, msi_ctrl & 3160 + ~BNX2_PCICFG_MSI_CONTROL_ENABLE); 3161 + REG_WR(bp, BNX2_PCICFG_MSI_CONTROL, msi_ctrl); 3162 + bnx2_msi(bp->irq_tbl[0].vector, bnapi); 3163 + } 3164 + } 3165 + 3166 + bp->idle_chk_status_idx = bnapi->last_status_idx; 3167 + } 3168 + 3169 static void bnx2_poll_link(struct bnx2 *bp, struct bnx2_napi *bnapi) 3170 { 3171 struct status_block *sblk = bnapi->status_blk.msi; ··· 3218 3219 work_done = bnx2_poll_work(bp, bnapi, work_done, budget); 3220 3221 /* bnapi->last_status_idx is used below to tell the hw how 3222 * much work has been processed, so we must read it before 3223 * checking for more work. 3224 */ 3225 bnapi->last_status_idx = sblk->status_idx; 3226 + 3227 + if (unlikely(work_done >= budget)) 3228 + break; 3229 + 3230 rmb(); 3231 if (likely(!bnx2_has_work(bnapi))) { 3232 netif_rx_complete(bp->dev, napi); ··· 4570 for (i = 0; i < BNX2_MAX_MSIX_VEC; i++) 4571 bp->bnx2_napi[i].last_status_idx = 0; 4572 4573 + bp->idle_chk_status_idx = 0xffff; 4574 + 4575 bp->rx_mode = BNX2_EMAC_RX_MODE_SORT_MODE; 4576 4577 /* Set up how to generate a link change interrupt. */ ··· 5717 5718 if (atomic_read(&bp->intr_sem) != 0) 5719 goto bnx2_restart_timer; 5720 + 5721 + if ((bp->flags & (BNX2_FLAG_USING_MSI | BNX2_FLAG_ONE_SHOT_MSI)) == 5722 + BNX2_FLAG_USING_MSI) 5723 + bnx2_chk_missed_msi(bp); 5724 5725 bnx2_send_heart_beat(bp); 5726
+6
drivers/net/bnx2.h
··· 378 * pci_config_l definition 379 * offset: 0000 380 */ 381 #define BNX2_PCICFG_MISC_CONFIG 0x00000068 382 #define BNX2_PCICFG_MISC_CONFIG_TARGET_BYTE_SWAP (1L<<2) 383 #define BNX2_PCICFG_MISC_CONFIG_TARGET_MB_WORD_SWAP (1L<<3) ··· 6866 6867 u8 num_tx_rings; 6868 u8 num_rx_rings; 6869 }; 6870 6871 #define REG_RD(bp, offset) \
··· 378 * pci_config_l definition 379 * offset: 0000 380 */ 381 + #define BNX2_PCICFG_MSI_CONTROL 0x00000058 382 + #define BNX2_PCICFG_MSI_CONTROL_ENABLE (1L<<16) 383 + 384 #define BNX2_PCICFG_MISC_CONFIG 0x00000068 385 #define BNX2_PCICFG_MISC_CONFIG_TARGET_BYTE_SWAP (1L<<2) 386 #define BNX2_PCICFG_MISC_CONFIG_TARGET_MB_WORD_SWAP (1L<<3) ··· 6863 6864 u8 num_tx_rings; 6865 u8 num_rx_rings; 6866 + 6867 + u32 idle_chk_status_idx; 6868 + 6869 }; 6870 6871 #define REG_RD(bp, offset) \