Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/mlx5: PCI error recovery health care simulation

If the kernel PCI error handlers are not called, we trigger our own
recovery flow.

The health work gives priority to the kernel PCI error handlers to
recover the PCI device by waiting for a grace period
(MLX5_RECOVERY_DELAY_MSECS, 60 seconds); if the PCI error handlers are
not triggered, the manual recovery flow is executed.

We don't save the PCI state in the manual recovery case, because doing so
would ruin the PCI configuration space and we would lose DMA sync.

Fixes: 89d44f0a6c73 ('net/mlx5_core: Add pci error handlers to mlx5_core driver')
Signed-off-by: Mohamad Haj Yahia <mohamad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Mohamad Haj Yahia and committed by
David S. Miller
04c0c1ab 05ac2c0b

+56 -9
+41 -4
drivers/net/ethernet/mellanox/mlx5/core/health.c
··· 61 61 enum { 62 62 MLX5_NIC_IFC_FULL = 0, 63 63 MLX5_NIC_IFC_DISABLED = 1, 64 - MLX5_NIC_IFC_NO_DRAM_NIC = 2 64 + MLX5_NIC_IFC_NO_DRAM_NIC = 2, 65 + MLX5_NIC_IFC_INVALID = 3 65 66 }; 66 67 67 68 enum { 68 69 MLX5_DROP_NEW_HEALTH_WORK, 69 70 }; 70 71 71 - static u8 get_nic_interface(struct mlx5_core_dev *dev) 72 + static u8 get_nic_state(struct mlx5_core_dev *dev) 72 73 { 73 74 return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; 74 75 } ··· 102 101 struct mlx5_core_health *health = &dev->priv.health; 103 102 struct health_buffer __iomem *h = health->health; 104 103 105 - if (get_nic_interface(dev) == MLX5_NIC_IFC_DISABLED) 104 + if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) 106 105 return 1; 107 106 108 107 if (ioread32be(&h->fw_ver) == 0xffffffff) ··· 132 131 133 132 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) 134 133 { 135 - u8 nic_interface = get_nic_interface(dev); 134 + u8 nic_interface = get_nic_state(dev); 136 135 137 136 switch (nic_interface) { 138 137 case MLX5_NIC_IFC_FULL: ··· 154 153 mlx5_disable_device(dev); 155 154 } 156 155 156 + static void health_recover(struct work_struct *work) 157 + { 158 + struct mlx5_core_health *health; 159 + struct delayed_work *dwork; 160 + struct mlx5_core_dev *dev; 161 + struct mlx5_priv *priv; 162 + u8 nic_state; 163 + 164 + dwork = container_of(work, struct delayed_work, work); 165 + health = container_of(dwork, struct mlx5_core_health, recover_work); 166 + priv = container_of(health, struct mlx5_priv, health); 167 + dev = container_of(priv, struct mlx5_core_dev, priv); 168 + 169 + nic_state = get_nic_state(dev); 170 + if (nic_state == MLX5_NIC_IFC_INVALID) { 171 + dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n"); 172 + return; 173 + } 174 + 175 + dev_err(&dev->pdev->dev, "starting health recovery flow\n"); 176 + mlx5_recover_device(dev); 177 + } 178 + 179 + /* How much time to wait until health resetting the driver (in msecs) */ 180 + #define 
MLX5_RECOVERY_DELAY_MSECS 60000 157 181 static void health_care(struct work_struct *work) 158 182 { 183 + unsigned long recover_delay = msecs_to_jiffies(MLX5_RECOVERY_DELAY_MSECS); 159 184 struct mlx5_core_health *health; 160 185 struct mlx5_core_dev *dev; 161 186 struct mlx5_priv *priv; ··· 191 164 dev = container_of(priv, struct mlx5_core_dev, priv); 192 165 mlx5_core_warn(dev, "handling bad device here\n"); 193 166 mlx5_handle_bad_state(dev); 167 + 168 + spin_lock(&health->wq_lock); 169 + if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) 170 + schedule_delayed_work(&health->recover_work, recover_delay); 171 + else 172 + dev_err(&dev->pdev->dev, 173 + "new health works are not permitted at this stage\n"); 174 + spin_unlock(&health->wq_lock); 194 175 } 195 176 196 177 static const char *hsynd_str(u8 synd) ··· 351 316 spin_lock(&health->wq_lock); 352 317 set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); 353 318 spin_unlock(&health->wq_lock); 319 + cancel_delayed_work_sync(&health->recover_work); 354 320 cancel_work_sync(&health->work); 355 321 } 356 322 ··· 380 344 return -ENOMEM; 381 345 spin_lock_init(&health->wq_lock); 382 346 INIT_WORK(&health->work, health_care); 347 + INIT_DELAYED_WORK(&health->recover_work, health_recover); 383 348 384 349 return 0; 385 350 }
+13 -5
drivers/net/ethernet/mellanox/mlx5/core/main.c
··· 1313 1313 struct mlx5_priv *priv = &dev->priv; 1314 1314 1315 1315 dev_info(&pdev->dev, "%s was called\n", __func__); 1316 + 1316 1317 mlx5_enter_error_state(dev); 1317 1318 mlx5_unload_one(dev, priv, false); 1318 1319 /* In case of kernel call save the pci state and drain health wq */ ··· 1379 1378 return PCI_ERS_RESULT_RECOVERED; 1380 1379 } 1381 1380 1382 - void mlx5_disable_device(struct mlx5_core_dev *dev) 1383 - { 1384 - mlx5_pci_err_detected(dev->pdev, 0); 1385 - } 1386 - 1387 1381 static void mlx5_pci_resume(struct pci_dev *pdev) 1388 1382 { 1389 1383 struct mlx5_core_dev *dev = pci_get_drvdata(pdev); ··· 1427 1431 }; 1428 1432 1429 1433 MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); 1434 + 1435 + void mlx5_disable_device(struct mlx5_core_dev *dev) 1436 + { 1437 + mlx5_pci_err_detected(dev->pdev, 0); 1438 + } 1439 + 1440 + void mlx5_recover_device(struct mlx5_core_dev *dev) 1441 + { 1442 + mlx5_pci_disable_device(dev); 1443 + if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED) 1444 + mlx5_pci_resume(dev->pdev); 1445 + } 1430 1446 1431 1447 static struct pci_driver mlx5_core_driver = { 1432 1448 .name = DRIVER_NAME,
+1
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
··· 83 83 unsigned long param); 84 84 void mlx5_enter_error_state(struct mlx5_core_dev *dev); 85 85 void mlx5_disable_device(struct mlx5_core_dev *dev); 86 + void mlx5_recover_device(struct mlx5_core_dev *dev); 86 87 int mlx5_sriov_init(struct mlx5_core_dev *dev); 87 88 void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); 88 89 int mlx5_sriov_attach(struct mlx5_core_dev *dev);
+1
include/linux/mlx5/driver.h
··· 423 423 struct workqueue_struct *wq; 424 424 unsigned long flags; 425 425 struct work_struct work; 426 + struct delayed_work recover_work; 426 427 }; 427 428 428 429 struct mlx5_cq_table {