Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/mlx5e: Add tx timeout support for mlx5e tx reporter

With this patch, the ndo_tx_timeout callback is redirected to the tx
reporter in order to detect a tx timeout error and report it to
devlink health. (The watchdog detects tx timeouts, but the driver verifies
that the issue still exists before launching any recovery method.)

In addition, recovery from a tx timeout caused by a lost interrupt was added
to the tx reporter recover method. Recovering a tx timeout from a lost
interrupt is not a new feature in the driver; this patch reorganizes the
functionality and moves it to the tx reporter recovery flow.

tx timeout example:
(with auto_recover set to false; if it is set to true, the manual recover and
diagnose sections are irrelevant)

$cat /sys/kernel/debug/tracing/trace
...
devlink_health_report: bus_name=pci dev_name=0000:00:09.0
driver_name=mlx5_core reporter_name=tx: TX timeout on queue: 0, SQ: 0x8a,
CQ: 0x35, SQ Cons: 0x2 SQ Prod: 0x2, usecs since last trans: 14912000

$devlink health show
pci/0000:00:09.0:
name tx
state healthy #err 1 #recover 0 last_dump_ts N/A
parameters:
grace_period 500 auto_recover false

$devlink health diagnose pci/0000:00:09.0 reporter tx -j -p
{
"SQs": [ {
"sqn": 138,
"HW state": 1,
"stopped": true
},{
"sqn": 142,
"HW state": 1,
"stopped": false
} ]
}

$devlink health diagnose pci/0000:00:09.0 reporter tx
SQs:
sqn: 138 HW state: 1 stopped: true
sqn: 142 HW state: 1 stopped: false

$devlink health recover pci/0000:00:09.0 reporter tx
$devlink health show
pci/0000:00:09.0:
name tx
state healthy #err 1 #recover 1 last_dump_ts N/A
parameters:
grace_period 500 auto_recover false

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eran Ben Elisha and committed by
David S. Miller
7d91126b de8650a8

+55 -37
+1
drivers/net/ethernet/mellanox/mlx5/core/en/reporter.h
··· 10 10 int mlx5e_tx_reporter_create(struct mlx5e_priv *priv); 11 11 void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv); 12 12 void mlx5e_tx_reporter_err_cqe(struct mlx5e_txqsq *sq); 13 + int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq); 13 14 14 15 #endif
+38
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
··· 126 126 &err_ctx); 127 127 } 128 128 129 + static int mlx5e_tx_reporter_timeout_recover(struct mlx5e_txqsq *sq) 130 + { 131 + struct mlx5_eq_comp *eq = sq->cq.mcq.eq; 132 + u32 eqe_count; 133 + int ret; 134 + 135 + netdev_err(sq->channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", 136 + eq->core.eqn, eq->core.cons_index, eq->core.irqn); 137 + 138 + eqe_count = mlx5_eq_poll_irq_disabled(eq); 139 + ret = eqe_count ? true : false; 140 + if (!eqe_count) { 141 + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 142 + return ret; 143 + } 144 + 145 + netdev_err(sq->channel->netdev, "Recover %d eqes on EQ 0x%x\n", 146 + eqe_count, eq->core.eqn); 147 + sq->channel->stats->eq_rearm++; 148 + return ret; 149 + } 150 + 151 + int mlx5e_tx_reporter_timeout(struct mlx5e_txqsq *sq) 152 + { 153 + char err_str[MLX5E_TX_REPORTER_PER_SQ_MAX_LEN]; 154 + struct mlx5e_tx_err_ctx err_ctx; 155 + 156 + err_ctx.sq = sq; 157 + err_ctx.recover = mlx5e_tx_reporter_timeout_recover; 158 + sprintf(err_str, 159 + "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", 160 + sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, 161 + jiffies_to_usecs(jiffies - sq->txq->trans_start)); 162 + 163 + return devlink_health_report(sq->channel->priv->tx_reporter, err_str, 164 + &err_ctx); 165 + } 166 + 129 167 /* state lock cannot be grabbed within this function. 130 168 * It can cause a dead lock or a read-after-free. 131 169 */
+16 -37
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 4116 4116 return features; 4117 4117 } 4118 4118 4119 - static bool mlx5e_tx_timeout_eq_recover(struct net_device *dev, 4120 - struct mlx5e_txqsq *sq) 4121 - { 4122 - struct mlx5_eq_comp *eq = sq->cq.mcq.eq; 4123 - u32 eqe_count; 4124 - 4125 - netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n", 4126 - eq->core.eqn, eq->core.cons_index, eq->core.irqn); 4127 - 4128 - eqe_count = mlx5_eq_poll_irq_disabled(eq); 4129 - if (!eqe_count) 4130 - return false; 4131 - 4132 - netdev_err(dev, "Recover %d eqes on EQ 0x%x\n", eqe_count, eq->core.eqn); 4133 - sq->channel->stats->eq_rearm++; 4134 - return true; 4135 - } 4136 - 4137 4119 static void mlx5e_tx_timeout_work(struct work_struct *work) 4138 4120 { 4139 4121 struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, 4140 4122 tx_timeout_work); 4141 - struct net_device *dev = priv->netdev; 4142 - bool reopen_channels = false; 4143 - int i, err; 4123 + bool report_failed = false; 4124 + int err; 4125 + int i; 4144 4126 4145 4127 rtnl_lock(); 4146 4128 mutex_lock(&priv->state_lock); ··· 4131 4149 goto unlock; 4132 4150 4133 4151 for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { 4134 - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, i); 4152 + struct netdev_queue *dev_queue = 4153 + netdev_get_tx_queue(priv->netdev, i); 4135 4154 struct mlx5e_txqsq *sq = priv->txq2sq[i]; 4136 4155 4137 4156 if (!netif_xmit_stopped(dev_queue)) 4138 4157 continue; 4139 4158 4140 - netdev_err(dev, 4141 - "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n", 4142 - i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, 4143 - jiffies_to_usecs(jiffies - dev_queue->trans_start)); 4144 - 4145 - /* If we recover a lost interrupt, most likely TX timeout will 4146 - * be resolved, skip reopening channels 4147 - */ 4148 - if (!mlx5e_tx_timeout_eq_recover(dev, sq)) { 4149 - clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); 4150 - reopen_channels = true; 4151 - } 4159 + if 
(mlx5e_tx_reporter_timeout(sq)) 4160 + report_failed = true; 4152 4161 } 4153 4162 4154 - if (!reopen_channels) 4163 + if (!report_failed) 4155 4164 goto unlock; 4156 4165 4157 - mlx5e_close_locked(dev); 4158 - err = mlx5e_open_locked(dev); 4166 + mlx5e_close_locked(priv->netdev); 4167 + err = mlx5e_open_locked(priv->netdev); 4159 4168 if (err) 4160 4169 netdev_err(priv->netdev, 4161 4170 "mlx5e_open_locked failed recovering from a tx_timeout, err(%d).\n", ··· 4162 4189 struct mlx5e_priv *priv = netdev_priv(dev); 4163 4190 4164 4191 netdev_err(dev, "TX timeout detected\n"); 4192 + 4193 + if (IS_ERR_OR_NULL(priv->tx_reporter)) { 4194 + netdev_err_once(priv->netdev, "tx timeout will not be handled, no valid tx reporter\n"); 4195 + return; 4196 + } 4197 + 4165 4198 queue_work(priv->wq, &priv->tx_timeout_work); 4166 4199 } 4167 4200