Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'net-mlx5e-reduce-interface-downtime-on-configuration-change'

Tariq Toukan says:

====================
net/mlx5e: Reduce interface downtime on configuration change

This series significantly reduces the interface downtime while swapping
channels during a configuration change, on capable devices.

Here we remove an old requirement on the ordering of operations that
became obsolete on recent capable devices. This helps cut the downtime
significantly, by ~80% in our example.

Perf numbers:
We measured the number of packets dropped in a simple ping flood test
during a configuration change operation that switches the number of
channels from 247 to 248.

Before: 71 packets lost
After: 15 packets lost, ~80% saving.
====================

Link: https://patch.msgid.link/1761831159-1013140-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+97 -44
+3 -1
drivers/net/ethernet/mellanox/mlx5/core/en.h
··· 1156 1156 int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey); 1157 1157 int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises); 1158 1158 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); 1159 - int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, 1159 + int mlx5e_modify_tirs_lb(struct mlx5_core_dev *mdev, bool enable_uc_lb, 1160 + bool enable_mc_lb); 1161 + int mlx5e_refresh_tirs(struct mlx5_core_dev *mdev, bool enable_uc_lb, 1160 1162 bool enable_mc_lb); 1161 1163 void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc); 1162 1164
+2
drivers/net/ethernet/mellanox/mlx5/core/en/rss.c
··· 231 231 rqtn, rss_inner); 232 232 mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param); 233 233 rss_tt = mlx5e_rss_get_tt_config(rss, tt); 234 + mlx5e_tir_builder_build_self_lb_block(builder, rss->params.self_lb_blk, 235 + rss->params.self_lb_blk); 234 236 mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner); 235 237 236 238 err = mlx5e_tir_init(tir, builder, rss->mdev, true);
+1
drivers/net/ethernet/mellanox/mlx5/core/en/rss.h
··· 23 23 struct mlx5e_rss_params { 24 24 bool inner_ft_support; 25 25 u32 drop_rqn; 26 + bool self_lb_blk; 26 27 }; 27 28 28 29 struct mlx5e_rss_params_traffic_type
+7
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c
··· 71 71 rss_params = (struct mlx5e_rss_params) { 72 72 .inner_ft_support = inner_ft_support, 73 73 .drop_rqn = res->drop_rqn, 74 + .self_lb_blk = 75 + res->features & MLX5E_RX_RES_FEATURE_SELF_LB_BLOCK, 74 76 }; 75 77 76 78 rss = mlx5e_rss_init(res->mdev, &rss_params, &init_params); ··· 106 104 rss_params = (struct mlx5e_rss_params) { 107 105 .inner_ft_support = inner_ft_support, 108 106 .drop_rqn = res->drop_rqn, 107 + .self_lb_blk = 108 + res->features & MLX5E_RX_RES_FEATURE_SELF_LB_BLOCK, 109 109 }; 110 110 111 111 rss = mlx5e_rss_init(res->mdev, &rss_params, &init_params); ··· 350 346 static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res) 351 347 { 352 348 bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; 349 + bool self_lb_blk = res->features & MLX5E_RX_RES_FEATURE_SELF_LB_BLOCK; 353 350 struct mlx5e_tir_builder *builder; 354 351 int err = 0; 355 352 int ix; ··· 381 376 mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), 382 377 inner_ft_support); 383 378 mlx5e_tir_builder_build_packet_merge(builder, &res->pkt_merge_param); 379 + mlx5e_tir_builder_build_self_lb_block(builder, self_lb_blk, 380 + self_lb_blk); 384 381 mlx5e_tir_builder_build_direct(builder); 385 382 386 383 err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true);
+1
drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h
··· 21 21 MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0), 22 22 MLX5E_RX_RES_FEATURE_PTP = BIT(1), 23 23 MLX5E_RX_RES_FEATURE_MULTI_VHCA = BIT(2), 24 + MLX5E_RX_RES_FEATURE_SELF_LB_BLOCK = BIT(3), 24 25 }; 25 26 26 27 /* Setup */
+26 -3
drivers/net/ethernet/mellanox/mlx5/core/en/tir.c
··· 146 146 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); 147 147 } 148 148 149 + static void mlx5e_tir_context_self_lb_block(void *tirc, bool enable_uc_lb, 150 + bool enable_mc_lb) 151 + { 152 + u8 lb_flags = 0; 153 + 154 + if (enable_uc_lb) 155 + lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; 156 + if (enable_mc_lb) 157 + lb_flags |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; 158 + 159 + MLX5_SET(tirc, tirc, self_lb_block, lb_flags); 160 + } 161 + 162 + void mlx5e_tir_builder_build_self_lb_block(struct mlx5e_tir_builder *builder, 163 + bool enable_uc_lb, 164 + bool enable_mc_lb) 165 + { 166 + void *tirc = mlx5e_tir_builder_get_tirc(builder); 167 + 168 + if (builder->modify) 169 + MLX5_SET(modify_tir_in, builder->in, bitmask.self_lb_en, 1); 170 + 171 + mlx5e_tir_context_self_lb_block(tirc, enable_uc_lb, enable_mc_lb); 172 + } 173 + 149 174 void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder) 150 175 { 151 176 void *tirc = mlx5e_tir_builder_get_tirc(builder); ··· 178 153 WARN_ON(builder->modify); 179 154 180 155 MLX5_SET(tirc, tirc, tls_en, 1); 181 - MLX5_SET(tirc, tirc, self_lb_block, 182 - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST | 183 - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST); 156 + mlx5e_tir_context_self_lb_block(tirc, true, true); 184 157 } 185 158 186 159 int mlx5e_tir_init(struct mlx5e_tir *tir, struct mlx5e_tir_builder *builder,
+3
drivers/net/ethernet/mellanox/mlx5/core/en/tir.h
··· 35 35 const struct mlx5e_rss_params_traffic_type *rss_tt, 36 36 bool inner); 37 37 void mlx5e_tir_builder_build_direct(struct mlx5e_tir_builder *builder); 38 + void mlx5e_tir_builder_build_self_lb_block(struct mlx5e_tir_builder *builder, 39 + bool enable_uc_lb, 40 + bool enable_mc_lb); 38 41 void mlx5e_tir_builder_build_tls(struct mlx5e_tir_builder *builder); 39 42 40 43 struct mlx5_core_dev;
+25 -27
drivers/net/ethernet/mellanox/mlx5/core/en_common.c
··· 247 247 memset(res, 0, sizeof(*res)); 248 248 } 249 249 250 - int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb, 251 - bool enable_mc_lb) 250 + int mlx5e_modify_tirs_lb(struct mlx5_core_dev *mdev, bool enable_uc_lb, 251 + bool enable_mc_lb) 252 252 { 253 - struct mlx5_core_dev *mdev = priv->mdev; 253 + struct mlx5e_tir_builder *builder; 254 254 struct mlx5e_tir *tir; 255 - u8 lb_flags = 0; 256 - int err = 0; 257 - u32 tirn = 0; 258 - int inlen; 259 - void *in; 255 + int err = 0; 260 256 261 - inlen = MLX5_ST_SZ_BYTES(modify_tir_in); 262 - in = kvzalloc(inlen, GFP_KERNEL); 263 - if (!in) 257 + builder = mlx5e_tir_builder_alloc(true); 258 + if (!builder) 264 259 return -ENOMEM; 265 260 266 - if (enable_uc_lb) 267 - lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST; 268 - 269 - if (enable_mc_lb) 270 - lb_flags |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST; 271 - 272 - if (lb_flags) 273 - MLX5_SET(modify_tir_in, in, ctx.self_lb_block, lb_flags); 274 - 275 - MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); 261 + mlx5e_tir_builder_build_self_lb_block(builder, enable_uc_lb, 262 + enable_mc_lb); 276 263 277 264 mutex_lock(&mdev->mlx5e_res.hw_objs.td.list_lock); 278 265 list_for_each_entry(tir, &mdev->mlx5e_res.hw_objs.td.tirs_list, list) { 279 - tirn = tir->tirn; 280 - err = mlx5_core_modify_tir(mdev, tirn, in); 281 - if (err) 266 + err = mlx5e_tir_modify(tir, builder); 267 + if (err) { 268 + mlx5_core_err(mdev, 269 + "modify tir(0x%x) enable_lb uc(%d) mc(%d) failed, %d\n", 270 + mlx5e_tir_get_tirn(tir), 271 + enable_uc_lb, enable_mc_lb, err); 282 272 break; 273 + } 283 274 } 284 275 mutex_unlock(&mdev->mlx5e_res.hw_objs.td.list_lock); 285 276 286 - kvfree(in); 287 - if (err) 288 - netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); 277 + mlx5e_tir_builder_free(builder); 289 278 290 279 return err; 280 + } 281 + 282 + int mlx5e_refresh_tirs(struct mlx5_core_dev *mdev, bool enable_uc_lb, 283 + bool enable_mc_lb) 284 + { 285 + if 
(MLX5_CAP_GEN(mdev, tis_tir_td_order)) 286 + return 0; /* refresh not needed */ 287 + 288 + return mlx5e_modify_tirs_lb(mdev, enable_uc_lb, enable_mc_lb); 291 289 }
+22 -9
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 3356 3356 } 3357 3357 3358 3358 static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv, 3359 + struct mlx5e_channels *old_chs, 3359 3360 struct mlx5e_channels *new_chs, 3360 3361 mlx5e_fp_preactivate preactivate, 3361 3362 void *context) 3362 3363 { 3363 3364 struct net_device *netdev = priv->netdev; 3364 - struct mlx5e_channels old_chs; 3365 3365 int carrier_ok; 3366 3366 int err = 0; 3367 3367 ··· 3370 3370 3371 3371 mlx5e_deactivate_priv_channels(priv); 3372 3372 3373 - old_chs = priv->channels; 3374 3373 priv->channels = *new_chs; 3375 3374 3376 3375 /* New channels are ready to roll, call the preactivate hook if needed ··· 3378 3379 if (preactivate) { 3379 3380 err = preactivate(priv, context); 3380 3381 if (err) { 3381 - priv->channels = old_chs; 3382 + priv->channels = *old_chs; 3382 3383 goto out; 3383 3384 } 3384 3385 } 3385 3386 3386 - mlx5e_close_channels(&old_chs); 3387 + if (!MLX5_CAP_GEN(priv->mdev, tis_tir_td_order)) 3388 + mlx5e_close_channels(old_chs); 3387 3389 priv->profile->update_rx(priv); 3388 3390 3389 3391 mlx5e_selq_apply(&priv->selq); ··· 3403 3403 mlx5e_fp_preactivate preactivate, 3404 3404 void *context, bool reset) 3405 3405 { 3406 - struct mlx5e_channels *new_chs; 3406 + struct mlx5e_channels *old_chs, *new_chs; 3407 3407 int err; 3408 3408 3409 3409 reset &= test_bit(MLX5E_STATE_OPENED, &priv->state); 3410 3410 if (!reset) 3411 3411 return mlx5e_switch_priv_params(priv, params, preactivate, context); 3412 3412 3413 + old_chs = kzalloc(sizeof(*old_chs), GFP_KERNEL); 3413 3414 new_chs = kzalloc(sizeof(*new_chs), GFP_KERNEL); 3414 - if (!new_chs) 3415 - return -ENOMEM; 3415 + if (!old_chs || !new_chs) { 3416 + err = -ENOMEM; 3417 + goto err_free_chs; 3418 + } 3419 + 3416 3420 new_chs->params = *params; 3417 3421 3418 3422 mlx5e_selq_prepare_params(&priv->selq, &new_chs->params); ··· 3425 3421 if (err) 3426 3422 goto err_cancel_selq; 3427 3423 3428 - err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context); 3424 + 
*old_chs = priv->channels; 3425 + 3426 + err = mlx5e_switch_priv_channels(priv, old_chs, new_chs, 3427 + preactivate, context); 3429 3428 if (err) 3430 3429 goto err_close; 3431 3430 3431 + if (MLX5_CAP_GEN(priv->mdev, tis_tir_td_order)) 3432 + mlx5e_close_channels(old_chs); 3433 + 3432 3434 kfree(new_chs); 3435 + kfree(old_chs); 3433 3436 return 0; 3434 3437 3435 3438 err_close: ··· 3444 3433 3445 3434 err_cancel_selq: 3446 3435 mlx5e_selq_cancel(&priv->selq); 3436 + err_free_chs: 3447 3437 kfree(new_chs); 3438 + kfree(old_chs); 3448 3439 return err; 3449 3440 } 3450 3441 ··· 6149 6136 6150 6137 static int mlx5e_update_nic_rx(struct mlx5e_priv *priv) 6151 6138 { 6152 - return mlx5e_refresh_tirs(priv, false, false); 6139 + return mlx5e_refresh_tirs(priv->mdev, false, false); 6153 6140 } 6154 6141 6155 6142 static const struct mlx5e_profile mlx5e_nic_profile = {
+2 -2
drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
··· 214 214 return err; 215 215 } 216 216 217 - err = mlx5e_refresh_tirs(priv, true, false); 217 + err = mlx5e_modify_tirs_lb(priv->mdev, true, false); 218 218 if (err) 219 219 goto out; 220 220 ··· 243 243 mlx5_nic_vport_update_local_lb(priv->mdev, false); 244 244 245 245 dev_remove_pack(&lbtp->pt); 246 - mlx5e_refresh_tirs(priv, false, false); 246 + mlx5e_modify_tirs_lb(priv->mdev, false, false); 247 247 } 248 248 249 249 static int mlx5e_cond_loopback(struct mlx5e_priv *priv)
+5 -2
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
··· 334 334 335 335 int mlx5i_update_nic_rx(struct mlx5e_priv *priv) 336 336 { 337 - return mlx5e_refresh_tirs(priv, true, true); 337 + return mlx5e_refresh_tirs(priv->mdev, true, true); 338 338 } 339 339 340 340 int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn) ··· 427 427 static int mlx5i_init_rx(struct mlx5e_priv *priv) 428 428 { 429 429 struct mlx5_core_dev *mdev = priv->mdev; 430 + enum mlx5e_rx_res_features features; 430 431 int err; 431 432 432 433 priv->fs = mlx5e_fs_init(priv->profile, mdev, ··· 446 445 goto err_destroy_q_counters; 447 446 } 448 447 449 - priv->rx_res = mlx5e_rx_res_create(priv->mdev, 0, priv->max_nch, priv->drop_rq.rqn, 448 + features = MLX5E_RX_RES_FEATURE_SELF_LB_BLOCK; 449 + priv->rx_res = mlx5e_rx_res_create(priv->mdev, features, priv->max_nch, 450 + priv->drop_rq.rqn, 450 451 &priv->channels.params.packet_merge, 451 452 priv->channels.params.num_channels); 452 453 if (IS_ERR(priv->rx_res)) {