Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mlx5-misc-fixes-2025-08-25'

Mark Bloch says:

====================
mlx5 misc fixes 2025-08-25

This patchset provides misc bug fixes from the team to the mlx5 core
and Eth drivers.

v1: https://lore.kernel.org/20250824083944.523858-1-mbloch@nvidia.com
====================

Link: https://patch.msgid.link/20250825143435.598584-1-mbloch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+139 -70
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/devlink.c
··· 160 160 if (err) 161 161 return err; 162 162 163 - mlx5_unload_one_devl_locked(dev, true); 163 + mlx5_sync_reset_unload_flow(dev, true); 164 164 err = mlx5_health_wait_pci_up(dev); 165 165 if (err) 166 166 NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset");
+2 -1
drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
··· 575 575 if (err) 576 576 return err; 577 577 } 578 - priv->dcbx.xoff = xoff; 579 578 580 579 /* Apply the settings */ 581 580 if (update_buffer) { ··· 582 583 if (err) 583 584 return err; 584 585 } 586 + 587 + priv->dcbx.xoff = xoff; 585 588 586 589 if (update_prio2buffer) 587 590 err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer);
+12
drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h
··· 66 66 struct mlx5e_bufferx_reg buffer[MLX5E_MAX_NETWORK_BUFFER]; 67 67 }; 68 68 69 + #ifdef CONFIG_MLX5_CORE_EN_DCB 69 70 int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, 70 71 u32 change, unsigned int mtu, 71 72 struct ieee_pfc *pfc, 72 73 u32 *buffer_size, 73 74 u8 *prio2buffer); 75 + #else 76 + static inline int 77 + mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, 78 + u32 change, unsigned int mtu, 79 + void *pfc, 80 + u32 *buffer_size, 81 + u8 *prio2buffer) 82 + { 83 + return 0; 84 + } 85 + #endif 74 86 75 87 int mlx5e_port_query_buffer(struct mlx5e_priv *priv, 76 88 struct mlx5e_port_buffer *port_buffer);
+18 -1
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 49 49 #include "en.h" 50 50 #include "en/dim.h" 51 51 #include "en/txrx.h" 52 + #include "en/port_buffer.h" 52 53 #include "en_tc.h" 53 54 #include "en_rep.h" 54 55 #include "en_accel/ipsec.h" ··· 139 138 if (up) { 140 139 netdev_info(priv->netdev, "Link up\n"); 141 140 netif_carrier_on(priv->netdev); 141 + mlx5e_port_manual_buffer_config(priv, 0, priv->netdev->mtu, 142 + NULL, NULL, NULL); 142 143 } else { 143 144 netdev_info(priv->netdev, "Link down\n"); 144 145 netif_carrier_off(priv->netdev); ··· 3043 3040 struct mlx5e_params *params = &priv->channels.params; 3044 3041 struct net_device *netdev = priv->netdev; 3045 3042 struct mlx5_core_dev *mdev = priv->mdev; 3046 - u16 mtu; 3043 + u16 mtu, prev_mtu; 3047 3044 int err; 3045 + 3046 + mlx5e_query_mtu(mdev, params, &prev_mtu); 3048 3047 3049 3048 err = mlx5e_set_mtu(mdev, params, params->sw_mtu); 3050 3049 if (err) ··· 3056 3051 if (mtu != params->sw_mtu) 3057 3052 netdev_warn(netdev, "%s: VPort MTU %d is different than netdev mtu %d\n", 3058 3053 __func__, mtu, params->sw_mtu); 3054 + 3055 + if (mtu != prev_mtu && MLX5_BUFFER_SUPPORTED(mdev)) { 3056 + err = mlx5e_port_manual_buffer_config(priv, 0, mtu, 3057 + NULL, NULL, NULL); 3058 + if (err) { 3059 + netdev_warn(netdev, "%s: Failed to set Xon/Xoff values with MTU %d (err %d), setting back to previous MTU %d\n", 3060 + __func__, mtu, err, prev_mtu); 3061 + 3062 + mlx5e_set_mtu(mdev, params, prev_mtu); 3063 + return err; 3064 + } 3065 + } 3059 3066 3060 3067 params->sw_mtu = mtu; 3061 3068 return 0;
+7 -8
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
··· 3734 3734 char *value = val.vstr; 3735 3735 u8 eswitch_mode; 3736 3736 3737 + eswitch_mode = mlx5_eswitch_mode(dev); 3738 + if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { 3739 + NL_SET_ERR_MSG_FMT_MOD(extack, 3740 + "Changing fs mode is not supported when eswitch offloads enabled."); 3741 + return -EOPNOTSUPP; 3742 + } 3743 + 3737 3744 if (!strcmp(value, "dmfs")) 3738 3745 return 0; 3739 3746 ··· 3764 3757 NL_SET_ERR_MSG_MOD(extack, 3765 3758 "Bad parameter: supported values are [\"dmfs\", \"smfs\", \"hmfs\"]"); 3766 3759 return -EINVAL; 3767 - } 3768 - 3769 - eswitch_mode = mlx5_eswitch_mode(dev); 3770 - if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { 3771 - NL_SET_ERR_MSG_FMT_MOD(extack, 3772 - "Moving to %s is not supported when eswitch offloads enabled.", 3773 - value); 3774 - return -EOPNOTSUPP; 3775 3760 } 3776 3761 3777 3762 return 0;
+76 -56
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
··· 6 6 #include "fw_reset.h" 7 7 #include "diag/fw_tracer.h" 8 8 #include "lib/tout.h" 9 + #include "sf/sf.h" 9 10 10 11 enum { 11 12 MLX5_FW_RESET_FLAGS_RESET_REQUESTED, 12 13 MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, 13 14 MLX5_FW_RESET_FLAGS_PENDING_COMP, 14 15 MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, 15 - MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED 16 + MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, 17 + MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, 16 18 }; 17 19 18 20 struct mlx5_fw_reset { ··· 221 219 return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); 222 220 } 223 221 224 - static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded) 222 + static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) 225 223 { 226 224 struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; 227 225 struct devlink *devlink = priv_to_devlink(dev); ··· 230 228 if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { 231 229 complete(&fw_reset->done); 232 230 } else { 233 - if (!unloaded) 234 - mlx5_unload_one(dev, false); 231 + mlx5_sync_reset_unload_flow(dev, false); 235 232 if (mlx5_health_wait_pci_up(dev)) 236 233 mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); 237 234 else ··· 273 272 274 273 mlx5_sync_reset_clear_reset_requested(dev, false); 275 274 mlx5_enter_error_state(dev, true); 276 - mlx5_fw_reset_complete_reload(dev, false); 275 + mlx5_fw_reset_complete_reload(dev); 277 276 } 278 277 279 278 #define MLX5_RESET_POLL_INTERVAL (HZ / 10) ··· 426 425 427 426 if (!MLX5_CAP_GEN(dev, fast_teardown)) { 428 427 mlx5_core_warn(dev, "fast teardown is not supported by firmware\n"); 428 + return false; 429 + } 430 + 431 + if (!mlx5_core_is_ecpf(dev) && !mlx5_sf_table_empty(dev)) { 432 + mlx5_core_warn(dev, "SFs should be removed before reset\n"); 429 433 return false; 430 434 } 431 435 ··· 592 586 return err; 593 587 } 594 588 595 - static void mlx5_sync_reset_now_event(struct work_struct *work) 589 + void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked) 596 590 { 597 - struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, 598 - reset_now_work); 599 - struct mlx5_core_dev *dev = fw_reset->dev; 600 - int err; 601 - 602 - if (mlx5_sync_reset_clear_reset_requested(dev, false)) 603 - return; 604 - 605 - mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); 606 - 607 - err = mlx5_cmd_fast_teardown_hca(dev); 608 - if (err) { 609 - mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err); 610 - goto done; 611 - } 612 - 613 - err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); 614 - if (err) { 615 - mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err); 616 - set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); 617 - } 618 - 619 - mlx5_enter_error_state(dev, true); 620 - done: 621 - fw_reset->ret = err; 622 - mlx5_fw_reset_complete_reload(dev, false); 623 - } 624 - 625 - static void mlx5_sync_reset_unload_event(struct work_struct *work) 626 - { 627 - struct mlx5_fw_reset *fw_reset; 628 - struct mlx5_core_dev *dev; 591 + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; 629 592 unsigned long timeout; 630 593 int poll_freq = 20; 631 594 bool reset_action; 632 595 u8 rst_state; 633 596 int err; 634 597 635 - fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work); 636 - dev = fw_reset->dev; 637 - 638 - if (mlx5_sync_reset_clear_reset_requested(dev, false)) 639 - return; 640 - 641 - mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n"); 642 - 643 - err = mlx5_cmd_fast_teardown_hca(dev); 644 - if (err) 645 - mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err); 646 - else 647 - mlx5_enter_error_state(dev, true); 648 - 649 - if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) 598 + if (locked) 650 599 mlx5_unload_one_devl_locked(dev, false); 651 600 else 652 601 mlx5_unload_one(dev, false); 602 + 603 + if (!test_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags)) 604 + return; 653 605 654 606 mlx5_set_fw_rst_ack(dev); 655 607 mlx5_core_warn(dev, "Sync Reset Unload done, device reset expected\n"); ··· 636 672 goto done; 637 673 } 638 674 639 - mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", rst_state); 675 + mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", 676 + rst_state); 640 677 if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) { 641 678 err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); 642 679 if (err) { 643 - mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", err); 680 + mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", 681 + err); 644 682 fw_reset->ret = err; 645 683 } 646 684 } 647 685 648 686 done: 649 - mlx5_fw_reset_complete_reload(dev, true); 687 + clear_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags); 688 + } 689 + 690 + static void mlx5_sync_reset_now_event(struct work_struct *work) 691 + { 692 + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, 693 + reset_now_work); 694 + struct mlx5_core_dev *dev = fw_reset->dev; 695 + int err; 696 + 697 + if (mlx5_sync_reset_clear_reset_requested(dev, false)) 698 + return; 699 + 700 + mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); 701 + 702 + err = mlx5_cmd_fast_teardown_hca(dev); 703 + if (err) { 704 + mlx5_core_warn(dev, "Fast teardown failed, no reset done, err %d\n", err); 705 + goto done; 706 + } 707 + 708 + err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); 709 + if (err) { 710 + mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err); 711 + set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); 712 + } 713 + 714 + mlx5_enter_error_state(dev, true); 715 + done: 716 + fw_reset->ret = err; 717 + mlx5_fw_reset_complete_reload(dev); 718 + } 719 + 720 + static void mlx5_sync_reset_unload_event(struct work_struct *work) 721 + { 722 + struct mlx5_fw_reset *fw_reset; 723 + struct mlx5_core_dev *dev; 724 + int err; 725 + 726 + fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work); 727 + dev = fw_reset->dev; 728 + 729 + if (mlx5_sync_reset_clear_reset_requested(dev, false)) 730 + return; 731 + 732 + set_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags); 733 + mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n"); 734 + 735 + err = mlx5_cmd_fast_teardown_hca(dev); 736 + if (err) 737 + mlx5_core_warn(dev, "Fast teardown failed, unloading, err %d\n", err); 738 + else 739 + mlx5_enter_error_state(dev, true); 740 + 741 + mlx5_fw_reset_complete_reload(dev); 650 742 }
+1
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
··· 12 12 int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); 13 13 14 14 int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); 15 + void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked); 15 16 int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev, 16 17 struct netlink_ext_ack *extack); 17 18 void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev);
+10
drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c
··· 518 518 WARN_ON(!xa_empty(&table->function_ids)); 519 519 kfree(table); 520 520 } 521 + 522 + bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev) 523 + { 524 + struct mlx5_sf_table *table = dev->priv.sf_table; 525 + 526 + if (!table) 527 + return true; 528 + 529 + return xa_empty(&table->function_ids); 530 + }
+6
drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h
··· 17 17 18 18 int mlx5_sf_table_init(struct mlx5_core_dev *dev); 19 19 void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); 20 + bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev); 20 21 21 22 int mlx5_devlink_sf_port_new(struct devlink *devlink, 22 23 const struct devlink_port_new_attrs *add_attr, ··· 60 59 61 60 static inline void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) 62 61 { 62 + } 63 + 64 + static inline bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev) 65 + { 66 + return true; 63 67 } 64 68 65 69 #endif
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
··· 117 117 mlx5hws_err(ctx, "No such stc_type: %d\n", stc_type); 118 118 pr_warn("HWS: Invalid stc_type: %d\n", stc_type); 119 119 ret = -EINVAL; 120 - goto unlock_and_out; 120 + goto free_shared_stc; 121 121 } 122 122 123 123 ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl_type,
+4 -2
drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c
··· 279 279 return ret; 280 280 281 281 clean_pattern: 282 - mlx5hws_cmd_header_modify_pattern_destroy(ctx->mdev, *pattern_id); 282 + mlx5hws_cmd_header_modify_pattern_destroy(ctx->mdev, ptrn_id); 283 283 out_unlock: 284 284 mutex_unlock(&ctx->pattern_cache->lock); 285 285 return ret; ··· 527 527 u32 *nop_locations, __be64 *new_pat) 528 528 { 529 529 u16 prev_src_field = INVALID_FIELD, prev_dst_field = INVALID_FIELD; 530 - u16 src_field, dst_field; 531 530 u8 action_type; 532 531 bool dependent; 533 532 size_t i, j; ··· 538 539 return 0; 539 540 540 541 for (i = 0, j = 0; i < num_actions; i++, j++) { 542 + u16 src_field = INVALID_FIELD; 543 + u16 dst_field = INVALID_FIELD; 544 + 541 545 if (j >= max_actions) 542 546 return -EINVAL; 543 547
+1
drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
··· 124 124 mlx5hws_err(pool->ctx, "Failed to create resource type: %d size %zu\n", 125 125 pool->type, pool->alloc_log_sz); 126 126 mlx5hws_buddy_cleanup(buddy); 127 + kfree(buddy); 127 128 return -ENOMEM; 128 129 } 129 130