Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/mlx5: Add support for devlink reload action fw activate

Add support for the devlink reload action fw_activate. To activate a firmware
image, the mlx5 driver resets the firmware and reloads it from flash. If
a new image was stored on flash, it will be loaded. Once this reload
command is executed, the driver initiates the fw sync reset flow, in which the
firmware synchronizes all PFs on the coming reset and driver reload.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Moshe Shemesh and committed by
Jakub Kicinski
5ec69744 7dd6df32

+108 -11
+54 -5
drivers/net/ethernet/mellanox/mlx5/core/devlink.c
··· 4 4 #include <devlink.h> 5 5 6 6 #include "mlx5_core.h" 7 + #include "fw_reset.h" 7 8 #include "fs_core.h" 8 9 #include "eswitch.h" 9 10 ··· 85 84 return 0; 86 85 } 87 86 87 + static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netlink_ext_ack *extack) 88 + { 89 + struct mlx5_core_dev *dev = devlink_priv(devlink); 90 + u8 reset_level, reset_type, net_port_alive; 91 + int err; 92 + 93 + err = mlx5_fw_reset_query(dev, &reset_level, &reset_type); 94 + if (err) 95 + return err; 96 + if (!(reset_level & MLX5_MFRL_REG_RESET_LEVEL3)) { 97 + NL_SET_ERR_MSG_MOD(extack, "FW activate requires reboot"); 98 + return -EINVAL; 99 + } 100 + 101 + net_port_alive = !!(reset_type & MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE); 102 + err = mlx5_fw_reset_set_reset_sync(dev, net_port_alive); 103 + if (err) 104 + goto out; 105 + 106 + err = mlx5_fw_reset_wait_reset_done(dev); 107 + out: 108 + if (err) 109 + NL_SET_ERR_MSG_MOD(extack, "FW activate command failed"); 110 + return err; 111 + } 112 + 88 113 static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, 89 114 enum devlink_reload_action action, 90 115 enum devlink_reload_limit limit, ··· 118 91 { 119 92 struct mlx5_core_dev *dev = devlink_priv(devlink); 120 93 121 - mlx5_unload_one(dev, false); 122 - return 0; 94 + switch (action) { 95 + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: 96 + mlx5_unload_one(dev, false); 97 + return 0; 98 + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: 99 + return mlx5_devlink_reload_fw_activate(devlink, extack); 100 + default: 101 + /* Unsupported action should not get to this function */ 102 + WARN_ON(1); 103 + return -EOPNOTSUPP; 104 + } 123 105 } 124 106 125 107 static int mlx5_devlink_reload_up(struct devlink *devlink, enum devlink_reload_action action, ··· 137 101 { 138 102 struct mlx5_core_dev *dev = devlink_priv(devlink); 139 103 140 - *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); 141 - return mlx5_load_one(dev, false); 104 + *actions_performed 
= BIT(action); 105 + switch (action) { 106 + case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: 107 + case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: 108 + /* On fw_activate action, also driver is reloaded and reinit performed */ 109 + *actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); 110 + return mlx5_load_one(dev, false); 111 + default: 112 + /* Unsupported action should not get to this function */ 113 + WARN_ON(1); 114 + return -EOPNOTSUPP; 115 + } 116 + 117 + return 0; 142 118 } 143 119 144 120 static const struct devlink_ops mlx5_devlink_ops = { ··· 166 118 #endif 167 119 .flash_update = mlx5_devlink_flash_update, 168 120 .info_get = mlx5_devlink_info_get, 169 - .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT), 121 + .reload_actions = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | 122 + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE), 170 123 .reload_down = mlx5_devlink_reload_down, 171 124 .reload_up = mlx5_devlink_reload_up, 172 125 };
+53 -6
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
··· 5 5 6 6 enum { 7 7 MLX5_FW_RESET_FLAGS_RESET_REQUESTED, 8 + MLX5_FW_RESET_FLAGS_PENDING_COMP 8 9 }; 9 10 10 11 struct mlx5_fw_reset { ··· 18 17 struct work_struct reset_abort_work; 19 18 unsigned long reset_flags; 20 19 struct timer_list timer; 20 + struct completion done; 21 + int ret; 21 22 }; 22 23 23 24 static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level, ··· 61 58 62 59 int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel) 63 60 { 64 - return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, reset_type_sel, 0, true); 61 + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; 62 + int err; 63 + 64 + set_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); 65 + err = mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, reset_type_sel, 0, true); 66 + if (err) 67 + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); 68 + return err; 65 69 } 66 70 67 71 int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) ··· 76 66 return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); 77 67 } 78 68 69 + static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) 70 + { 71 + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; 72 + 73 + /* if this is the driver that initiated the fw reset, devlink completed the reload */ 74 + if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { 75 + complete(&fw_reset->done); 76 + } else { 77 + mlx5_load_one(dev, false); 78 + devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, 79 + BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | 80 + BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); 81 + } 82 + } 83 + 79 84 static void mlx5_sync_reset_reload_work(struct work_struct *work) 80 85 { 81 86 struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, 82 87 reset_reload_work); 83 88 struct mlx5_core_dev *dev = fw_reset->dev; 89 + int err; 84 90 85 91 mlx5_enter_error_state(dev, true); 86 92 mlx5_unload_one(dev, 
false); 87 - if (mlx5_health_wait_pci_up(dev)) { 93 + err = mlx5_health_wait_pci_up(dev); 94 + if (err) 88 95 mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); 89 - return; 90 - } 91 - mlx5_load_one(dev, false); 96 + fw_reset->ret = err; 97 + mlx5_fw_reset_complete_reload(dev); 92 98 } 93 99 94 100 static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) ··· 295 269 mlx5_enter_error_state(dev, true); 296 270 mlx5_unload_one(dev, false); 297 271 done: 298 - mlx5_load_one(dev, false); 272 + fw_reset->ret = err; 273 + mlx5_fw_reset_complete_reload(dev); 299 274 } 300 275 301 276 static void mlx5_sync_reset_abort_event(struct work_struct *work) ··· 345 318 return NOTIFY_OK; 346 319 } 347 320 321 + #define MLX5_FW_RESET_TIMEOUT_MSEC 5000 322 + int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) 323 + { 324 + unsigned long timeout = msecs_to_jiffies(MLX5_FW_RESET_TIMEOUT_MSEC); 325 + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; 326 + int err; 327 + 328 + if (!wait_for_completion_timeout(&fw_reset->done, timeout)) { 329 + mlx5_core_warn(dev, "FW sync reset timeout after %d seconds\n", 330 + MLX5_FW_RESET_TIMEOUT_MSEC / 1000); 331 + err = -ETIMEDOUT; 332 + goto out; 333 + } 334 + err = fw_reset->ret; 335 + out: 336 + clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); 337 + return err; 338 + } 339 + 348 340 void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev) 349 341 { 350 342 struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; ··· 397 351 INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event); 398 352 INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event); 399 353 354 + init_completion(&fw_reset->done); 400 355 return 0; 401 356 } 402 357
+1
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
··· 10 10 int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel); 11 11 int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); 12 12 13 + int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); 13 14 void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev); 14 15 void mlx5_fw_reset_events_stop(struct mlx5_core_dev *dev); 15 16 int mlx5_fw_reset_init(struct mlx5_core_dev *dev);