Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mlx5-misc-fixes-2025-12-09'

Tariq Toukan says:

====================
mlx5 misc fixes 2025-12-09

This patchset provides misc bug fixes from the team to the mlx5 core and
Eth drivers.
====================

Link: https://patch.msgid.link/1765284977-1363052-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+152 -23
+5
drivers/net/ethernet/mellanox/mlx5/core/devlink.c
··· 197 197 struct pci_dev *pdev = dev->pdev; 198 198 int ret = 0; 199 199 200 + if (mlx5_fw_reset_in_progress(dev)) { 201 + NL_SET_ERR_MSG_MOD(extack, "Can't reload during firmware reset"); 202 + return -EBUSY; 203 + } 204 + 200 205 if (mlx5_dev_is_lightweight(dev)) { 201 206 if (action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) 202 207 return -EOPNOTSUPP;
+84 -13
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
··· 33 33 #include "lib/eq.h" 34 34 #include "fw_tracer.h" 35 35 #include "fw_tracer_tracepoint.h" 36 + #include <linux/ctype.h> 36 37 37 38 static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer) 38 39 { ··· 359 358 static const char *REPLACE_64_VAL_PARM = "%x%x"; 360 359 static const char *PARAM_CHAR = "%"; 361 360 361 + static bool mlx5_is_valid_spec(const char *str) 362 + { 363 + /* Parse format specifiers to find the actual type. 364 + * Structure: %[flags][width][.precision][length]type 365 + * Skip flags, width, precision & length. 366 + */ 367 + while (isdigit(*str) || *str == '#' || *str == '.' || *str == 'l') 368 + str++; 369 + 370 + /* Check if it's a valid integer/hex specifier or %%: 371 + * Valid formats: %x, %d, %i, %u, etc. 372 + */ 373 + if (*str != 'x' && *str != 'X' && *str != 'd' && *str != 'i' && 374 + *str != 'u' && *str != 'c' && *str != '%') 375 + return false; 376 + 377 + return true; 378 + } 379 + 380 + static bool mlx5_tracer_validate_params(const char *str) 381 + { 382 + const char *substr = str; 383 + 384 + if (!str) 385 + return false; 386 + 387 + substr = strstr(substr, PARAM_CHAR); 388 + while (substr) { 389 + if (!mlx5_is_valid_spec(substr + 1)) 390 + return false; 391 + 392 + if (*(substr + 1) == '%') 393 + substr = strstr(substr + 2, PARAM_CHAR); 394 + else 395 + substr = strstr(substr + 1, PARAM_CHAR); 396 + 397 + } 398 + 399 + return true; 400 + } 401 + 362 402 static int mlx5_tracer_message_hash(u32 message_id) 363 403 { 364 404 return jhash_1word(message_id, 0) & (MESSAGE_HASH_SIZE - 1); ··· 461 419 char *substr, *pstr = str; 462 420 int num_of_params = 0; 463 421 422 + /* Validate that all parameters are valid before processing */ 423 + if (!mlx5_tracer_validate_params(str)) 424 + return -EINVAL; 425 + 464 426 /* replace %llx with %x%x */ 465 427 substr = strstr(pstr, VAL_PARM); 466 428 while (substr) { ··· 473 427 substr = strstr(pstr, VAL_PARM); 474 428 } 475 429 476 - /* count all the % characters */ 430 + /* count all the % characters, but skip %% (escaped percent) */ 477 431 substr = strstr(str, PARAM_CHAR); 478 432 while (substr) { 479 - num_of_params += 1; 480 - str = substr + 1; 433 + if (*(substr + 1) != '%') { 434 + num_of_params += 1; 435 + str = substr + 1; 436 + } else { 437 + str = substr + 2; 438 + } 481 439 substr = strstr(str, PARAM_CHAR); 482 440 } 483 441 ··· 620 570 { 621 571 char tmp[512]; 622 572 623 - snprintf(tmp, sizeof(tmp), str_frmt->string, 624 - str_frmt->params[0], 625 - str_frmt->params[1], 626 - str_frmt->params[2], 627 - str_frmt->params[3], 628 - str_frmt->params[4], 629 - str_frmt->params[5], 630 - str_frmt->params[6]); 573 + if (str_frmt->invalid_string) 574 + snprintf(tmp, sizeof(tmp), "BAD_FORMAT: %s", str_frmt->string); 575 + else 576 + snprintf(tmp, sizeof(tmp), str_frmt->string, 577 + str_frmt->params[0], 578 + str_frmt->params[1], 579 + str_frmt->params[2], 580 + str_frmt->params[3], 581 + str_frmt->params[4], 582 + str_frmt->params[5], 583 + str_frmt->params[6]); 631 584 632 585 trace_mlx5_fw(dev->tracer, trace_timestamp, str_frmt->lost, 633 586 str_frmt->event_id, tmp); ··· 662 609 return 0; 663 610 } 664 611 612 + static void mlx5_tracer_handle_bad_format_string(struct mlx5_fw_tracer *tracer, 613 + struct tracer_string_format *cur_string) 614 + { 615 + cur_string->invalid_string = true; 616 + list_add_tail(&cur_string->list, &tracer->ready_strings_list); 617 + } 618 + 665 619 static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, 666 620 struct tracer_event *tracer_event) 667 621 { ··· 679 619 if (!cur_string) 680 620 return mlx5_tracer_handle_raw_string(tracer, tracer_event); 681 621 682 - cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string); 683 - cur_string->last_param_num = 0; 684 622 cur_string->event_id = tracer_event->event_id; 685 623 cur_string->tmsn = tracer_event->string_event.tmsn; 686 624 cur_string->timestamp = tracer_event->string_event.timestamp; 687 625 cur_string->lost = tracer_event->lost_event; 626 + cur_string->last_param_num = 0; 627 + cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string); 628 + if (cur_string->num_of_params < 0) { 629 + pr_debug("%s Invalid format string parameters\n", 630 + __func__); 631 + mlx5_tracer_handle_bad_format_string(tracer, cur_string); 632 + return 0; 633 + } 688 634 if (cur_string->num_of_params == 0) /* trace with no params */ 689 635 list_add_tail(&cur_string->list, &tracer->ready_strings_list); 690 636 } else { ··· 699 633 pr_debug("%s Got string event for unknown string tmsn: %d\n", 700 634 __func__, tracer_event->string_event.tmsn); 701 635 return mlx5_tracer_handle_raw_string(tracer, tracer_event); 636 + } 637 + if (cur_string->num_of_params < 0) { 638 + pr_debug("%s string parameter of invalid string, dumping\n", 639 + __func__); 640 + return 0; 702 641 } 703 642 cur_string->last_param_num += 1; 704 643 if (cur_string->last_param_num > TRACER_MAX_PARAMS) {
+1
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h
··· 125 125 struct list_head list; 126 126 u32 timestamp; 127 127 bool lost; 128 + bool invalid_string; 128 129 }; 129 130 130 131 enum mlx5_fw_tracer_ownership_state {
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en.h
··· 69 69 #define MLX5E_METADATA_ETHER_TYPE (0x8CE4) 70 70 #define MLX5E_METADATA_ETHER_LEN 8 71 71 72 - #define MLX5E_ETH_HARD_MTU (ETH_HLEN + PSP_ENCAP_HLEN + PSP_TRL_SIZE + VLAN_HLEN + ETH_FCS_LEN) 72 + #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) 73 73 74 74 #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) 75 75 #define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
+5 -3
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
··· 342 342 rt_dst_entry = &rt->dst; 343 343 break; 344 344 case AF_INET6: 345 - rt_dst_entry = ipv6_stub->ipv6_dst_lookup_flow( 346 - dev_net(netdev), NULL, &fl6, NULL); 347 - if (IS_ERR(rt_dst_entry)) 345 + if (!IS_ENABLED(CONFIG_IPV6) || 346 + ip6_dst_lookup(dev_net(netdev), NULL, &rt_dst_entry, &fl6)) 348 347 goto neigh; 349 348 break; 350 349 default: ··· 358 359 359 360 neigh_ha_snapshot(addr, n, netdev); 360 361 ether_addr_copy(dst, addr); 362 + if (attrs->dir == XFRM_DEV_OFFLOAD_OUT && 363 + is_zero_ether_addr(addr)) 364 + neigh_event_send(n, NULL); 361 365 dst_release(rt_dst_entry); 362 366 neigh_release(n); 363 367 return;
+5 -1
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
··· 939 939 sq->dma_fifo_cc = dma_fifo_cc; 940 940 sq->cc = sqcc; 941 941 942 - netdev_tx_completed_queue(sq->txq, npkts, nbytes); 942 + /* Do not update BQL for TXQs that got replaced by new active ones, as 943 + * netdev_tx_reset_queue() is called for them in mlx5e_activate_txqsq(). 944 + */ 945 + if (sq == sq->priv->txq2sq[sq->txq_ix]) 946 + netdev_tx_completed_queue(sq->txq, npkts, nbytes); 943 947 } 944 948 945 949 #ifdef CONFIG_MLX5_CORE_IPOIB
+6
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
··· 52 52 #include "devlink.h" 53 53 #include "lag/lag.h" 54 54 #include "en/tc/post_meter.h" 55 + #include "fw_reset.h" 55 56 56 57 /* There are two match-all miss flows, one for unicast dst mac and 57 58 * one for multicast. ··· 3991 3990 esw = mlx5_devlink_eswitch_get(devlink); 3992 3991 if (IS_ERR(esw)) 3993 3992 return PTR_ERR(esw); 3993 + 3994 + if (mlx5_fw_reset_in_progress(esw->dev)) { 3995 + NL_SET_ERR_MSG_MOD(extack, "Can't change eswitch mode during firmware reset"); 3996 + return -EBUSY; 3997 + } 3994 3998 3995 3999 if (esw_mode_from_devlink(mode, &mlx5_mode)) 3996 4000 return -EINVAL;
+43 -5
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
··· 15 15 MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, 16 16 MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, 17 17 MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, 18 + MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, 18 19 }; 19 20 20 21 struct mlx5_fw_reset { ··· 127 126 int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type) 128 127 { 129 128 return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL, NULL); 129 + } 130 + 131 + bool mlx5_fw_reset_in_progress(struct mlx5_core_dev *dev) 132 + { 133 + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; 134 + 135 + if (!fw_reset) 136 + return false; 137 + 138 + return test_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); 130 139 } 131 140 132 141 static int mlx5_fw_reset_get_reset_method(struct mlx5_core_dev *dev, ··· 254 243 BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); 255 244 devl_unlock(devlink); 256 245 } 246 + 247 + clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); 257 248 } 258 249 259 250 static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) ··· 475 462 struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, 476 463 reset_request_work); 477 464 struct mlx5_core_dev *dev = fw_reset->dev; 465 + bool nack_request = false; 466 + struct devlink *devlink; 478 467 int err; 479 468 480 469 err = mlx5_fw_reset_get_reset_method(dev, &fw_reset->reset_method); 481 - if (err) 470 + if (err) { 471 + nack_request = true; 482 472 mlx5_core_warn(dev, "Failed reading MFRL, err %d\n", err); 473 + } else if (!mlx5_is_reset_now_capable(dev, fw_reset->reset_method) || 474 + test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, 475 + &fw_reset->reset_flags)) { 476 + nack_request = true; 477 + } 483 478 484 - if (err || test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) || 485 - !mlx5_is_reset_now_capable(dev, fw_reset->reset_method)) { 479 + devlink = priv_to_devlink(dev); 480 + /* For external resets, try to acquire devl_lock. Skip if devlink reset is 481 + * pending (lock already held) 482 + */ 483 + if (nack_request || 484 + (!test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, 485 + &fw_reset->reset_flags) && 486 + !devl_trylock(devlink))) { 486 487 err = mlx5_fw_reset_set_reset_sync_nack(dev); 487 488 mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s", 488 489 err ? "Failed" : "Sent"); 489 490 return; 490 491 } 492 + 491 493 if (mlx5_sync_reset_set_reset_requested(dev)) 492 - return; 494 + goto unlock; 495 + 496 + set_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); 493 497 494 498 err = mlx5_fw_reset_set_reset_sync_ack(dev); 495 499 if (err) 496 500 mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err); 497 501 else 498 502 mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n"); 503 + 504 + unlock: 505 + if (!test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) 506 + devl_unlock(devlink); 499 507 } 500 508 501 509 static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev, u16 dev_id) ··· 756 722 757 723 if (mlx5_sync_reset_clear_reset_requested(dev, true)) 758 724 return; 725 + 726 + clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); 759 727 mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); 760 728 } 761 729 ··· 794 758 795 759 if (mlx5_sync_reset_clear_reset_requested(dev, true)) 796 760 return; 761 + clear_bit(MLX5_FW_RESET_FLAGS_RESET_IN_PROGRESS, &fw_reset->reset_flags); 797 762 mlx5_core_warn(dev, "PCI Sync FW Update Reset Timeout.\n"); 798 763 } 799 764 ··· 881 844 cancel_work_sync(&fw_reset->reset_reload_work); 882 845 cancel_work_sync(&fw_reset->reset_now_work); 883 846 cancel_work_sync(&fw_reset->reset_abort_work); 884 - cancel_delayed_work(&fw_reset->reset_timeout_work); 847 + if (test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) 848 + mlx5_sync_reset_clear_reset_requested(dev, true); 885 849 } 886 850 887 851 static const struct devlink_param mlx5_fw_reset_devlink_params[] = {
+1
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h
··· 10 10 int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, 11 11 struct netlink_ext_ack *extack); 12 12 int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); 13 + bool mlx5_fw_reset_in_progress(struct mlx5_core_dev *dev); 13 14 14 15 int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); 15 16 void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked);
+1
drivers/net/ethernet/mellanox/mlx5/core/main.c
··· 2232 2232 2233 2233 mlx5_core_info(dev, "Shutdown was called\n"); 2234 2234 set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); 2235 + mlx5_drain_fw_reset(dev); 2235 2236 mlx5_drain_health_wq(dev); 2236 2237 err = mlx5_try_fast_unload(dev); 2237 2238 if (err)