Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'mlx5-fixes-2022-11-21' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5 fixes 2022-11-21

This series provides bug fixes to the mlx5 driver.

* tag 'mlx5-fixes-2022-11-21' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux:
net/mlx5e: Fix possible race condition in macsec extended packet number update routine
net/mlx5e: Fix MACsec update SecY
net/mlx5e: Fix MACsec SA initialization routine
net/mlx5e: Remove leftovers from old XSK queues enumeration
net/mlx5e: Offload rule only when all encaps are valid
net/mlx5e: Fix missing alignment in size of MTT/KLM entries
net/mlx5: Fix sync reset event handler error flow
net/mlx5: E-Switch, Set correctly vport destination
net/mlx5: Lag, avoid lockdep warnings
net/mlx5: Fix handling of entry refcount when command is not issued to FW
net/mlx5: cmdif, Print info on any firmware cmd failure to tracepoint
net/mlx5: SF: Fix probing active SFs during driver probe phase
net/mlx5: Fix FW tracer timestamp calculation
net/mlx5: Do not query pci info while pci disabled
====================

Link: https://lore.kernel.org/r/20221122022559.89459-1-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+285 -122
+25 -22
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
··· 45 45 #include "mlx5_core.h" 46 46 #include "lib/eq.h" 47 47 #include "lib/tout.h" 48 + #define CREATE_TRACE_POINTS 49 + #include "diag/cmd_tracepoint.h" 48 50 49 51 enum { 50 52 CMD_IF_REV = 5, ··· 787 785 static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) 788 786 { 789 787 u16 opcode, op_mod; 790 - u32 syndrome; 791 - u8 status; 792 788 u16 uid; 793 - int err; 794 - 795 - syndrome = MLX5_GET(mbox_out, out, syndrome); 796 - status = MLX5_GET(mbox_out, out, status); 797 789 798 790 opcode = MLX5_GET(mbox_in, in, opcode); 799 791 op_mod = MLX5_GET(mbox_in, in, op_mod); 800 792 uid = MLX5_GET(mbox_in, in, uid); 801 793 802 - err = cmd_status_to_err(status); 803 - 804 794 if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY) 805 795 mlx5_cmd_out_err(dev, opcode, op_mod, out); 806 - else 807 - mlx5_core_dbg(dev, 808 - "%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n", 809 - mlx5_command_str(opcode), opcode, op_mod, uid, 810 - cmd_status_str(status), status, syndrome, err); 811 796 } 812 797 813 798 int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) ··· 1005 1016 cmd_ent_get(ent); 1006 1017 set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state); 1007 1018 1019 + cmd_ent_get(ent); /* for the _real_ FW event on completion */ 1008 1020 /* Skip sending command to fw if internal error */ 1009 1021 if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) { 1010 1022 ent->ret = -ENXIO; ··· 1013 1023 return; 1014 1024 } 1015 1025 1016 - cmd_ent_get(ent); /* for the _real_ FW event on completion */ 1017 1026 /* ring doorbell after the descriptor is valid */ 1018 1027 mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx); 1019 1028 wmb(); ··· 1661 1672 cmd_ent_put(ent); /* timeout work was canceled */ 1662 1673 1663 1674 if (!forced || /* Real FW completion */ 1664 - pci_channel_offline(dev->pdev) || /* FW is inaccessible */ 1665 - dev->state == 
MLX5_DEVICE_STATE_INTERNAL_ERROR) 1675 + mlx5_cmd_is_down(dev) || /* No real FW completion is expected */ 1676 + !opcode_allowed(cmd, ent->op)) 1666 1677 cmd_ent_put(ent); 1667 1678 1668 1679 ent->ts2 = ktime_get_ns(); ··· 1881 1892 return err; 1882 1893 } 1883 1894 1895 + static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) 1896 + { 1897 + u32 syndrome = MLX5_GET(mbox_out, out, syndrome); 1898 + u8 status = MLX5_GET(mbox_out, out, status); 1899 + 1900 + trace_mlx5_cmd(mlx5_command_str(opcode), opcode, op_mod, 1901 + cmd_status_str(status), status, syndrome, 1902 + cmd_status_to_err(status)); 1903 + } 1904 + 1884 1905 static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status, 1885 1906 u32 syndrome, int err) 1886 1907 { ··· 1913 1914 } 1914 1915 1915 1916 /* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */ 1916 - static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out) 1917 + static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op_mod, void *out) 1917 1918 { 1918 1919 u32 syndrome = MLX5_GET(mbox_out, out, syndrome); 1919 1920 u8 status = MLX5_GET(mbox_out, out, status); ··· 1921 1922 if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */ 1922 1923 err = -EIO; 1923 1924 1924 - if (!err && status != MLX5_CMD_STAT_OK) 1925 + if (!err && status != MLX5_CMD_STAT_OK) { 1925 1926 err = -EREMOTEIO; 1927 + mlx5_cmd_err_trace(dev, opcode, op_mod, out); 1928 + } 1926 1929 1927 1930 cmd_status_log(dev, opcode, status, syndrome, err); 1928 1931 return err; ··· 1952 1951 { 1953 1952 int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); 1954 1953 u16 opcode = MLX5_GET(mbox_in, in, opcode); 1954 + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); 1955 1955 1956 - err = cmd_status_err(dev, err, opcode, out); 1957 - return err; 1956 + return cmd_status_err(dev, err, opcode, op_mod, out); 1958 1957 } 1959 1958 EXPORT_SYMBOL(mlx5_cmd_do); 
1960 1959 ··· 1998 1997 { 1999 1998 int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); 2000 1999 u16 opcode = MLX5_GET(mbox_in, in, opcode); 2000 + u16 op_mod = MLX5_GET(mbox_in, in, op_mod); 2001 2001 2002 - err = cmd_status_err(dev, err, opcode, out); 2002 + err = cmd_status_err(dev, err, opcode, op_mod, out); 2003 2003 return mlx5_cmd_check(dev, err, in, out); 2004 2004 } 2005 2005 EXPORT_SYMBOL(mlx5_cmd_exec_polling); ··· 2036 2034 struct mlx5_async_ctx *ctx; 2037 2035 2038 2036 ctx = work->ctx; 2039 - status = cmd_status_err(ctx->dev, status, work->opcode, work->out); 2037 + status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out); 2040 2038 work->user_callback(status, work); 2041 2039 if (atomic_dec_and_test(&ctx->num_inflight)) 2042 2040 complete(&ctx->inflight_done); ··· 2051 2049 work->ctx = ctx; 2052 2050 work->user_callback = callback; 2053 2051 work->opcode = MLX5_GET(mbox_in, in, opcode); 2052 + work->op_mod = MLX5_GET(mbox_in, in, op_mod); 2054 2053 work->out = out; 2055 2054 if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) 2056 2055 return -EIO;
+45
drivers/net/ethernet/mellanox/mlx5/core/diag/cmd_tracepoint.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 2 + /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ 3 + 4 + #undef TRACE_SYSTEM 5 + #define TRACE_SYSTEM mlx5 6 + 7 + #if !defined(_MLX5_CMD_TP_H_) || defined(TRACE_HEADER_MULTI_READ) 8 + #define _MLX5_CMD_TP_H_ 9 + 10 + #include <linux/tracepoint.h> 11 + #include <linux/trace_seq.h> 12 + 13 + TRACE_EVENT(mlx5_cmd, 14 + TP_PROTO(const char *command_str, u16 opcode, u16 op_mod, 15 + const char *status_str, u8 status, u32 syndrome, int err), 16 + TP_ARGS(command_str, opcode, op_mod, status_str, status, syndrome, err), 17 + TP_STRUCT__entry(__string(command_str, command_str) 18 + __field(u16, opcode) 19 + __field(u16, op_mod) 20 + __string(status_str, status_str) 21 + __field(u8, status) 22 + __field(u32, syndrome) 23 + __field(int, err) 24 + ), 25 + TP_fast_assign(__assign_str(command_str, command_str); 26 + __entry->opcode = opcode; 27 + __entry->op_mod = op_mod; 28 + __assign_str(status_str, status_str); 29 + __entry->status = status; 30 + __entry->syndrome = syndrome; 31 + __entry->err = err; 32 + ), 33 + TP_printk("%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)", 34 + __get_str(command_str), __entry->opcode, __entry->op_mod, 35 + __get_str(status_str), __entry->status, __entry->syndrome, 36 + __entry->err) 37 + ); 38 + 39 + #endif /* _MLX5_CMD_TP_H_ */ 40 + 41 + #undef TRACE_INCLUDE_PATH 42 + #define TRACE_INCLUDE_PATH ./diag 43 + #undef TRACE_INCLUDE_FILE 44 + #define TRACE_INCLUDE_FILE cmd_tracepoint 45 + #include <trace/define_trace.h>
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
··· 638 638 trace_timestamp = (timestamp_event.timestamp & MASK_52_7) | 639 639 (str_frmt->timestamp & MASK_6_0); 640 640 else 641 - trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) | 641 + trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) | 642 642 (str_frmt->timestamp & MASK_6_0); 643 643 644 644 mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp);
+8 -8
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
··· 224 224 list_for_each_entry(flow, flow_list, tmp_list) { 225 225 if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW)) 226 226 continue; 227 - spec = &flow->attr->parse_attr->spec; 228 - 229 - /* update from encap rule to slow path rule */ 230 - rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); 231 227 232 228 attr = mlx5e_tc_get_encap_attr(flow); 233 229 esw_attr = attr->esw_attr; 234 230 /* mark the flow's encap dest as non-valid */ 235 231 esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID; 232 + esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL; 233 + 234 + /* update from encap rule to slow path rule */ 235 + spec = &flow->attr->parse_attr->spec; 236 + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec); 236 237 237 238 if (IS_ERR(rule)) { 238 239 err = PTR_ERR(rule); ··· 252 251 /* we know that the encap is valid */ 253 252 e->flags &= ~MLX5_ENCAP_ENTRY_VALID; 254 253 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat); 254 + e->pkt_reformat = NULL; 255 255 } 256 256 257 257 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow, ··· 764 762 struct net_device *mirred_dev, 765 763 int out_index, 766 764 struct netlink_ext_ack *extack, 767 - struct net_device **encap_dev, 768 - bool *encap_valid) 765 + struct net_device **encap_dev) 769 766 { 770 767 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; 771 768 struct mlx5e_tc_flow_parse_attr *parse_attr; ··· 879 878 if (e->flags & MLX5_ENCAP_ENTRY_VALID) { 880 879 attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat; 881 880 attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID; 882 - *encap_valid = true; 883 881 } else { 884 - *encap_valid = false; 882 + flow_flag_set(flow, SLOW); 885 883 } 886 884 mutex_unlock(&esw->offloads.encap_tbl_lock); 887 885
+1 -2
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.h
··· 17 17 struct net_device *mirred_dev, 18 18 int out_index, 19 19 struct netlink_ext_ack *extack, 20 - struct net_device **encap_dev, 21 - bool *encap_valid); 20 + struct net_device **encap_dev); 22 21 23 22 int mlx5e_attach_decap(struct mlx5e_priv *priv, 24 23 struct mlx5e_tc_flow *flow,
+11 -8
drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
··· 368 368 obj_attrs.aso_pdn = macsec->aso.pdn; 369 369 obj_attrs.epn_state = sa->epn_state; 370 370 371 - if (is_tx) { 372 - obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci); 373 - key = &ctx->sa.tx_sa->key; 374 - } else { 375 - obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); 376 - key = &ctx->sa.rx_sa->key; 371 + key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key; 372 + 373 + if (sa->epn_state.epn_enabled) { 374 + obj_attrs.ssci = (is_tx) ? cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) : 375 + cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci); 376 + 377 + memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); 377 378 } 378 379 379 - memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt)); 380 380 obj_attrs.replay_window = ctx->secy->replay_window; 381 381 obj_attrs.replay_protect = ctx->secy->replay_protect; 382 382 ··· 1155 1155 continue; 1156 1156 1157 1157 if (rx_sa->active) { 1158 - err = mlx5e_macsec_init_sa(ctx, rx_sa, false, false); 1158 + err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false); 1159 1159 if (err) 1160 1160 goto out; 1161 1161 } ··· 1536 1536 1537 1537 async_work = container_of(work, struct mlx5e_macsec_async_work, work); 1538 1538 macsec = async_work->macsec; 1539 + mutex_lock(&macsec->lock); 1540 + 1539 1541 mdev = async_work->mdev; 1540 1542 obj_id = async_work->obj_id; 1541 1543 macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id); ··· 1559 1557 1560 1558 out_async_work: 1561 1559 kfree(async_work); 1560 + mutex_unlock(&macsec->lock); 1562 1561 } 1563 1562 1564 1563 static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data)
-18
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
··· 35 35 #include "en.h" 36 36 #include "en/port.h" 37 37 #include "en/params.h" 38 - #include "en/xsk/pool.h" 39 38 #include "en/ptp.h" 40 39 #include "lib/clock.h" 41 40 #include "en/fs_ethtool.h" ··· 411 412 struct ethtool_channels *ch) 412 413 { 413 414 mutex_lock(&priv->state_lock); 414 - 415 415 ch->max_combined = priv->max_nch; 416 416 ch->combined_count = priv->channels.params.num_channels; 417 - if (priv->xsk.refcnt) { 418 - /* The upper half are XSK queues. */ 419 - ch->max_combined *= 2; 420 - ch->combined_count *= 2; 421 - } 422 - 423 417 mutex_unlock(&priv->state_lock); 424 418 } 425 419 ··· 445 453 return 0; 446 454 447 455 mutex_lock(&priv->state_lock); 448 - 449 - /* Don't allow changing the number of channels if there is an active 450 - * XSK, because the numeration of the XSK and regular RQs will change. 451 - */ 452 - if (priv->xsk.refcnt) { 453 - err = -EINVAL; 454 - netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n", 455 - __func__); 456 - goto out; 457 - } 458 456 459 457 /* Don't allow changing the number of channels if HTB offload is active, 460 458 * because the numeration of the QoS SQs will change, while per-queue
+3 -2
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
··· 206 206 static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode) 207 207 { 208 208 u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode); 209 + u32 sz; 209 210 210 - WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD); 211 + sz = ALIGN(entries * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT); 211 212 212 - return entries * umr_entry_size / MLX5_OCTWORD; 213 + return sz / MLX5_OCTWORD; 213 214 } 214 215 215 216 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
+6 -11
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
··· 1634 1634 struct mlx5e_tc_flow *flow, 1635 1635 struct mlx5_flow_attr *attr, 1636 1636 struct netlink_ext_ack *extack, 1637 - bool *encap_valid, 1638 1637 bool *vf_tun) 1639 1638 { 1640 1639 struct mlx5e_tc_flow_parse_attr *parse_attr; ··· 1650 1651 parse_attr = attr->parse_attr; 1651 1652 esw_attr = attr->esw_attr; 1652 1653 *vf_tun = false; 1653 - *encap_valid = true; 1654 1654 1655 1655 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { 1656 1656 struct net_device *out_dev; ··· 1666 1668 goto out; 1667 1669 } 1668 1670 err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index, 1669 - extack, &encap_dev, encap_valid); 1671 + extack, &encap_dev); 1670 1672 dev_put(out_dev); 1671 1673 if (err) 1672 1674 goto out; ··· 1730 1732 struct mlx5e_tc_flow_parse_attr *parse_attr; 1731 1733 struct mlx5_flow_attr *attr = flow->attr; 1732 1734 struct mlx5_esw_flow_attr *esw_attr; 1733 - bool vf_tun, encap_valid; 1734 1735 u32 max_prio, max_chain; 1736 + bool vf_tun; 1735 1737 int err = 0; 1736 1738 1737 1739 parse_attr = attr->parse_attr; ··· 1821 1823 esw_attr->int_port = int_port; 1822 1824 } 1823 1825 1824 - err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun); 1826 + err = set_encap_dests(priv, flow, attr, extack, &vf_tun); 1825 1827 if (err) 1826 1828 goto err_out; 1827 1829 ··· 1851 1853 * (1) there's no error 1852 1854 * (2) there's an encap action and we don't have valid neigh 1853 1855 */ 1854 - if (!encap_valid || flow_flag_test(flow, SLOW)) 1856 + if (flow_flag_test(flow, SLOW)) 1855 1857 flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec); 1856 1858 else 1857 1859 flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); ··· 3757 3759 struct mlx5e_post_act *post_act = get_post_action(flow->priv); 3758 3760 struct mlx5_flow_attr *attr, *next_attr = NULL; 3759 3761 struct mlx5e_post_act_handle *handle; 3760 - bool vf_tun, encap_valid = true; 3762 + bool vf_tun; 3761 3763 int 
err; 3762 3764 3763 3765 /* This is going in reverse order as needed. ··· 3779 3781 if (list_is_last(&attr->list, &flow->attrs)) 3780 3782 break; 3781 3783 3782 - err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun); 3784 + err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun); 3783 3785 if (err) 3784 3786 goto out_free; 3785 - 3786 - if (!encap_valid) 3787 - flow_flag_set(flow, SLOW); 3788 3787 3789 3788 err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); 3790 3789 if (err)
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
··· 433 433 mlx5_lag_mpesw_is_activated(esw->dev)) 434 434 dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; 435 435 } 436 - if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { 436 + if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) { 437 437 if (pkt_reformat) { 438 438 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; 439 439 flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;
+7 -2
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
··· 9 9 MLX5_FW_RESET_FLAGS_RESET_REQUESTED, 10 10 MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, 11 11 MLX5_FW_RESET_FLAGS_PENDING_COMP, 12 - MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS 12 + MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, 13 + MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED 13 14 }; 14 15 15 16 struct mlx5_fw_reset { ··· 407 406 err = mlx5_pci_link_toggle(dev); 408 407 if (err) { 409 408 mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err); 410 - goto done; 409 + set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags); 411 410 } 412 411 413 412 mlx5_enter_error_state(dev, true); ··· 483 482 goto out; 484 483 } 485 484 err = fw_reset->ret; 485 + if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) { 486 + mlx5_unload_one_devl_locked(dev); 487 + mlx5_load_one_devl_locked(dev, false); 488 + } 486 489 out: 487 490 clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags); 488 491 return err;
+1 -2
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
··· 228 228 if (ldev->nb.notifier_call) 229 229 unregister_netdevice_notifier_net(&init_net, &ldev->nb); 230 230 mlx5_lag_mp_cleanup(ldev); 231 - mlx5_lag_mpesw_cleanup(ldev); 232 - cancel_work_sync(&ldev->mpesw_work); 233 231 destroy_workqueue(ldev->wq); 232 + mlx5_lag_mpesw_cleanup(ldev); 234 233 mutex_destroy(&ldev->lock); 235 234 kfree(ldev); 236 235 }
+13 -1
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
··· 50 50 enum netdev_lag_hash hash_type; 51 51 }; 52 52 53 + enum mpesw_op { 54 + MLX5_MPESW_OP_ENABLE, 55 + MLX5_MPESW_OP_DISABLE, 56 + }; 57 + 58 + struct mlx5_mpesw_work_st { 59 + struct work_struct work; 60 + struct mlx5_lag *lag; 61 + enum mpesw_op op; 62 + struct completion comp; 63 + int result; 64 + }; 65 + 53 66 /* LAG data of a ConnectX card. 54 67 * It serves both its phys functions. 55 68 */ ··· 79 66 struct lag_tracker tracker; 80 67 struct workqueue_struct *wq; 81 68 struct delayed_work bond_work; 82 - struct work_struct mpesw_work; 83 69 struct notifier_block nb; 84 70 struct lag_mp lag_mp; 85 71 struct mlx5_lag_port_sel port_sel;
+68 -40
drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
··· 7 7 #include "eswitch.h" 8 8 #include "lib/mlx5.h" 9 9 10 - void mlx5_mpesw_work(struct work_struct *work) 10 + static int add_mpesw_rule(struct mlx5_lag *ldev) 11 11 { 12 - struct mlx5_lag *ldev = container_of(work, struct mlx5_lag, mpesw_work); 12 + struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; 13 + int err; 13 14 14 - mutex_lock(&ldev->lock); 15 - mlx5_disable_lag(ldev); 16 - mutex_unlock(&ldev->lock); 15 + if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) 16 + return 0; 17 + 18 + if (ldev->mode != MLX5_LAG_MODE_NONE) { 19 + err = -EINVAL; 20 + goto out_err; 21 + } 22 + 23 + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); 24 + if (err) { 25 + mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); 26 + goto out_err; 27 + } 28 + 29 + return 0; 30 + 31 + out_err: 32 + atomic_dec(&ldev->lag_mpesw.mpesw_rule_count); 33 + return err; 17 34 } 18 35 19 - static void mlx5_lag_disable_mpesw(struct mlx5_core_dev *dev) 36 + static void del_mpesw_rule(struct mlx5_lag *ldev) 20 37 { 21 - struct mlx5_lag *ldev = dev->priv.lag; 22 - 23 - if (!queue_work(ldev->wq, &ldev->mpesw_work)) 24 - mlx5_core_warn(dev, "failed to queue work\n"); 25 - } 26 - 27 - void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) 28 - { 29 - struct mlx5_lag *ldev = dev->priv.lag; 30 - 31 - if (!ldev) 32 - return; 33 - 34 - mutex_lock(&ldev->lock); 35 38 if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) && 36 39 ldev->mode == MLX5_LAG_MODE_MPESW) 37 - mlx5_lag_disable_mpesw(dev); 38 - mutex_unlock(&ldev->lock); 40 + mlx5_disable_lag(ldev); 39 41 } 40 42 41 - int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) 43 + static void mlx5_mpesw_work(struct work_struct *work) 44 + { 45 + struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work); 46 + struct mlx5_lag *ldev = mpesww->lag; 47 + 48 + mutex_lock(&ldev->lock); 49 + if (mpesww->op == MLX5_MPESW_OP_ENABLE) 50 + mpesww->result = add_mpesw_rule(ldev); 51 
+ else if (mpesww->op == MLX5_MPESW_OP_DISABLE) 52 + del_mpesw_rule(ldev); 53 + mutex_unlock(&ldev->lock); 54 + 55 + complete(&mpesww->comp); 56 + } 57 + 58 + static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev, 59 + enum mpesw_op op) 42 60 { 43 61 struct mlx5_lag *ldev = dev->priv.lag; 62 + struct mlx5_mpesw_work_st *work; 44 63 int err = 0; 45 64 46 65 if (!ldev) 47 66 return 0; 48 67 49 - mutex_lock(&ldev->lock); 50 - if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) 51 - goto out; 68 + work = kzalloc(sizeof(*work), GFP_KERNEL); 69 + if (!work) 70 + return -ENOMEM; 52 71 53 - if (ldev->mode != MLX5_LAG_MODE_NONE) { 72 + INIT_WORK(&work->work, mlx5_mpesw_work); 73 + init_completion(&work->comp); 74 + work->op = op; 75 + work->lag = ldev; 76 + 77 + if (!queue_work(ldev->wq, &work->work)) { 78 + mlx5_core_warn(dev, "failed to queue mpesw work\n"); 54 79 err = -EINVAL; 55 80 goto out; 56 81 } 57 - 58 - err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); 59 - if (err) 60 - mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); 61 - 82 + wait_for_completion(&work->comp); 83 + err = work->result; 62 84 out: 63 - mutex_unlock(&ldev->lock); 85 + kfree(work); 64 86 return err; 87 + } 88 + 89 + void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) 90 + { 91 + mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE); 92 + } 93 + 94 + int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) 95 + { 96 + return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE); 65 97 } 66 98 67 99 int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev) ··· 103 71 if (!netif_is_bond_master(out_dev) || !ldev) 104 72 return 0; 105 73 106 - mutex_lock(&ldev->lock); 107 - if (ldev->mode == MLX5_LAG_MODE_MPESW) { 108 - mutex_unlock(&ldev->lock); 74 + if (ldev->mode == MLX5_LAG_MODE_MPESW) 109 75 return -EOPNOTSUPP; 110 - } 111 - mutex_unlock(&ldev->lock); 76 + 112 77 return 0; 113 78 } 114 79 ··· 119 90 120 91 void 
mlx5_lag_mpesw_init(struct mlx5_lag *ldev) 121 92 { 122 - INIT_WORK(&ldev->mpesw_work, mlx5_mpesw_work); 123 93 atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0); 124 94 } 125 95 126 96 void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) 127 97 { 128 - cancel_delayed_work_sync(&ldev->bond_work); 98 + WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count)); 129 99 }
-1
drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
··· 12 12 atomic_t mpesw_rule_count; 13 13 }; 14 14 15 - void mlx5_mpesw_work(struct work_struct *work); 16 15 int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev); 17 16 bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev); 18 17 #if IS_ENABLED(CONFIG_MLX5_ESWITCH)
+6 -3
drivers/net/ethernet/mellanox/mlx5/core/main.c
··· 1798 1798 res = state == pci_channel_io_perm_failure ? 1799 1799 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; 1800 1800 1801 - mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res)); 1801 + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n", 1802 + __func__, dev->state, dev->pci_status, res, result2str(res)); 1802 1803 return res; 1803 1804 } 1804 1805 ··· 1838 1837 struct mlx5_core_dev *dev = pci_get_drvdata(pdev); 1839 1838 int err; 1840 1839 1841 - mlx5_pci_trace(dev, "Enter\n"); 1840 + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n", 1841 + __func__, dev->state, dev->pci_status); 1842 1842 1843 1843 err = mlx5_pci_enable_device(dev); 1844 1844 if (err) { ··· 1861 1859 1862 1860 res = PCI_ERS_RESULT_RECOVERED; 1863 1861 out: 1864 - mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res)); 1862 + mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n", 1863 + __func__, dev->state, dev->pci_status, err, res, result2str(res)); 1865 1864 return res; 1866 1865 } 1867 1866
+88
drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
··· 18 18 phys_addr_t base_address; 19 19 u64 sf_bar_length; 20 20 struct notifier_block nb; 21 + struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */ 22 + struct workqueue_struct *active_wq; 23 + struct work_struct work; 24 + u8 stop_active_wq:1; 21 25 struct mlx5_core_dev *dev; 22 26 }; 23 27 ··· 172 168 return 0; 173 169 174 170 sf_index = event->function_id - base_id; 171 + mutex_lock(&table->table_lock); 175 172 sf_dev = xa_load(&table->devices, sf_index); 176 173 switch (event->new_vhca_state) { 177 174 case MLX5_VHCA_STATE_INVALID: ··· 196 191 default: 197 192 break; 198 193 } 194 + mutex_unlock(&table->table_lock); 199 195 return 0; 200 196 } 201 197 ··· 219 213 function_id++; 220 214 } 221 215 return 0; 216 + } 217 + 218 + static void mlx5_sf_dev_add_active_work(struct work_struct *work) 219 + { 220 + struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work); 221 + u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {}; 222 + struct mlx5_core_dev *dev = table->dev; 223 + u16 max_functions; 224 + u16 function_id; 225 + u16 sw_func_id; 226 + int err = 0; 227 + u8 state; 228 + int i; 229 + 230 + max_functions = mlx5_sf_max_functions(dev); 231 + function_id = MLX5_CAP_GEN(dev, sf_base_id); 232 + for (i = 0; i < max_functions; i++, function_id++) { 233 + if (table->stop_active_wq) 234 + return; 235 + err = mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out)); 236 + if (err) 237 + /* A failure of specific vhca doesn't mean others will 238 + * fail as well. 
239 + */ 240 + continue; 241 + state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); 242 + if (state != MLX5_VHCA_STATE_ACTIVE) 243 + continue; 244 + 245 + sw_func_id = MLX5_GET(query_vhca_state_out, out, vhca_state_context.sw_function_id); 246 + mutex_lock(&table->table_lock); 247 + /* Don't probe device which is already probe */ 248 + if (!xa_load(&table->devices, i)) 249 + mlx5_sf_dev_add(dev, i, function_id, sw_func_id); 250 + /* There is a race where SF got inactive after the query 251 + * above. e.g.: the query returns that the state of the 252 + * SF is active, and after that the eswitch manager set it to 253 + * inactive. 254 + * This case cannot be managed in SW, since the probing of the 255 + * SF is on one system, and the inactivation is on a different 256 + * system. 257 + * If the inactive is done after the SF perform init_hca(), 258 + * the SF will fully probe and then removed. If it was 259 + * done before init_hca(), the SF probe will fail. 260 + */ 261 + mutex_unlock(&table->table_lock); 262 + } 263 + } 264 + 265 + /* In case SFs are generated externally, probe active SFs */ 266 + static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table) 267 + { 268 + if (MLX5_CAP_GEN(table->dev, eswitch_manager)) 269 + return 0; /* the table is local */ 270 + 271 + /* Use a workqueue to probe active SFs, which are in large 272 + * quantity and may take up to minutes to probe. 
273 + */ 274 + table->active_wq = create_singlethread_workqueue("mlx5_active_sf"); 275 + if (!table->active_wq) 276 + return -ENOMEM; 277 + INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work); 278 + queue_work(table->active_wq, &table->work); 279 + return 0; 280 + } 281 + 282 + static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table) 283 + { 284 + if (table->active_wq) { 285 + table->stop_active_wq = true; 286 + destroy_workqueue(table->active_wq); 287 + } 222 288 } 223 289 224 290 void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev) ··· 318 240 table->base_address = pci_resource_start(dev->pdev, 2); 319 241 table->max_sfs = max_sfs; 320 242 xa_init(&table->devices); 243 + mutex_init(&table->table_lock); 321 244 dev->priv.sf_dev_table = table; 322 245 323 246 err = mlx5_vhca_event_notifier_register(dev, &table->nb); 324 247 if (err) 325 248 goto vhca_err; 249 + 250 + err = mlx5_sf_dev_queue_active_work(table); 251 + if (err) 252 + goto add_active_err; 253 + 326 254 err = mlx5_sf_dev_vhca_arm_all(table); 327 255 if (err) 328 256 goto arm_err; ··· 336 252 return; 337 253 338 254 arm_err: 255 + mlx5_sf_dev_destroy_active_work(table); 256 + add_active_err: 339 257 mlx5_vhca_event_notifier_unregister(dev, &table->nb); 340 258 vhca_err: 341 259 table->max_sfs = 0; ··· 365 279 if (!table) 366 280 return; 367 281 282 + mlx5_sf_dev_destroy_active_work(table); 368 283 mlx5_vhca_event_notifier_unregister(dev, &table->nb); 284 + mutex_destroy(&table->table_lock); 369 285 370 286 /* Now that event handler is not running, it is safe to destroy 371 287 * the sf device without race.
+1
include/linux/mlx5/driver.h
··· 981 981 struct mlx5_async_ctx *ctx; 982 982 mlx5_async_cbk_t user_callback; 983 983 u16 opcode; /* cmd opcode */ 984 + u16 op_mod; /* cmd op_mod */ 984 985 void *out; /* pointer to the cmd output buffer */ 985 986 }; 986 987