
Merge tag 'mlx5-updates-2022-05-09' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2022-05-09

1) Gavin Li adds an exit route from waiting for FW init on device boot, and
increases the FW init timeout on the health recovery flow

2) Support LAG mode for HCAs with 4 ports

Mark Bloch says:
================

This series adds support for HCAs with 4 ports to the mlx5 drivers.
Starting with ConnectX-7, HCAs with 4 ports are possible.

As most parts of the driver aren't affected by such a configuration,
most of the driver code is unchanged.

Specifically, the only affected areas are:
- Lag
- Devcom
- Merged E-Switch
- Single FDB E-Switch

LAG was chosen to be converted first; a hardware LAG is created when all 4
ports are added to the same bond device.

Devcom, merged E-Switch and single FDB E-Switch are marked as supporting
only HCAs with 2 ports; future patches will add support for HCAs with 4 ports.

In order to activate the hardware LAG, a user can execute the following:

ip link add bond0 type bond
ip link set bond0 type bond miimon 100 mode 2
ip link set eth2 master bond0
ip link set eth3 master bond0
ip link set eth4 master bond0
ip link set eth5 master bond0

Where eth2, eth3, eth4 and eth5 are the PFs of the same HCA.
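
Once all of the PFs have joined the bond, the driver programs the hardware
LAG and logs the chosen port mapping (the "lag map ..." message printed by
mlx5_lag_print_mapping() in the diff below). As a quick sanity check, assuming
the bond0 setup above (the grep pattern and standard bonding sysfs path are
illustrative, not part of this series):

dmesg | grep "lag map"
cat /sys/class/net/bond0/bonding/slaves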

================

====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+720 -302
+1 -1
drivers/infiniband/hw/mlx5/gsi.c
···
		     port_type) == MLX5_CAP_PORT_TYPE_IB)
			num_qps = pd->device->attrs.max_pkeys;
		else if (dev->lag_active)
-			num_qps = MLX5_MAX_PORTS;
+			num_qps = dev->lag_ports;
	}

	gsi = &mqp->gsi;
+1
drivers/infiniband/hw/mlx5/main.c
···
	}

	dev->flow_db->lag_demux_ft = ft;
+	dev->lag_ports = mlx5_lag_get_num_ports(mdev);
	dev->lag_active = true;
	return 0;
+1
drivers/infiniband/hw/mlx5/mlx5_ib.h
···
	struct xarray sig_mrs;
	struct mlx5_port_caps port_caps[MLX5_MAX_PORTS];
	u16 pkey_table_len;
+	u8 lag_ports;
};

static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
+1 -1
drivers/infiniband/hw/mlx5/qp.c
···
	tx_port_affinity = &dev->port[port_num].roce.tx_port_affinity;

	return (unsigned int)atomic_add_return(1, tx_port_affinity) %
-		MLX5_MAX_PORTS + 1;
+		(dev->lag_active ? dev->lag_ports : MLX5_CAP_GEN(dev->mdev, num_lag_ports)) + 1;
}

static bool qp_supports_affinity(struct mlx5_ib_qp *qp)
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/Makefile
···
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
		health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
-		fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \
+		fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
		lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
		fw_reset.o qos.o lib/tout.o
+41 -8
drivers/net/ethernet/mellanox/mlx5/core/dev.c
···
			 PCI_SLOT(dev->pdev->devfn));
}

-static int next_phys_dev(struct device *dev, const void *data)
+static int _next_phys_dev(struct mlx5_core_dev *mdev,
+			  const struct mlx5_core_dev *curr)
{
-	struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev);
-	struct mlx5_core_dev *mdev = madev->mdev;
-	const struct mlx5_core_dev *curr = data;
-
	if (!mlx5_core_is_pf(mdev))
		return 0;
···
	return 1;
}

-/* Must be called with intf_mutex held */
-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
+static int next_phys_dev(struct device *dev, const void *data)
+{
+	struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev);
+	struct mlx5_core_dev *mdev = madev->mdev;
+
+	return _next_phys_dev(mdev, data);
+}
+
+static int next_phys_dev_lag(struct device *dev, const void *data)
+{
+	struct mlx5_adev *madev = container_of(dev, struct mlx5_adev, adev.dev);
+	struct mlx5_core_dev *mdev = madev->mdev;
+
+	if (!MLX5_CAP_GEN(mdev, vport_group_manager) ||
+	    !MLX5_CAP_GEN(mdev, lag_master) ||
+	    (MLX5_CAP_GEN(mdev, num_lag_ports) > MLX5_MAX_PORTS ||
+	     MLX5_CAP_GEN(mdev, num_lag_ports) <= 1))
+		return 0;
+
+	return _next_phys_dev(mdev, data);
+}
+
+static struct mlx5_core_dev *mlx5_get_next_dev(struct mlx5_core_dev *dev,
+					       int (*match)(struct device *dev, const void *data))
{
	struct auxiliary_device *adev;
	struct mlx5_adev *madev;
···
	if (!mlx5_core_is_pf(dev))
		return NULL;

-	adev = auxiliary_find_device(NULL, dev, &next_phys_dev);
+	adev = auxiliary_find_device(NULL, dev, match);
	if (!adev)
		return NULL;

	madev = container_of(adev, struct mlx5_adev, adev);
	put_device(&adev->dev);
	return madev->mdev;
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
+{
+	lockdep_assert_held(&mlx5_intf_mutex);
+	return mlx5_get_next_dev(dev, &next_phys_dev);
+}
+
+/* Must be called with intf_mutex held */
+struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev)
+{
+	lockdep_assert_held(&mlx5_intf_mutex);
+	return mlx5_get_next_dev(dev, &next_phys_dev_lag);
}

void mlx5_dev_list_lock(void)
+2 -2
drivers/net/ethernet/mellanox/mlx5/core/devlink.c
···
	*actions_performed = BIT(action);
	switch (action) {
	case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
-		return mlx5_load_one(dev);
+		return mlx5_load_one(dev, false);
	case DEVLINK_RELOAD_ACTION_FW_ACTIVATE:
		if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET)
			break;
		/* On fw_activate action, also driver is reloaded and reinit performed */
		*actions_performed |= BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT);
-		return mlx5_load_one(dev);
+		return mlx5_load_one(dev, false);
	default:
		/* Unsupported action should not get to this function */
		WARN_ON(1);
-25
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
···
	ida_init(&esw->offloads.vport_metadata_ida);
	xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC);
	mutex_init(&esw->state_lock);
-	lockdep_register_key(&esw->mode_lock_key);
	init_rwsem(&esw->mode_lock);
-	lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key);
	refcount_set(&esw->qos.refcnt, 0);

	esw->enabled_vports = 0;
···
	esw->dev->priv.eswitch = NULL;
	destroy_workqueue(esw->work_queue);
	WARN_ON(refcount_read(&esw->qos.refcnt));
-	lockdep_unregister_key(&esw->mode_lock_key);
	mutex_destroy(&esw->state_lock);
	WARN_ON(!xa_empty(&esw->offloads.vhca_map));
	xa_destroy(&esw->offloads.vhca_map);
···
}
EXPORT_SYMBOL(mlx5_eswitch_get_encap_mode);

-bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1)
-{
-	if ((dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
-	     dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE) ||
-	    (dev0->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS &&
-	     dev1->priv.eswitch->mode == MLX5_ESWITCH_OFFLOADS))
-		return true;
-
-	return false;
-}
-
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
			       struct mlx5_core_dev *dev1)
{
···
	if (!mlx5_esw_allowed(esw))
		return;
	up_write(&esw->mode_lock);
-}
-
-/**
- * mlx5_esw_lock() - Take write lock on esw mode lock
- * @esw: eswitch device.
- */
-void mlx5_esw_lock(struct mlx5_eswitch *esw)
-{
-	if (!mlx5_esw_allowed(esw))
-		return;
-	down_write(&esw->mode_lock);
}

/**
-8
drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
···
		u32 large_group_num;
	} params;
	struct blocking_notifier_head n_head;
-	struct lock_class_key mode_lock_key;
};

void esw_offloads_disable(struct mlx5_eswitch *esw);
···
	MLX5_CAP_ESW_FLOWTABLE_FDB(dev, push_vlan_2);
}

-bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0,
-			 struct mlx5_core_dev *dev1);
bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0,
			       struct mlx5_core_dev *dev1);
···
void mlx5_esw_put(struct mlx5_core_dev *dev);
int mlx5_esw_try_lock(struct mlx5_eswitch *esw);
void mlx5_esw_unlock(struct mlx5_eswitch *esw);
-void mlx5_esw_lock(struct mlx5_eswitch *esw);

void esw_vport_change_handle_locked(struct mlx5_vport *vport);
···
static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; }
static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) {}
-static inline bool mlx5_esw_lag_prereq(struct mlx5_core_dev *dev0, struct mlx5_core_dev *dev1) { return true; }
static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; }
static inline
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, u16 vport, int link_state) { return 0; }
···
{
	return ERR_PTR(-EOPNOTSUPP);
}
-
-static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; }
-static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; }

static inline struct mlx5_flow_handle *
esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag)
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
···
	if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) {
		complete(&fw_reset->done);
	} else {
-		mlx5_load_one(dev);
+		mlx5_load_one(dev, false);
		devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0,
							BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) |
							BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
+173
drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c
···
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "lag.h"
+
+static char *get_str_mode_type(struct mlx5_lag *ldev)
+{
+	if (ldev->flags & MLX5_LAG_FLAG_ROCE)
+		return "roce";
+	if (ldev->flags & MLX5_LAG_FLAG_SRIOV)
+		return "switchdev";
+	if (ldev->flags & MLX5_LAG_FLAG_MULTIPATH)
+		return "multipath";
+
+	return NULL;
+}
+
+static int type_show(struct seq_file *file, void *priv)
+{
+	struct mlx5_core_dev *dev = file->private;
+	struct mlx5_lag *ldev;
+	char *mode = NULL;
+
+	ldev = dev->priv.lag;
+	mutex_lock(&ldev->lock);
+	if (__mlx5_lag_is_active(ldev))
+		mode = get_str_mode_type(ldev);
+	mutex_unlock(&ldev->lock);
+	if (!mode)
+		return -EINVAL;
+	seq_printf(file, "%s\n", mode);
+
+	return 0;
+}
+
+static int port_sel_mode_show(struct seq_file *file, void *priv)
+{
+	struct mlx5_core_dev *dev = file->private;
+	struct mlx5_lag *ldev;
+	int ret = 0;
+	char *mode;
+
+	ldev = dev->priv.lag;
+	mutex_lock(&ldev->lock);
+	if (__mlx5_lag_is_active(ldev))
+		mode = get_str_port_sel_mode(ldev->flags);
+	else
+		ret = -EINVAL;
+	mutex_unlock(&ldev->lock);
+	if (ret || !mode)
+		return ret;
+
+	seq_printf(file, "%s\n", mode);
+	return 0;
+}
+
+static int state_show(struct seq_file *file, void *priv)
+{
+	struct mlx5_core_dev *dev = file->private;
+	struct mlx5_lag *ldev;
+	bool active;
+
+	ldev = dev->priv.lag;
+	mutex_lock(&ldev->lock);
+	active = __mlx5_lag_is_active(ldev);
+	mutex_unlock(&ldev->lock);
+	seq_printf(file, "%s\n", active ? "active" : "disabled");
+	return 0;
+}
+
+static int flags_show(struct seq_file *file, void *priv)
+{
+	struct mlx5_core_dev *dev = file->private;
+	struct mlx5_lag *ldev;
+	bool shared_fdb;
+	bool lag_active;
+
+	ldev = dev->priv.lag;
+	mutex_lock(&ldev->lock);
+	lag_active = __mlx5_lag_is_active(ldev);
+	if (lag_active)
+		shared_fdb = ldev->shared_fdb;
+
+	mutex_unlock(&ldev->lock);
+	if (!lag_active)
+		return -EINVAL;
+
+	seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off");
+	return 0;
+}
+
+static int mapping_show(struct seq_file *file, void *priv)
+{
+	struct mlx5_core_dev *dev = file->private;
+	u8 ports[MLX5_MAX_PORTS] = {};
+	struct mlx5_lag *ldev;
+	bool hash = false;
+	bool lag_active;
+	int num_ports;
+	int i;
+
+	ldev = dev->priv.lag;
+	mutex_lock(&ldev->lock);
+	lag_active = __mlx5_lag_is_active(ldev);
+	if (lag_active) {
+		if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED) {
+			mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports,
+					      &num_ports);
+			hash = true;
+		} else {
+			for (i = 0; i < ldev->ports; i++)
+				ports[i] = ldev->v2p_map[i];
+			num_ports = ldev->ports;
+		}
+	}
+	mutex_unlock(&ldev->lock);
+	if (!lag_active)
+		return -EINVAL;
+
+	for (i = 0; i < num_ports; i++) {
+		if (hash)
+			seq_printf(file, "%d\n", ports[i] + 1);
+		else
+			seq_printf(file, "%d:%d\n", i + 1, ports[i]);
+	}
+
+	return 0;
+}
+
+static int members_show(struct seq_file *file, void *priv)
+{
+	struct mlx5_core_dev *dev = file->private;
+	struct mlx5_lag *ldev;
+	int i;
+
+	ldev = dev->priv.lag;
+	mutex_lock(&ldev->lock);
+	for (i = 0; i < ldev->ports; i++) {
+		if (!ldev->pf[i].dev)
+			continue;
+		seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device));
+	}
+	mutex_unlock(&ldev->lock);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(type);
+DEFINE_SHOW_ATTRIBUTE(port_sel_mode);
+DEFINE_SHOW_ATTRIBUTE(state);
+DEFINE_SHOW_ATTRIBUTE(flags);
+DEFINE_SHOW_ATTRIBUTE(mapping);
+DEFINE_SHOW_ATTRIBUTE(members);
+
+void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev)
+{
+	struct dentry *dbg;
+
+	dbg = debugfs_create_dir("lag", mlx5_debugfs_get_dev_root(dev));
+	dev->priv.dbg.lag_debugfs = dbg;
+
+	debugfs_create_file("type", 0444, dbg, dev, &type_fops);
+	debugfs_create_file("port_sel_mode", 0444, dbg, dev, &port_sel_mode_fops);
+	debugfs_create_file("state", 0444, dbg, dev, &state_fops);
+	debugfs_create_file("flags", 0444, dbg, dev, &flags_fops);
+	debugfs_create_file("mapping", 0444, dbg, dev, &mapping_fops);
+	debugfs_create_file("members", 0444, dbg, dev, &members_fops);
+}
+
+void mlx5_ldev_remove_debugfs(struct dentry *dbg)
+{
+	debugfs_remove_recursive(dbg);
+}
+361 -176
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
···
 */
static DEFINE_SPINLOCK(lag_lock);

-static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
-			       u8 remap_port2, bool shared_fdb, u8 flags)
+static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, bool shared_fdb, u8 flags)
{
	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
···
	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
	if (!(flags & MLX5_LAG_FLAG_HASH_BASED)) {
-		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
-		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
+		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
	} else {
		MLX5_SET(lagc, lag_ctx, port_select_mode,
			 MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT);
···
	return mlx5_cmd_exec_in(dev, create_lag, in);
}

-static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
-			       u8 remap_port2)
+static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports,
+			       u8 *ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
···
	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);

-	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
-	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
+	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
+	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}
···
}
EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);

+static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports,
+				   u8 *ports, int *num_disabled)
+{
+	int i;
+
+	*num_disabled = 0;
+	for (i = 0; i < num_ports; i++) {
+		if (!tracker->netdev_state[i].tx_enabled ||
+		    !tracker->netdev_state[i].link_up)
+			ports[(*num_disabled)++] = i;
+	}
+}
+
+void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
+			   u8 *ports, int *num_enabled)
+{
+	int i;
+
+	*num_enabled = 0;
+	for (i = 0; i < num_ports; i++) {
+		if (tracker->netdev_state[i].tx_enabled &&
+		    tracker->netdev_state[i].link_up)
+			ports[(*num_enabled)++] = i;
+	}
+
+	if (*num_enabled == 0)
+		mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled);
+}
+
+static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
+				   struct mlx5_lag *ldev,
+				   struct lag_tracker *tracker,
+				   u8 flags)
+{
+	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
+	u8 enabled_ports[MLX5_MAX_PORTS] = {};
+	int written = 0;
+	int num_enabled;
+	int idx;
+	int err;
+	int i;
+	int j;
+
+	if (flags & MLX5_LAG_FLAG_HASH_BASED) {
+		mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports,
+				      &num_enabled);
+		for (i = 0; i < num_enabled; i++) {
+			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
+			if (err != 3)
+				return;
+			written += err;
+		}
+		buf[written - 2] = 0;
+		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
+	} else {
+		for (i = 0; i < ldev->ports; i++) {
+			for (j = 0; j < ldev->buckets; j++) {
+				idx = i * ldev->buckets + j;
+				err = scnprintf(buf + written, 10,
+						" port %d:%d", i + 1, ldev->v2p_map[idx]);
+				if (err != 9)
+					return;
+				written += err;
+			}
+		}
+		mlx5_core_info(dev, "lag map:%s\n", buf);
+	}
+}
+
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);
···
	mlx5_lag_mp_cleanup(ldev);
	cancel_delayed_work_sync(&ldev->bond_work);
	destroy_workqueue(ldev->wq);
+	mutex_destroy(&ldev->lock);
	kfree(ldev);
}
···
	}

	kref_init(&ldev->ref);
+	mutex_init(&ldev->lock);
	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);

	ldev->nb.notifier_call = mlx5_lag_netdev_event;
···
	if (err)
		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
			      err);
+	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
+	ldev->buckets = 1;

	return ldev;
}
···
{
	int i;

-	for (i = 0; i < MLX5_MAX_PORTS; i++)
+	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].netdev == ndev)
			return i;
···
	return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV);
}

+/* Create a mapping between steering slots and active ports.
+ * As we have ldev->buckets slots per port first assume the native
+ * mapping should be used.
+ * If there are ports that are disabled fill the relevant slots
+ * with mapping that points to active ports.
+ */
static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
-					   u8 *port1, u8 *port2)
+					   u8 num_ports,
+					   u8 buckets,
+					   u8 *ports)
{
-	bool p1en;
-	bool p2en;
+	int disabled[MLX5_MAX_PORTS] = {};
+	int enabled[MLX5_MAX_PORTS] = {};
+	int disabled_ports_num = 0;
+	int enabled_ports_num = 0;
+	int idx;
+	u32 rand;
+	int i;
+	int j;

-	p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled &&
-	       tracker->netdev_state[MLX5_LAG_P1].link_up;
+	for (i = 0; i < num_ports; i++) {
+		if (tracker->netdev_state[i].tx_enabled &&
+		    tracker->netdev_state[i].link_up)
+			enabled[enabled_ports_num++] = i;
+		else
+			disabled[disabled_ports_num++] = i;
+	}

-	p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled &&
-	       tracker->netdev_state[MLX5_LAG_P2].link_up;
+	/* Use native mapping by default where each port's buckets
+	 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
+	 */
+	for (i = 0; i < num_ports; i++)
+		for (j = 0; j < buckets; j++) {
+			idx = i * buckets + j;
+			ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i;
+		}

-	*port1 = MLX5_LAG_EGRESS_PORT_1;
-	*port2 = MLX5_LAG_EGRESS_PORT_2;
-	if ((!p1en && !p2en) || (p1en && p2en))
+	/* If all ports are disabled/enabled keep native mapping */
+	if (enabled_ports_num == num_ports ||
+	    disabled_ports_num == num_ports)
		return;

-	if (p1en)
-		*port2 = MLX5_LAG_EGRESS_PORT_1;
-	else
-		*port1 = MLX5_LAG_EGRESS_PORT_2;
+	/* Go over the disabled ports and for each assign a random active port */
+	for (i = 0; i < disabled_ports_num; i++) {
+		for (j = 0; j < buckets; j++) {
+			get_random_bytes(&rand, 4);
+			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
+		}
+	}
}

static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
{
-	return ldev->pf[MLX5_LAG_P1].has_drop || ldev->pf[MLX5_LAG_P2].has_drop;
+	int i;
+
+	for (i = 0; i < ldev->ports; i++)
+		if (ldev->pf[i].has_drop)
+			return true;
+	return false;
}

static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
{
	int i;

-	for (i = 0; i < MLX5_MAX_PORTS; i++) {
+	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].has_drop)
			continue;
···
static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
				     struct lag_tracker *tracker)
{
-	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
-	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
-	struct mlx5_core_dev *inactive;
-	u8 v2p_port1, v2p_port2;
-	int inactive_idx;
+	u8 disabled_ports[MLX5_MAX_PORTS] = {};
+	struct mlx5_core_dev *dev;
+	int disabled_index;
+	int num_disabled;
	int err;
+	int i;

	/* First delete the current drop rule so there won't be any dropped
	 * packets
···
	if (!ldev->tracker.has_inactive)
		return;

-	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, &v2p_port2);
+	mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled);

-	if (v2p_port1 == MLX5_LAG_EGRESS_PORT_1) {
-		inactive = dev1;
-		inactive_idx = MLX5_LAG_P2;
-	} else {
-		inactive = dev0;
-		inactive_idx = MLX5_LAG_P1;
+	for (i = 0; i < num_disabled; i++) {
+		disabled_index = disabled_ports[i];
+		dev = ldev->pf[disabled_index].dev;
+		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
+								  MLX5_VPORT_UPLINK);
+		if (!err)
+			ldev->pf[disabled_index].has_drop = true;
+		else
+			mlx5_core_err(dev,
+				      "Failed to create lag drop rule, error: %d", err);
	}
-
-	err = mlx5_esw_acl_ingress_vport_drop_rule_create(inactive->priv.eswitch,
-							  MLX5_VPORT_UPLINK);
-	if (!err)
-		ldev->pf[inactive_idx].has_drop = true;
-	else
-		mlx5_core_err(inactive,
-			      "Failed to create lag drop rule, error: %d", err);
}

-static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 v2p_port1, u8 v2p_port2)
+static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;

	if (ldev->flags & MLX5_LAG_FLAG_HASH_BASED)
-		return mlx5_lag_port_sel_modify(ldev, v2p_port1, v2p_port2);
-	return mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2);
+		return mlx5_lag_port_sel_modify(ldev, ports);
+	return mlx5_cmd_modify_lag(dev0, ldev->ports, ports);
}

void mlx5_modify_lag(struct mlx5_lag *ldev,
		     struct lag_tracker *tracker)
{
+	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
-	u8 v2p_port1, v2p_port2;
+	int idx;
	int err;
+	int i;
+	int j;

-	mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1,
-				       &v2p_port2);
+	mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ports);

-	if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] ||
-	    v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) {
-		err = _mlx5_modify_lag(ldev, v2p_port1, v2p_port2);
-		if (err) {
-			mlx5_core_err(dev0,
-				      "Failed to modify LAG (%d)\n",
-				      err);
-			return;
+	for (i = 0; i < ldev->ports; i++) {
+		for (j = 0; j < ldev->buckets; j++) {
+			idx = i * ldev->buckets + j;
+			if (ports[idx] == ldev->v2p_map[idx])
+				continue;
+			err = _mlx5_modify_lag(ldev, ports);
+			if (err) {
+				mlx5_core_err(dev0,
+					      "Failed to modify LAG (%d)\n",
+					      err);
+				return;
+			}
+			memcpy(ldev->v2p_map, ports, sizeof(ports));
+
+			mlx5_lag_print_mapping(dev0, ldev, tracker,
+					       ldev->flags);
+			break;
		}
-		ldev->v2p_map[MLX5_LAG_P1] = v2p_port1;
-		ldev->v2p_map[MLX5_LAG_P2] = v2p_port2;
-		mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d",
-			       ldev->v2p_map[MLX5_LAG_P1],
-			       ldev->v2p_map[MLX5_LAG_P2]);
	}

	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
···
		mlx5_lag_drop_rule_setup(ldev, tracker);
}

-static void mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
-				       struct lag_tracker *tracker, u8 *flags)
+#define MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED 4
+static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
+					   struct lag_tracker *tracker, u8 *flags)
{
-	bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE);
	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];

-	if (roce_lag ||
-	    !MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) ||
-	    tracker->tx_type != NETDEV_LAG_TX_TYPE_HASH)
-		return;
-	*flags |= MLX5_LAG_FLAG_HASH_BASED;
+	if (ldev->ports == MLX5_LAG_ROCE_HASH_PORTS_SUPPORTED) {
+		/* Four ports are supported only in hash mode */
+		if (!MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table))
+			return -EINVAL;
+		*flags |= MLX5_LAG_FLAG_HASH_BASED;
+		if (ldev->ports > 2)
+			ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
+	}
+
+	return 0;
}

-static char *get_str_port_sel_mode(u8 flags)
+static int mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
+					       struct lag_tracker *tracker, u8 *flags)
+{
+	struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1];
+
+	if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
+	    tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)
+		*flags |= MLX5_LAG_FLAG_HASH_BASED;
+
+	return 0;
+}
+
+static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
+				      struct lag_tracker *tracker, u8 *flags)
+{
+	bool roce_lag = !!(*flags & MLX5_LAG_FLAG_ROCE);
+
+	if (roce_lag)
+		return mlx5_lag_set_port_sel_mode_roce(ldev, tracker, flags);
+	return mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, flags);
+}
+
+char *get_str_port_sel_mode(u8 flags)
{
	if (flags & MLX5_LAG_FLAG_HASH_BASED)
		return "hash";
···
	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
	int err;

-	mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d mode:%s",
-		       ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2],
+	mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
+	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
		       shared_fdb, get_str_port_sel_mode(flags));

-	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1],
-				  ldev->v2p_map[MLX5_LAG_P2], shared_fdb, flags);
+	err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, shared_fdb, flags);
	if (err) {
		mlx5_core_err(dev0,
			      "Failed to create LAG (%d)\n",
···
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
	int err;

-	mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1],
-				       &ldev->v2p_map[MLX5_LAG_P2]);
-	mlx5_lag_set_port_sel_mode(ldev, tracker, &flags);
+	err = mlx5_lag_set_port_sel_mode(ldev, tracker, &flags);
+	if (err)
+		return err;
+
+	mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map);
+
	if (flags & MLX5_LAG_FLAG_HASH_BASED) {
		err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
-					       ldev->v2p_map[MLX5_LAG_P1],
-					       ldev->v2p_map[MLX5_LAG_P2]);
+					       ldev->v2p_map);
		if (err) {
			mlx5_core_err(dev0,
				      "Failed to create LAG port selection(%d)\n",
···
	return 0;
}

+#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
-	if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev)
-		return false;
+#ifdef CONFIG_MLX5_ESWITCH
+	u8 mode;
+#endif
+	int i;
+
+	for (i = 0; i < ldev->ports; i++)
+		if (!ldev->pf[i].dev)
+			return false;

#ifdef CONFIG_MLX5_ESWITCH
-	return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev,
-				   ldev->pf[MLX5_LAG_P2].dev);
+	mode = mlx5_eswitch_mode(ldev->pf[MLX5_LAG_P1].dev);
+
+	if (mode != MLX5_ESWITCH_NONE && mode != MLX5_ESWITCH_OFFLOADS)
+		return false;
+
+	for (i = 0; i < ldev->ports; i++)
+		if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
+			return false;
+
+	if (mode == MLX5_ESWITCH_OFFLOADS && ldev->ports != MLX5_LAG_OFFLOADS_SUPPORTED_PORTS)
+		return false;
#else
-	return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) &&
-		!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev));
+	for (i = 0; i < ldev->ports; i++)
+		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
+			return false;
#endif
+	return true;
}

static void mlx5_lag_add_devices(struct mlx5_lag *ldev)
{
	int i;

-	for (i = 0; i < MLX5_MAX_PORTS; i++) {
+	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;
···
{
	int i;

-	for (i = 0; i < MLX5_MAX_PORTS; i++) {
+	for (i = 0; i < ldev->ports; i++) {
		if (!ldev->pf[i].dev)
			continue;
···
	bool shared_fdb = ldev->shared_fdb;
	bool roce_lag;
	int err;
+	int i;

	roce_lag = __mlx5_lag_is_roce(ldev);
···
		dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
		mlx5_rescan_drivers_locked(dev0);
	}
-	mlx5_nic_vport_disable_roce(dev1);
+	for (i = 1; i < ldev->ports; i++)
+		mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
}

err = mlx5_deactivate_lag(ldev);
···
	return false;
}

+static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
+{
+	bool roce_lag = true;
+	int i;
+
+	for (i = 0; i < ldev->ports; i++)
+		roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
+
+#ifdef CONFIG_MLX5_ESWITCH
+	for (i = 0; i < ldev->ports; i++)
+		roce_lag = roce_lag &&
+			ldev->pf[i].dev->priv.eswitch->mode == MLX5_ESWITCH_NONE;
+#endif
+
+	return roce_lag;
+}
+
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
···
	struct lag_tracker tracker;
	bool do_bond, roce_lag;
	int err;
+	int i;

	if (!mlx5_lag_is_ready(ldev)) {
		do_bond = false;
···
	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		bool shared_fdb = mlx5_shared_fdb_supported(ldev);

-		roce_lag = !mlx5_sriov_is_enabled(dev0) &&
-			   !mlx5_sriov_is_enabled(dev1);
-
-#ifdef CONFIG_MLX5_ESWITCH
-		roce_lag = roce_lag &&
-			   dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE &&
-			   dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE;
-#endif
+		roce_lag = mlx5_lag_is_roce_lag(ldev);

		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);
···
		} else if (roce_lag) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
-			mlx5_nic_vport_enable_roce(dev1);
+			for (i = 1; i < ldev->ports; i++)
+				mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
		} else if (shared_fdb) {
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
···
	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
}

-static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0,
-				    struct mlx5_core_dev *dev1)
-{
-	if (dev0)
-		mlx5_esw_lock(dev0->priv.eswitch);
-	if (dev1)
-		mlx5_esw_lock(dev1->priv.eswitch);
-}
-
-static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0,
-				      struct mlx5_core_dev *dev1)
-{
-	if (dev1)
-		mlx5_esw_unlock(dev1->priv.eswitch);
-	if (dev0)
-		mlx5_esw_unlock(dev0->priv.eswitch);
-}
-
static void mlx5_do_bond_work(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
					     bond_work);
-	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
-	struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
	int status;

	status = mlx5_dev_list_trylock();
···
		return;
	}

+	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
+		mutex_unlock(&ldev->lock);
		mlx5_dev_list_unlock();
		mlx5_queue_bond_work(ldev, HZ);
		return;
	}

-	mlx5_lag_lock_eswitches(dev0, dev1);
	mlx5_do_bond(ldev);
-	mlx5_lag_unlock_eswitches(dev0, dev1);
+	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}
···
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct slave *slave;
-	int bond_status = 0;
+	u8 bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int idx;
···
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
-	if (!(bond_status & 0x3))
+	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
		return 0;

	if (lag_upper_info) {
···
	 * A device is considered bonded if both its physical ports are slaves
	 * of the same lag master, and only them.
	 */
-	is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3;
+	is_in_lag = num_slaves == ldev->ports &&
+		bond_status == GENMASK(ldev->ports - 1, 0);

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
···
{
	unsigned int fn = mlx5_get_dev_index(dev);

-	if (fn >= MLX5_MAX_PORTS)
+	if (fn >= ldev->ports)
		return;

	spin_lock(&lag_lock);
···
	int i;

	spin_lock(&lag_lock);
-	for (i = 0; i < MLX5_MAX_PORTS; i++) {
+	for (i = 0; i < ldev->ports; i++) {
		if (ldev->pf[i].netdev == netdev) {
			ldev->pf[i].netdev = NULL;
			break;
···
{
	unsigned int fn = mlx5_get_dev_index(dev);

-	if (fn >= MLX5_MAX_PORTS)
+	if (fn >= ldev->ports)
		return;

	ldev->pf[fn].dev = dev;
	dev->priv.lag = ldev;
}

-/* Must be called with intf_mutex held */
static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
				  struct mlx5_core_dev *dev)
{
	int i;

-	for (i = 0; i < MLX5_MAX_PORTS; i++)
+	for (i = 0; i < ldev->ports; i++)
		if (ldev->pf[i].dev == dev)
			break;

-	if (i == MLX5_MAX_PORTS)
+	if (i == ldev->ports)
		return;

	ldev->pf[i].dev = NULL;
···
	struct mlx5_lag *ldev = NULL;
	struct mlx5_core_dev *tmp_dev;

-	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
-	    !MLX5_CAP_GEN(dev, lag_master) ||
-	    MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)
-		return 0;
-
-	tmp_dev = mlx5_get_next_phys_dev(dev);
+	tmp_dev = mlx5_get_next_phys_dev_lag(dev);
	if (tmp_dev)
		ldev = tmp_dev->priv.lag;
···
		mlx5_core_err(dev, "Failed to alloc lag dev\n");
		return 0;
	}
-	} else {
-		if (ldev->mode_changes_in_progress)
-			return -EAGAIN;
-		mlx5_ldev_get(ldev);
+		mlx5_ldev_add_mdev(ldev, dev);
+		return 0;
	}

+	mutex_lock(&ldev->lock);
+	if (ldev->mode_changes_in_progress) {
+		mutex_unlock(&ldev->lock);
+		return -EAGAIN;
+	}
+	mlx5_ldev_get(ldev);
	mlx5_ldev_add_mdev(ldev, dev);
+	mutex_unlock(&ldev->lock);

	return 0;
}
···
	if (!ldev)
		return;

+	/* mdev is being removed, might as well remove debugfs
+	 * as early as possible.
+	 */
+	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
recheck:
-	mlx5_dev_list_lock();
+	mutex_lock(&ldev->lock);
	if (ldev->mode_changes_in_progress) {
-		mlx5_dev_list_unlock();
+		mutex_unlock(&ldev->lock);
		msleep(100);
		goto recheck;
	}
	mlx5_ldev_remove_mdev(ldev, dev);
-	mlx5_dev_list_unlock();
+	mutex_unlock(&ldev->lock);
	mlx5_ldev_put(ldev);
}
···
{
	int err;

+	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+	    !MLX5_CAP_GEN(dev, lag_master) ||
+	    (MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS ||
+	     MLX5_CAP_GEN(dev, num_lag_ports) <= 1))
+		return;
+
recheck:
	mlx5_dev_list_lock();
	err = __mlx5_lag_dev_add_mdev(dev);
+	mlx5_dev_list_unlock();
+
	if (err) {
-		mlx5_dev_list_unlock();
		msleep(100);
		goto recheck;
	}
-	mlx5_dev_list_unlock();
+	mlx5_ldev_add_debugfs(dev);
}

-/* Must be called with intf_mutex held */
void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
			    struct net_device *netdev)
{
	struct mlx5_lag *ldev;
+	bool lag_is_active;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

+	mutex_lock(&ldev->lock);
	mlx5_ldev_remove_netdev(ldev, netdev);
	ldev->flags &= ~MLX5_LAG_FLAG_READY;

-	if (__mlx5_lag_is_active(ldev))
+	lag_is_active = __mlx5_lag_is_active(ldev);
+	mutex_unlock(&ldev->lock);
+
+	if (lag_is_active)
		mlx5_queue_bond_work(ldev, 0);
}

-/* Must be called with intf_mutex held */
void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
			 struct net_device *netdev)
{
···
	if (!ldev)
		return;

+	mutex_lock(&ldev->lock);
	mlx5_ldev_add_netdev(ldev, dev, netdev);

-	for (i = 0; i < MLX5_MAX_PORTS; i++)
+	for (i = 0; i < ldev->ports; i++)
		if (!ldev->pf[i].dev)
			break;

-	if (i >= MLX5_MAX_PORTS)
+	if (i >= ldev->ports)
		ldev->flags |= MLX5_LAG_FLAG_READY;
+	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}
···
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
-	struct mlx5_core_dev *dev0;
-	struct mlx5_core_dev *dev1;
	struct mlx5_lag *ldev;

	ldev = mlx5_lag_dev(dev);
···
		return;

	mlx5_dev_list_lock();
-
-	dev0 = ldev->pf[MLX5_LAG_P1].dev;
-	dev1 = ldev->pf[MLX5_LAG_P2].dev;
+	mutex_lock(&ldev->lock);

	ldev->mode_changes_in_progress++;
-	if (__mlx5_lag_is_active(ldev)) {
-		mlx5_lag_lock_eswitches(dev0, dev1);
+	if (__mlx5_lag_is_active(ldev))
		mlx5_disable_lag(ldev);
-		mlx5_lag_unlock_eswitches(dev0, dev1);
-	}
+
+	mutex_unlock(&ldev->lock);
	mlx5_dev_list_unlock();
}
···
	if (!ldev)
		return;

-	mlx5_dev_list_lock();
+	mutex_lock(&ldev->lock);
	ldev->mode_changes_in_progress--;
-	mlx5_dev_list_unlock();
+	mutex_unlock(&ldev->lock);
	mlx5_queue_bond_work(ldev, 0);
}
···
{
	struct net_device *ndev = NULL;
	struct mlx5_lag *ldev;
+	int i;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
···
		goto unlock;

	if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
-		ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ?
-		       ldev->pf[MLX5_LAG_P1].netdev :
-		       ldev->pf[MLX5_LAG_P2].netdev;
+		for (i = 0; i < ldev->ports; i++)
+			if (ldev->tracker.netdev_state[i].tx_enabled)
+				ndev = ldev->pf[i].netdev;
+		if (!ndev)
+			ndev = ldev->pf[ldev->ports - 1].netdev;
	} else {
		ndev = ldev->pf[MLX5_LAG_P1].netdev;
	}
···
{
	struct mlx5_lag *ldev;
	u8 port = 0;
+	int i;

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (!(ldev && __mlx5_lag_is_roce(ldev)))
		goto unlock;

-	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
-		port = MLX5_LAG_P1;
-	else
-		port = MLX5_LAG_P2;
+	for (i = 0; i < ldev->ports; i++) {
+		if (ldev->pf[i].netdev == slave) {
+			port = i;
+			break;
+		}
+	}

-	port = ldev->v2p_map[port];
+	port = ldev->v2p_map[port * ldev->buckets];

unlock:
	spin_unlock(&lag_lock);
	return port;
}
EXPORT_SYMBOL(mlx5_lag_get_slave_port);

+u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+
+	ldev = mlx5_lag_dev(dev);
+	if (!ldev)
+		return 0;
+
+	return ldev->ports;
+}
+EXPORT_SYMBOL(mlx5_lag_get_num_ports);

struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev)
{
···
			 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
-	struct mlx5_core_dev *mdev[MLX5_MAX_PORTS];
+	struct mlx5_core_dev **mdev;
	struct mlx5_lag *ldev;
	int num_ports;
	int ret, i, j;
···
	if (!out)
		return -ENOMEM;

+	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
+	if (!mdev) {
+		ret = -ENOMEM;
+		goto free_out;
+	}
+
	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock(&lag_lock);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
-		num_ports = MLX5_MAX_PORTS;
-		mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev;
-		mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev;
+		num_ports = ldev->ports;
+		for (i = 0; i < ldev->ports; i++)
+			mdev[i] = ldev->pf[i].dev;
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
···
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
-			goto free;
+			goto free_mdev;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

-free:
+free_mdev:
+	kvfree(mdev);
+free_out:
	kvfree(out);
	return ret;
}
+15 -1
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
···
#ifndef __MLX5_LAG_H__
#define __MLX5_LAG_H__

+#include <linux/debugfs.h>
+
+#define MLX5_LAG_MAX_HASH_BUCKETS 16
#include "mlx5_core.h"
#include "mp.h"
#include "port_sel.h"
···
 */
struct mlx5_lag {
	u8 flags;
+	u8 ports;
+	u8 buckets;
	int mode_changes_in_progress;
	bool shared_fdb;
-	u8 v2p_map[MLX5_MAX_PORTS];
+	u8 v2p_map[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
	struct kref ref;
	struct lag_func pf[MLX5_MAX_PORTS];
	struct lag_tracker tracker;
···
	struct notifier_block nb;
	struct lag_mp lag_mp;
	struct mlx5_lag_port_sel port_sel;
+	/* Protect lag fields/state changes */
+	struct mutex lock;
};

static inline struct mlx5_lag *
···
			   bool shared_fdb);
int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
				struct net_device *ndev);
+
+char *get_str_port_sel_mode(u8 flags);
+void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
+			   u8 *ports, int *num_enabled);
+
+void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev);
+void mlx5_ldev_remove_debugfs(struct dentry *dbg);

#endif /* __MLX5_LAG_H__ */
+77 -52
drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
···
static struct mlx5_flow_group *
mlx5_create_hash_flow_group(struct mlx5_flow_table *ft,
-			    struct mlx5_flow_definer *definer)
+			    struct mlx5_flow_definer *definer,
+			    u8 rules)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_group *fg;
···
	MLX5_SET(create_flow_group_in, in, match_definer_id,
		 mlx5_get_match_definer_id(definer));
	MLX5_SET(create_flow_group_in, in, start_flow_index, 0);
-	MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_MAX_PORTS - 1);
+	MLX5_SET(create_flow_group_in, in, end_flow_index, rules - 1);
	MLX5_SET(create_flow_group_in, in, group_type,
		 MLX5_CREATE_FLOW_GROUP_IN_GROUP_TYPE_HASH_SPLIT);
···

static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev,
					  struct mlx5_lag_definer *lag_definer,
-					  u8 port1, u8 port2)
+					  u8 *ports)
{
	struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_flow_table_attr ft_attr = {};
···
	MLX5_DECLARE_FLOW_ACT(flow_act);
	struct mlx5_flow_namespace *ns;
	int err, i;
+	int idx;
+	int j;

-	ft_attr.max_fte = MLX5_MAX_PORTS;
+	ft_attr.max_fte = ldev->ports * ldev->buckets;
	ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER;

	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_PORT_SEL);
···
	}

	lag_definer->fg = mlx5_create_hash_flow_group(lag_definer->ft,
-						      lag_definer->definer);
+						      lag_definer->definer,
+						      ft_attr.max_fte);
	if (IS_ERR(lag_definer->fg)) {
		err = PTR_ERR(lag_definer->fg);
		goto destroy_ft;
···
	dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
	dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
	flow_act.flags |= FLOW_ACT_NO_APPEND;
-	for (i = 0; i < MLX5_MAX_PORTS; i++) {
-		u8 affinity = i == 0 ? port1 : port2;
+	for (i = 0; i < ldev->ports; i++) {
+		for (j = 0; j < ldev->buckets; j++) {
+			u8 affinity;

-		dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev,
-						  vhca_id);
-		lag_definer->rules[i] = mlx5_add_flow_rules(lag_definer->ft,
-							    NULL, &flow_act,
-							    &dest, 1);
-		if (IS_ERR(lag_definer->rules[i])) {
-			err = PTR_ERR(lag_definer->rules[i]);
-			while (i--)
-				mlx5_del_flow_rules(lag_definer->rules[i]);
-			goto destroy_fg;
+			idx = i * ldev->buckets + j;
+			affinity = ports[idx];
+
+			dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[affinity - 1].dev,
+							  vhca_id);
+			lag_definer->rules[idx] = mlx5_add_flow_rules(lag_definer->ft,
+								      NULL, &flow_act,
+								      &dest, 1);
+			if (IS_ERR(lag_definer->rules[idx])) {
+				err = PTR_ERR(lag_definer->rules[idx]);
+				while (i--)
+					while (j--)
+						mlx5_del_flow_rules(lag_definer->rules[idx]);
+				goto destroy_fg;
+			}
		}
	}
···
static struct mlx5_lag_definer *
mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash,
-			enum mlx5_traffic_types tt, bool tunnel, u8 port1,
-			u8 port2)
+			enum mlx5_traffic_types tt, bool tunnel, u8 *ports)
{
	struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
	struct mlx5_lag_definer *lag_definer;
···
		goto free_mask;
	}

-	err = mlx5_lag_create_port_sel_table(ldev, lag_definer, port1, port2);
+	err = mlx5_lag_create_port_sel_table(ldev, lag_definer, ports);
	if (err)
		goto destroy_match_definer;
···
			  struct mlx5_lag_definer *lag_definer)
{
	struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
+	int idx;
	int i;
+	int j;

-	for (i = 0; i < MLX5_MAX_PORTS; i++)
-		mlx5_del_flow_rules(lag_definer->rules[i]);
+	for (i = 0; i < ldev->ports; i++) {
+		for (j = 0; j < ldev->buckets; j++) {
+			idx = i * ldev->buckets + j;
+			mlx5_del_flow_rules(lag_definer->rules[idx]);
+		}
+	}
	mlx5_destroy_flow_group(lag_definer->fg);
	mlx5_destroy_flow_table(lag_definer->ft);
	mlx5_destroy_match_definer(dev, lag_definer->definer);
···

static int mlx5_lag_create_definers(struct mlx5_lag *ldev,
				    enum netdev_lag_hash hash_type,
-				    u8 port1, u8 port2)
+				    u8 *ports)
{
	struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
	struct mlx5_lag_definer *lag_definer;
···
	for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
		lag_definer = mlx5_lag_create_definer(ldev, hash_type, tt,
-						      false, port1, port2);
+						      false, ports);
		if (IS_ERR(lag_definer)) {
			err = PTR_ERR(lag_definer);
			goto destroy_definers;
···

		lag_definer =
			mlx5_lag_create_definer(ldev, hash_type, tt,
-						true, port1, port2);
+						true, ports);
		if (IS_ERR(lag_definer)) {
			err = PTR_ERR(lag_definer);
			goto destroy_definers;
···
}

int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
-			     enum netdev_lag_hash hash_type, u8 port1, u8 port2)
+			     enum netdev_lag_hash hash_type, u8 *ports)
{
	struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
	int err;

	set_tt_map(port_sel, hash_type);
-	err = mlx5_lag_create_definers(ldev, hash_type, port1, port2);
+	err = mlx5_lag_create_definers(ldev, hash_type, ports);
	if (err)
		return err;
···
	return err;
}

-static int
-mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
-				      struct mlx5_lag_definer **definers,
-				      u8 port1, u8 port2)
+static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
+						   struct mlx5_lag_definer *def,
+						   u8 *ports)
{
-	struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
	struct mlx5_flow_destination dest = {};
+	int idx;
	int err;
-	int tt;
+	int i;
+	int j;

	dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
	dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;

-	for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
-		struct mlx5_flow_handle **rules = definers[tt]->rules;
+	for (i = 0; i < ldev->ports; i++) {
+		for (j = 0; j < ldev->buckets; j++) {
+			idx = i * ldev->buckets + j;
+			if (ldev->v2p_map[i] == ports[i])
+				continue;

-		if (ldev->v2p_map[MLX5_LAG_P1] != port1) {
-			dest.vport.vhca_id =
-				MLX5_CAP_GEN(ldev->pf[port1 - 1].dev, vhca_id);
-			err = mlx5_modify_rule_destination(rules[MLX5_LAG_P1],
-							   &dest, NULL);
-			if (err)
-				return err;
-		}
-
-		if (ldev->v2p_map[MLX5_LAG_P2] != port2) {
-			dest.vport.vhca_id =
-				MLX5_CAP_GEN(ldev->pf[port2 - 1].dev, vhca_id);
-			err = mlx5_modify_rule_destination(rules[MLX5_LAG_P2],
-							   &dest, NULL);
+			dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
+							  vhca_id);
+			err = mlx5_modify_rule_destination(def->rules[idx], &dest, NULL);
			if (err)
				return err;
		}
···
	return 0;
}

-int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2)
+static int
+mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
+				      struct mlx5_lag_definer **definers,
+				      u8 *ports)
+{
+	struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
+	int err;
+	int tt;
+
+	for_each_set_bit(tt, port_sel->tt_map, MLX5_NUM_TT) {
+		err = __mlx5_lag_modify_definers_destinations(ldev, definers[tt], ports);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports)
{
	struct mlx5_lag_port_sel *port_sel = &ldev->port_sel;
	int err;

	err = mlx5_lag_modify_definers_destinations(ldev,
						    port_sel->outer.definers,
-						    port1, port2);
+						    ports);
	if (err)
		return err;
···

	return mlx5_lag_modify_definers_destinations(ldev,
						     port_sel->inner.definers,
-						     port1, port2);
+						     ports);
}

void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev)
+8 -7
drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.h
···
	struct mlx5_flow_definer *definer;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *fg;
-	struct mlx5_flow_handle *rules[MLX5_MAX_PORTS];
+	/* Each port has ldev->buckets number of rules and they are arranged
+	 * in [port * buckets .. port * buckets + buckets) locations
+	 */
+	struct mlx5_flow_handle *rules[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS];
};

struct mlx5_lag_ttc {
···

#ifdef CONFIG_MLX5_ESWITCH

-int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1, u8 port2);
+int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports);
void mlx5_lag_port_sel_destroy(struct mlx5_lag *ldev);
int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
-			     enum netdev_lag_hash hash_type, u8 port1,
-			     u8 port2);
+			     enum netdev_lag_hash hash_type, u8 *ports);

#else /* CONFIG_MLX5_ESWITCH */
static inline int mlx5_lag_port_sel_create(struct mlx5_lag *ldev,
					   enum netdev_lag_hash hash_type,
-					   u8 port1, u8 port2)
+					   u8 *ports)
{
	return 0;
}

-static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 port1,
-					   u8 port2)
+static inline int mlx5_lag_port_sel_modify(struct mlx5_lag *ldev, u8 *ports)
{
	return 0;
}
+9 -7
drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
···
struct mlx5_devcom_component {
	struct {
		void *data;
-	} device[MLX5_MAX_PORTS];
+	} device[MLX5_DEVCOM_PORTS_SUPPORTED];

	mlx5_devcom_event_handler_t handler;
	struct rw_semaphore sem;
···
	struct list_head list;

	struct mlx5_devcom_component components[MLX5_DEVCOM_NUM_COMPONENTS];
-	struct mlx5_core_dev *devs[MLX5_MAX_PORTS];
+	struct mlx5_core_dev *devs[MLX5_DEVCOM_PORTS_SUPPORTED];
};

struct mlx5_devcom {
···

	if (!mlx5_core_is_pf(dev))
		return NULL;
+	if (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_DEVCOM_PORTS_SUPPORTED)
+		return NULL;

	sguid0 = mlx5_query_nic_system_image_guid(dev);
	list_for_each_entry(iter, &devcom_list, list) {
		struct mlx5_core_dev *tmp_dev = NULL;

		idx = -1;
-		for (i = 0; i < MLX5_MAX_PORTS; i++) {
+		for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++) {
			if (iter->devs[i])
				tmp_dev = iter->devs[i];
			else
···

	kfree(devcom);

-	for (i = 0; i < MLX5_MAX_PORTS; i++)
+	for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
		if (priv->devs[i])
			break;

-	if (i != MLX5_MAX_PORTS)
+	if (i != MLX5_DEVCOM_PORTS_SUPPORTED)
		return;

	list_del(&priv->list);
···

	comp = &devcom->priv->components[id];
	down_write(&comp->sem);
-	for (i = 0; i < MLX5_MAX_PORTS; i++)
+	for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
		if (i != devcom->idx && comp->device[i].data) {
			err = comp->handler(event, comp->device[i].data,
					    event_data);
···
		return NULL;
	}

-	for (i = 0; i < MLX5_MAX_PORTS; i++)
+	for (i = 0; i < MLX5_DEVCOM_PORTS_SUPPORTED; i++)
		if (i != devcom->idx)
			break;
+2
drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h
··· 6 6
7 7 #include <linux/mlx5/driver.h>
8 8
9 + #define MLX5_DEVCOM_PORTS_SUPPORTED 2
10 +
9 11 enum mlx5_devcom_components {
10 12 MLX5_DEVCOM_ESW_OFFLOADS,
11 13
+1
drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c
··· 10 10
11 11 static const u32 tout_def_sw_val[MAX_TIMEOUT_TYPES] = {
12 12 [MLX5_TO_FW_PRE_INIT_TIMEOUT_MS] = 120000,
13 + [MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS] = 7200000,
13 14 [MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS] = 20000,
14 15 [MLX5_TO_FW_PRE_INIT_WAIT_MS] = 2,
15 16 [MLX5_TO_FW_INIT_MS] = 2000,
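For scale: the new recovery default of 7200000 ms is two hours, against 120000 ms (two minutes) for a regular probe, which is why the wait also had to become abortable (see the main.c hunks below). A trivial sanity check of the arithmetic:

#include <stdio.h>

int main(void)
{
	const unsigned int pre_init_ms = 120000;  /* normal boot wait */
	const unsigned int recovery_ms = 7200000; /* health-recovery wait */

	printf("boot: %u s, recovery: %u min\n",
	       pre_init_ms / 1000, recovery_ms / 1000 / 60);
	/* prints: boot: 120 s, recovery: 120 min */
	return 0;
}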
+1
drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h
··· 7 7 enum mlx5_timeouts_types {
8 8 /* pre init timeouts (not read from FW) */
9 9 MLX5_TO_FW_PRE_INIT_TIMEOUT_MS,
10 + MLX5_TO_FW_PRE_INIT_ON_RECOVERY_TIMEOUT_MS,
10 11 MLX5_TO_FW_PRE_INIT_WARN_MESSAGE_INTERVAL_MS,
11 12 MLX5_TO_FW_PRE_INIT_WAIT_MS,
12 13
+18 -10
drivers/net/ethernet/mellanox/mlx5/core/main.c
··· 189 189 fw_initializing = ioread32be(&dev->iseg->initializing);
190 190 if (!(fw_initializing >> 31))
191 191 break;
192 - if (time_after(jiffies, end)) {
192 + if (time_after(jiffies, end) ||
193 + test_and_clear_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) {
193 194 err = -EBUSY;
194 195 break;
195 196 }
··· 1003 1002 mlx5_devcom_unregister_device(dev->priv.devcom);
1004 1003 }
1005 1004
1006 - static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot)
1005 + static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout)
1007 1006 {
1008 1007 int err;
1009 1008
··· 1018 1017
1019 1018 /* wait for firmware to accept initialization segments configurations
1020 1019 */
1021 - err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT),
1020 + err = wait_fw_init(dev, timeout,
1022 1021 mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL));
1023 1022 if (err) {
1024 1023 mlx5_core_err(dev, "Firmware over %llu MS in pre-initializing state, aborting\n",
1025 - mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
1024 + timeout);
1026 1025 return err;
1027 1026 }
··· 1272 1271 mutex_lock(&dev->intf_state_mutex);
1273 1272 dev->state = MLX5_DEVICE_STATE_UP;
1274 1273
1275 - err = mlx5_function_setup(dev, true);
1274 + err = mlx5_function_setup(dev, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
1276 1275 if (err)
1277 1276 goto err_function;
1278 1277
··· 1336 1335 mutex_unlock(&dev->intf_state_mutex);
1337 1336 }
1338 1337
1339 - int mlx5_load_one(struct mlx5_core_dev *dev)
1338 + int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery)
1340 1339 {
1341 1340 int err = 0;
1341 + u64 timeout;
1342 1342
1343 1343 mutex_lock(&dev->intf_state_mutex);
1344 1344 if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
··· 1349 1347 /* remove any previous indication of internal error */
1350 1348 dev->state = MLX5_DEVICE_STATE_UP;
1351 1349
1352 - err = mlx5_function_setup(dev, false);
1350 + if (recovery)
1351 + timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT);
1352 + else
1353 + timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT);
1354 + err = mlx5_function_setup(dev, timeout);
1353 1355 if (err)
1354 1356 goto err_function;
1355 1357
··· 1608 1602 struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1609 1603 struct devlink *devlink = priv_to_devlink(dev);
1610 1604
1605 + set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
1611 1606 devlink_unregister(devlink);
1612 1607 mlx5_sriov_disable(pdev);
1613 1608 mlx5_crdump_disable(dev);
··· 1724 1717
1725 1718 mlx5_pci_trace(dev, "Enter, loading driver..\n");
1726 1719
1727 - err = mlx5_load_one(dev);
1720 + err = mlx5_load_one(dev, false);
1728 1721
1729 1722 mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err,
1730 1723 !err ? "recovered" : "Failed");
··· 1792 1785 int err;
1793 1786
1794 1787 mlx5_core_info(dev, "Shutdown was called\n");
1788 + set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
1795 1789 err = mlx5_try_fast_unload(dev);
1796 1790 if (err)
1797 1791 mlx5_unload_one(dev);
··· 1812 1804 {
1813 1805 struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1814 1806
1815 - return mlx5_load_one(dev);
1807 + return mlx5_load_one(dev, false);
1816 1808 }
1817 1809
1818 1810 static const struct pci_device_id mlx5_core_pci_table[] = {
··· 1857 1849 return -EIO;
1858 1850 }
1859 1851
1860 - return mlx5_load_one(dev);
1852 + return mlx5_load_one(dev, true);
1861 1853 }
1862 1854
1863 1855 static struct pci_driver mlx5_core_driver = {
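The main.c changes make the pre-init wait both longer (on health recovery) and interruptible: remove and shutdown set MLX5_BREAK_FW_WAIT, and the wait loop polls it with test_and_clear_bit so a stuck device no longer holds the system for the full timeout. A minimal user-space sketch of the same poll-with-abort pattern; every name below is invented, and atomic_exchange() stands in for the kernel's test_and_clear_bit():

#define _POSIX_C_SOURCE 199309L
#include <stdatomic.h>
#include <stdbool.h>
#include <time.h>

static atomic_bool break_fw_wait; /* models the MLX5_BREAK_FW_WAIT bit */

/* Poll until ready() succeeds, the deadline passes, or another thread
 * (e.g. a shutdown path) asks us to stop waiting.
 */
static int wait_until_ready(bool (*ready)(void), unsigned int timeout_ms)
{
	struct timespec ts;
	long long end_ns, now_ns;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	end_ns = ts.tv_sec * 1000000000LL + ts.tv_nsec +
		 (long long)timeout_ms * 1000000LL;

	for (;;) {
		if (ready())
			return 0;
		clock_gettime(CLOCK_MONOTONIC, &ts);
		now_ns = ts.tv_sec * 1000000000LL + ts.tv_nsec;
		/* atomic_exchange() returns the old value and clears the
		 * flag, mirroring test_and_clear_bit() semantics.
		 */
		if (now_ns > end_ns || atomic_exchange(&break_fw_wait, false))
			return -1; /* timed out or asked to abort */
		nanosleep(&(struct timespec){ .tv_nsec = 2000000 }, NULL); /* ~2ms */
	}
}

static bool never_ready(void) { return false; }

int main(void)
{
	/* simulate a shutdown path requesting an early abort */
	atomic_store(&break_fw_wait, true);
	return wait_until_ready(never_ready, 1000) == -1 ? 0 : 1;
}

With a two-hour recovery timeout in play, this abort bit is what keeps reboot and device removal responsive.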
+2 -1
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
··· 210 210 int mlx5_register_device(struct mlx5_core_dev *dev);
211 211 void mlx5_unregister_device(struct mlx5_core_dev *dev);
212 212 struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
213 + struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev);
213 214 void mlx5_dev_list_lock(void);
214 215 void mlx5_dev_list_unlock(void);
215 216 int mlx5_dev_list_trylock(void);
··· 291 290 int mlx5_init_one(struct mlx5_core_dev *dev);
292 291 void mlx5_uninit_one(struct mlx5_core_dev *dev);
293 292 void mlx5_unload_one(struct mlx5_core_dev *dev);
294 - int mlx5_load_one(struct mlx5_core_dev *dev);
293 + int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery);
295 294
296 295 int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out);
+4 -1
include/linux/mlx5/driver.h
··· 84 84 };
85 85
86 86 enum {
87 - MLX5_MAX_PORTS = 2,
87 + MLX5_MAX_PORTS = 4,
88 88 };
89 89
90 90 enum {
··· 558 558 struct dentry *cq_debugfs;
559 559 struct dentry *cmdif_debugfs;
560 560 struct dentry *pages_debugfs;
561 + struct dentry *lag_debugfs;
561 562 };
562 563
563 564 struct mlx5_ft_pool;
··· 633 632
634 633 enum mlx5_interface_state {
635 634 MLX5_INTERFACE_STATE_UP = BIT(0),
635 + MLX5_BREAK_FW_WAIT = BIT(1),
636 636 };
637 637
638 638 enum mlx5_pci_status {
··· 1143 1141 int num_counters,
1144 1142 size_t *offsets);
1145 1143 struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev);
1144 + u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev);
1146 1145 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
1147 1146 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
1148 1147 int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
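Finally, driver.h raises the compile-time ceiling MLX5_MAX_PORTS to 4 and exports mlx5_lag_get_num_ports(), so consumers can spread work over the port count the bond actually has rather than a hardcoded 2. A toy round-robin spreader illustrating the idea; pick_tx_port() and its parameters are hypothetical, and in the driver the count would come from mlx5_lag_get_num_ports():

#include <stdio.h>

#define MLX5_MAX_PORTS 4 /* compile-time ceiling, per the diff above */

/* Round-robin a shared counter over the active lag ports; num_ports is
 * the per-device value, which may be anything up to MLX5_MAX_PORTS.
 */
static unsigned int pick_tx_port(unsigned int *counter, unsigned int num_ports)
{
	return (*counter)++ % num_ports + 1; /* ports are 1-based */
}

int main(void)
{
	unsigned int counter = 0;

	for (int i = 0; i < 8; i++)
		printf("qp %d -> port %u\n", i, pick_tx_port(&counter, MLX5_MAX_PORTS));
	return 0;
}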