Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/mlx5e: Add devlink hairpin queues parameters

We refer to a TC NIC rule that involves forwarding as "hairpin".
Hairpin queues are an mlx5 hardware-specific implementation for hardware
forwarding of such packets.

Per the discussion in [1], move the hairpin queues control (number and
size) from debugfs to devlink.

Expose two devlink params:
- hairpin_num_queues: control the number of hairpin queues
- hairpin_queue_size: control the size (in packets) of the hairpin queues

[1] https://lore.kernel.org/all/20230111194608.7f15b9a1@kernel.org/

Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20230314054234.267365-12-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Gal Pressman and committed by
Jakub Kicinski
1bffcea4 028522e2

+134 -31
+35
Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
··· 122 122 123 123 $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev 124 124 125 + hairpin_num_queues: Number of hairpin queues 126 + -------------------------------------------- 127 + We refer to a TC NIC rule that involves forwarding as "hairpin". 128 + 129 + Hairpin queues are mlx5 hardware specific implementation for hardware 130 + forwarding of such packets. 131 + 132 + - Show the number of hairpin queues:: 133 + 134 + $ devlink dev param show pci/0000:06:00.0 name hairpin_num_queues 135 + pci/0000:06:00.0: 136 + name hairpin_num_queues type driver-specific 137 + values: 138 + cmode driverinit value 2 139 + 140 + - Change the number of hairpin queues:: 141 + 142 + $ devlink dev param set pci/0000:06:00.0 name hairpin_num_queues value 4 cmode driverinit 143 + 144 + hairpin_queue_size: Size of the hairpin queues 145 + ---------------------------------------------- 146 + Control the size of the hairpin queues. 147 + 148 + - Show the size of the hairpin queues:: 149 + 150 + $ devlink dev param show pci/0000:06:00.0 name hairpin_queue_size 151 + pci/0000:06:00.0: 152 + name hairpin_queue_size type driver-specific 153 + values: 154 + cmode driverinit value 1024 155 + 156 + - Change the size (in packets) of the hairpin queues:: 157 + 158 + $ devlink dev param set pci/0000:06:00.0 name hairpin_queue_size value 512 cmode driverinit 159 + 125 160 Health reporters 126 161 ================ 127 162
+12
Documentation/networking/devlink/mlx5.rst
··· 72 72 73 73 Default: disabled 74 74 75 + * - ``hairpin_num_queues`` 76 + - u32 77 + - driverinit 78 + - We refer to a TC NIC rule that involves forwarding as "hairpin". 79 + Hairpin queues are mlx5 hardware specific implementation for hardware 80 + forwarding of such packets. 81 + 82 + Control the number of hairpin queues. 83 + * - ``hairpin_queue_size`` 84 + - u32 85 + - driverinit 86 + - Control the size (in packets) of the hairpin queues. 75 87 76 88 The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD`` 77 89
+66
drivers/net/ethernet/mellanox/mlx5/core/devlink.c
··· 494 494 return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL; 495 495 } 496 496 497 + static int 498 + mlx5_devlink_hairpin_num_queues_validate(struct devlink *devlink, u32 id, 499 + union devlink_param_value val, 500 + struct netlink_ext_ack *extack) 501 + { 502 + return val.vu32 ? 0 : -EINVAL; 503 + } 504 + 505 + static int 506 + mlx5_devlink_hairpin_queue_size_validate(struct devlink *devlink, u32 id, 507 + union devlink_param_value val, 508 + struct netlink_ext_ack *extack) 509 + { 510 + struct mlx5_core_dev *dev = devlink_priv(devlink); 511 + u32 val32 = val.vu32; 512 + 513 + if (!is_power_of_2(val32)) { 514 + NL_SET_ERR_MSG_MOD(extack, "Value is not power of two"); 515 + return -EINVAL; 516 + } 517 + 518 + if (val32 > BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))) { 519 + NL_SET_ERR_MSG_FMT_MOD( 520 + extack, "Maximum hairpin queue size is %lu", 521 + BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))); 522 + return -EINVAL; 523 + } 524 + 525 + return 0; 526 + } 527 + 528 + static void mlx5_devlink_hairpin_params_init_values(struct devlink *devlink) 529 + { 530 + struct mlx5_core_dev *dev = devlink_priv(devlink); 531 + union devlink_param_value value; 532 + u64 link_speed64; 533 + u32 link_speed; 534 + 535 + /* set hairpin pair per each 50Gbs share of the link */ 536 + mlx5_port_max_linkspeed(dev, &link_speed); 537 + link_speed = max_t(u32, link_speed, 50000); 538 + link_speed64 = link_speed; 539 + do_div(link_speed64, 50000); 540 + 541 + value.vu32 = link_speed64; 542 + devl_param_driverinit_value_set( 543 + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, value); 544 + 545 + value.vu32 = 546 + BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(dev), 547 + MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))); 548 + devl_param_driverinit_value_set( 549 + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, value); 550 + } 551 + 497 552 static const struct devlink_param mlx5_devlink_params[] = { 498 553 DEVLINK_PARAM_GENERIC(ENABLE_ROCE, 
BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), 499 554 NULL, NULL, mlx5_devlink_enable_roce_validate), ··· 602 547 static const struct devlink_param mlx5_devlink_eth_params[] = { 603 548 DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), 604 549 NULL, NULL, NULL), 550 + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, 551 + "hairpin_num_queues", DEVLINK_PARAM_TYPE_U32, 552 + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, 553 + mlx5_devlink_hairpin_num_queues_validate), 554 + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, 555 + "hairpin_queue_size", DEVLINK_PARAM_TYPE_U32, 556 + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, 557 + mlx5_devlink_hairpin_queue_size_validate), 605 558 }; 606 559 607 560 static int mlx5_devlink_eth_params_register(struct devlink *devlink) ··· 630 567 devl_param_driverinit_value_set(devlink, 631 568 DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, 632 569 value); 570 + 571 + mlx5_devlink_hairpin_params_init_values(devlink); 572 + 633 573 return 0; 634 574 } 635 575
+2
drivers/net/ethernet/mellanox/mlx5/core/devlink.h
··· 12 12 MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, 13 13 MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, 14 14 MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, 15 + MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, 16 + MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, 15 17 }; 16 18 17 19 struct mlx5_trap_ctx {
+19 -31
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
··· 44 44 #include <net/bareudp.h> 45 45 #include <net/bonding.h> 46 46 #include <net/dst_metadata.h> 47 + #include "devlink.h" 47 48 #include "en.h" 48 49 #include "en/tc/post_act.h" 49 50 #include "en/tc/act_stats.h" ··· 74 73 #define MLX5E_TC_TABLE_NUM_GROUPS 4 75 74 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) 76 75 77 - struct mlx5e_hairpin_params { 78 - struct mlx5_core_dev *mdev; 79 - u32 num_queues; 80 - u32 queue_size; 81 - }; 82 - 83 76 struct mlx5e_tc_table { 84 77 /* Protects the dynamic assignment of the t parameter 85 78 * which is the nic tc root table. ··· 96 101 97 102 struct mlx5_tc_ct_priv *ct; 98 103 struct mapping_ctx *mapping; 99 - struct mlx5e_hairpin_params hairpin_params; 100 104 struct dentry *dfs_root; 101 105 102 106 /* tc action stats */ ··· 1093 1099 &debugfs_hairpin_table_dump_fops); 1094 1100 } 1095 1101 1096 - static void 1097 - mlx5e_hairpin_params_init(struct mlx5e_hairpin_params *hairpin_params, 1098 - struct mlx5_core_dev *mdev) 1099 - { 1100 - u64 link_speed64; 1101 - u32 link_speed; 1102 - 1103 - hairpin_params->mdev = mdev; 1104 - /* set hairpin pair per each 50Gbs share of the link */ 1105 - mlx5_port_max_linkspeed(mdev, &link_speed); 1106 - link_speed = max_t(u32, link_speed, 50000); 1107 - link_speed64 = link_speed; 1108 - do_div(link_speed64, 50000); 1109 - hairpin_params->num_queues = link_speed64; 1110 - 1111 - hairpin_params->queue_size = 1112 - BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), 1113 - MLX5_CAP_GEN(mdev, log_max_hairpin_num_packets))); 1114 - } 1115 - 1116 1102 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, 1117 1103 struct mlx5e_tc_flow *flow, 1118 1104 struct mlx5e_tc_flow_parse_attr *parse_attr, 1119 1105 struct netlink_ext_ack *extack) 1120 1106 { 1121 1107 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); 1108 + struct devlink *devlink = priv_to_devlink(priv->mdev); 1122 1109 int peer_ifindex = parse_attr->mirred_ifindex[0]; 1110 + union devlink_param_value val = {}; 1123 
1111 struct mlx5_hairpin_params params; 1124 1112 struct mlx5_core_dev *peer_mdev; 1125 1113 struct mlx5e_hairpin_entry *hpe; ··· 1158 1182 hash_hairpin_info(peer_id, match_prio)); 1159 1183 mutex_unlock(&tc->hairpin_tbl_lock); 1160 1184 1161 - params.log_num_packets = ilog2(tc->hairpin_params.queue_size); 1185 + err = devl_param_driverinit_value_get( 1186 + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val); 1187 + if (err) { 1188 + err = -ENOMEM; 1189 + goto out_err; 1190 + } 1191 + 1192 + params.log_num_packets = ilog2(val.vu32); 1162 1193 params.log_data_size = 1163 1194 clamp_t(u32, 1164 1195 params.log_num_packets + ··· 1174 1191 MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); 1175 1192 1176 1193 params.q_counter = priv->q_counter; 1177 - params.num_channels = tc->hairpin_params.num_queues; 1194 + err = devl_param_driverinit_value_get( 1195 + devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val); 1196 + if (err) { 1197 + err = -ENOMEM; 1198 + goto out_err; 1199 + } 1200 + 1201 + params.num_channels = val.vu32; 1178 1202 1179 1203 hp = mlx5e_hairpin_create(priv, &params, peer_ifindex); 1180 1204 hpe->hp = hp; ··· 5278 5288 tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL); 5279 5289 tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr, 5280 5290 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); 5281 - 5282 - mlx5e_hairpin_params_init(&tc->hairpin_params, dev); 5283 5291 5284 5292 tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; 5285 5293 err = register_netdevice_notifier_dev_net(priv->netdev,