Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'net-ethtool-support-including-flow-label-in-the-flow-hash-for-rss'

Jakub Kicinski says:

====================
net: ethtool: support including Flow Label in the flow hash for RSS

Add support for using IPv6 Flow Label in Rx hash computation
and therefore RSS queue selection.

v3: https://lore.kernel.org/20250724015101.186608-1-kuba@kernel.org
v2: https://lore.kernel.org/20250722014915.3365370-1-kuba@kernel.org
RFC: https://lore.kernel.org/20250609173442.1745856-1-kuba@kernel.org
====================

Link: https://patch.msgid.link/20250811234212.580748-1-kuba@kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+233 -18
+3
Documentation/netlink/specs/ethtool.yaml
··· 205 205 - 206 206 name: gtp-teid 207 207 - 208 + name: ip6-fl 209 + doc: IPv6 Flow Label 210 + - 208 211 name: discard 209 212 value: 31 210 213
+2
drivers/net/ethernet/broadcom/bnxt/bnxt.c
··· 6957 6957 bp->rss_cap |= BNXT_RSS_CAP_ESP_V4_RSS_CAP; 6958 6958 if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_ESP_SPI_IPV6_CAP) 6959 6959 bp->rss_cap |= BNXT_RSS_CAP_ESP_V6_RSS_CAP; 6960 + if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPV6_FLOW_LABEL_CAP) 6961 + bp->rss_cap |= BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP; 6960 6962 if (flags & VNIC_QCAPS_RESP_FLAGS_RE_FLUSH_CAP) 6961 6963 bp->fw_cap |= BNXT_FW_CAP_VNIC_RE_FLUSH; 6962 6964 }
+1
drivers/net/ethernet/broadcom/bnxt/bnxt.h
··· 2407 2407 #define BNXT_RSS_CAP_ESP_V4_RSS_CAP BIT(6) 2408 2408 #define BNXT_RSS_CAP_ESP_V6_RSS_CAP BIT(7) 2409 2409 #define BNXT_RSS_CAP_MULTI_RSS_CTX BIT(8) 2410 + #define BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP BIT(9) 2410 2411 2411 2412 u8 rss_hash_key[HW_HASH_KEY_SIZE]; 2412 2413 u8 rss_hash_key_valid:1;
+16 -4
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
··· 1584 1584 { 1585 1585 if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6) 1586 1586 return RXH_IP_SRC | RXH_IP_DST; 1587 + if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL) 1588 + return RXH_IP_SRC | RXH_IP_DST | RXH_IP6_FL; 1587 1589 return 0; 1588 1590 } 1589 1591 ··· 1664 1662 1665 1663 if (cmd->data == RXH_4TUPLE) 1666 1664 tuple = 4; 1667 - else if (cmd->data == RXH_2TUPLE) 1665 + else if (cmd->data == RXH_2TUPLE || 1666 + cmd->data == (RXH_2TUPLE | RXH_IP6_FL)) 1668 1667 tuple = 2; 1669 1668 else if (!cmd->data) 1670 1669 tuple = 0; 1671 1670 else 1671 + return -EINVAL; 1672 + 1673 + if (cmd->data & RXH_IP6_FL && 1674 + !(bp->rss_cap & BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP)) 1672 1675 return -EINVAL; 1673 1676 1674 1677 if (cmd->flow_type == TCP_V4_FLOW) { ··· 1739 1732 case AH_V6_FLOW: 1740 1733 case ESP_V6_FLOW: 1741 1734 case IPV6_FLOW: 1742 - if (tuple == 2) 1735 + rss_hash_cfg &= ~(VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 | 1736 + VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL); 1737 + if (!tuple) 1738 + break; 1739 + if (cmd->data & RXH_IP6_FL) 1740 + rss_hash_cfg |= 1741 + VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL; 1742 + else if (tuple == 2) 1743 1743 rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6; 1744 - else if (!tuple) 1745 - rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6; 1746 1744 break; 1747 1745 } 1748 1746
+1 -1
drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
··· 1310 1310 #define FBNIC_L2_HASH_OPTIONS \ 1311 1311 (RXH_L2DA | RXH_DISCARD) 1312 1312 #define FBNIC_L3_HASH_OPTIONS \ 1313 - (FBNIC_L2_HASH_OPTIONS | RXH_IP_SRC | RXH_IP_DST) 1313 + (FBNIC_L2_HASH_OPTIONS | RXH_IP_SRC | RXH_IP_DST | RXH_IP6_FL) 1314 1314 #define FBNIC_L4_HASH_OPTIONS \ 1315 1315 (FBNIC_L3_HASH_OPTIONS | RXH_L4_B_0_1 | RXH_L4_B_2_3) 1316 1316
+2
drivers/net/ethernet/meta/fbnic/fbnic_rpc.c
··· 71 71 rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP_DST, IP_DST, flow_hash); 72 72 rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(L4_B_0_1, L4_SRC, flow_hash); 73 73 rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(L4_B_2_3, L4_DST, flow_hash); 74 + rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP6_FL, OV6_FL_LBL, flow_hash); 75 + rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP6_FL, IV6_FL_LBL, flow_hash); 74 76 75 77 return rss_en_mask; 76 78 }
+1
include/uapi/linux/ethtool.h
··· 2380 2380 #define RXH_L4_B_0_1 (1 << 6) /* src port in case of TCP/UDP/SCTP */ 2381 2381 #define RXH_L4_B_2_3 (1 << 7) /* dst port in case of TCP/UDP/SCTP */ 2382 2382 #define RXH_GTP_TEID (1 << 8) /* teid in case of GTP */ 2383 + #define RXH_IP6_FL (1 << 9) /* IPv6 flow label */ 2383 2384 #define RXH_DISCARD (1 << 31) 2384 2385 2385 2386 #define RX_CLS_FLOW_DISC 0xffffffffffffffffULL
+25
net/ethtool/ioctl.c
··· 1014 1014 return false; 1015 1015 } 1016 1016 1017 + static bool flow_type_v6(u32 flow_type) 1018 + { 1019 + switch (flow_type) { 1020 + case TCP_V6_FLOW: 1021 + case UDP_V6_FLOW: 1022 + case SCTP_V6_FLOW: 1023 + case AH_ESP_V6_FLOW: 1024 + case AH_V6_FLOW: 1025 + case ESP_V6_FLOW: 1026 + case IPV6_FLOW: 1027 + case GTPU_V6_FLOW: 1028 + case GTPC_V6_FLOW: 1029 + case GTPC_TEID_V6_FLOW: 1030 + case GTPU_EH_V6_FLOW: 1031 + case GTPU_UL_V6_FLOW: 1032 + case GTPU_DL_V6_FLOW: 1033 + return true; 1034 + } 1035 + 1036 + return false; 1037 + } 1038 + 1017 1039 /* When adding a new type, update the assert and, if it's hashable, add it to 1018 1040 * the flow_type_hashable switch case. 1019 1041 */ ··· 1098 1076 rc = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr); 1099 1077 if (rc) 1100 1078 return rc; 1079 + 1080 + if (info.data & RXH_IP6_FL && !flow_type_v6(info.flow_type)) 1081 + return -EINVAL; 1101 1082 1102 1083 if (info.flow_type & FLOW_RSS && info.rss_context && 1103 1084 !ops->rxfh_per_ctx_fields)
+14 -13
net/ethtool/rss.c
··· 536 536 #define RFH_MASK (RXH_L2DA | RXH_VLAN | RXH_IP_SRC | RXH_IP_DST | \ 537 537 RXH_L3_PROTO | RXH_L4_B_0_1 | RXH_L4_B_2_3 | \ 538 538 RXH_GTP_TEID | RXH_DISCARD) 539 + #define RFH_MASKv6 (RFH_MASK | RXH_IP6_FL) 539 540 540 541 static const struct nla_policy ethnl_rss_flows_policy[] = { 541 542 [ETHTOOL_A_FLOW_ETHER] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 542 543 [ETHTOOL_A_FLOW_IP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 543 - [ETHTOOL_A_FLOW_IP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 544 + [ETHTOOL_A_FLOW_IP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 544 545 [ETHTOOL_A_FLOW_TCP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 545 546 [ETHTOOL_A_FLOW_UDP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 546 547 [ETHTOOL_A_FLOW_SCTP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 547 548 [ETHTOOL_A_FLOW_AH_ESP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 548 - [ETHTOOL_A_FLOW_TCP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 549 - [ETHTOOL_A_FLOW_UDP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 550 - [ETHTOOL_A_FLOW_SCTP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 551 - [ETHTOOL_A_FLOW_AH_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 549 + [ETHTOOL_A_FLOW_TCP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 550 + [ETHTOOL_A_FLOW_UDP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 551 + [ETHTOOL_A_FLOW_SCTP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 552 + [ETHTOOL_A_FLOW_AH_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 552 553 [ETHTOOL_A_FLOW_AH4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 553 554 [ETHTOOL_A_FLOW_ESP4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 554 - [ETHTOOL_A_FLOW_AH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 555 - [ETHTOOL_A_FLOW_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 555 + [ETHTOOL_A_FLOW_AH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 556 + [ETHTOOL_A_FLOW_ESP6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 556 557 [ETHTOOL_A_FLOW_GTPU4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 557 - [ETHTOOL_A_FLOW_GTPU6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 558 + [ETHTOOL_A_FLOW_GTPU6] = 
NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 558 559 [ETHTOOL_A_FLOW_GTPC4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 559 - [ETHTOOL_A_FLOW_GTPC6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 560 + [ETHTOOL_A_FLOW_GTPC6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 560 561 [ETHTOOL_A_FLOW_GTPC_TEID4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 561 - [ETHTOOL_A_FLOW_GTPC_TEID6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 562 + [ETHTOOL_A_FLOW_GTPC_TEID6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 562 563 [ETHTOOL_A_FLOW_GTPU_EH4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 563 - [ETHTOOL_A_FLOW_GTPU_EH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 564 + [ETHTOOL_A_FLOW_GTPU_EH6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 564 565 [ETHTOOL_A_FLOW_GTPU_UL4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 565 - [ETHTOOL_A_FLOW_GTPU_UL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 566 + [ETHTOOL_A_FLOW_GTPU_UL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 566 567 [ETHTOOL_A_FLOW_GTPU_DL4] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 567 - [ETHTOOL_A_FLOW_GTPU_DL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASK), 568 + [ETHTOOL_A_FLOW_GTPU_DL6] = NLA_POLICY_MASK(NLA_UINT, RFH_MASKv6), 568 569 }; 569 570 570 571 const struct nla_policy ethnl_rss_set_policy[ETHTOOL_A_RSS_FLOW_HASH + 1] = {
+1
tools/testing/selftests/drivers/net/hw/Makefile
··· 18 18 pp_alloc_fail.py \ 19 19 rss_api.py \ 20 20 rss_ctx.py \ 21 + rss_flow_label.py \ 21 22 rss_input_xfrm.py \ 22 23 tso.py \ 23 24 xsk_reconfig.py \
+167
tools/testing/selftests/drivers/net/hw/rss_flow_label.py
··· 1 + #!/usr/bin/env python3 2 + # SPDX-License-Identifier: GPL-2.0 3 + 4 + """ 5 + Tests for RSS hashing on IPv6 Flow Label. 6 + """ 7 + 8 + import glob 9 + import os 10 + import socket 11 + from lib.py import CmdExitFailure 12 + from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \ 13 + ksft_not_in, ksft_raises, KsftSkipEx 14 + from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port 15 + from lib.py import NetDrvEpEnv 16 + 17 + 18 + def _check_system(cfg): 19 + if not hasattr(socket, "SO_INCOMING_CPU"): 20 + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") 21 + 22 + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) 23 + if qcnt < 2: 24 + raise KsftSkipEx(f"Local has only {qcnt} queues") 25 + 26 + for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt", 27 + f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]: 28 + try: 29 + with open(f, 'r') as fp: 30 + setting = fp.read().strip() 31 + # CPU mask will be zeros and commas 32 + if setting.replace("0", "").replace(",", ""): 33 + raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}") 34 + except FileNotFoundError: 35 + pass 36 + 37 + # 1 is the default, if someone changed it we probably shouldn't mess with it 38 + af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout 39 + if af.strip() != "1": 40 + raise KsftSkipEx("Remote does not have auto_flowlabels enabled") 41 + 42 + 43 + def _ethtool_get_cfg(cfg, fl_type): 44 + descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout 45 + 46 + converter = { 47 + "IP SA": "s", 48 + "IP DA": "d", 49 + "L3 proto": "t", 50 + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", 51 + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", 52 + "IPv6 Flow Label": "l", 53 + } 54 + 55 + ret = "" 56 + for line in descr.split("\n")[1:-2]: 57 + # if this raises we probably need to add more keys to converter above 58 + ret += converter[line] 59 + return ret 60 + 61 + 62 + def _traffic(cfg, one_sock, one_cpu): 
63 + local_port = rand_port(socket.SOCK_DGRAM) 64 + remote_port = rand_port(socket.SOCK_DGRAM) 65 + 66 + sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) 67 + sock.bind(("", local_port)) 68 + sock.connect((cfg.remote_addr_v["6"], 0)) 69 + if one_sock: 70 + send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \ 71 + "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-" 72 + else: 73 + send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \ 74 + f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done" 75 + 76 + cpus = set() 77 + with bkg(send, shell=True, host=cfg.remote, exit_wait=True): 78 + for _ in range(20): 79 + fd_read_timeout(sock.fileno(), 1) 80 + cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) 81 + cpus.add(cpu) 82 + 83 + if one_cpu: 84 + ksft_eq(len(cpus), 1, 85 + f"{one_sock=} - expected one CPU, got traffic on: {cpus=}") 86 + else: 87 + ksft_ge(len(cpus), 2, 88 + f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}") 89 + 90 + 91 + def test_rss_flow_label(cfg): 92 + """ 93 + Test hashing on IPv6 flow label. Send traffic over a single socket 94 + and over multiple sockets. Depend on the remote having auto-label 95 + enabled so that it randomizes the label per socket. 
96 + """ 97 + 98 + cfg.require_ipver("6") 99 + cfg.require_cmd("socat", remote=True) 100 + _check_system(cfg) 101 + 102 + # Enable flow label hashing for UDP6 103 + initial = _ethtool_get_cfg(cfg, "udp6") 104 + no_lbl = initial.replace("l", "") 105 + if "l" not in initial: 106 + try: 107 + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}") 108 + except CmdExitFailure as exc: 109 + raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc 110 + 111 + defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") 112 + 113 + _traffic(cfg, one_sock=True, one_cpu=True) 114 + _traffic(cfg, one_sock=False, one_cpu=False) 115 + 116 + # Disable it, we should see no hashing (reset was already defer()ed) 117 + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}") 118 + 119 + _traffic(cfg, one_sock=False, one_cpu=True) 120 + 121 + 122 + def _check_v4_flow_types(cfg): 123 + for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]: 124 + try: 125 + cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout 126 + ksft_not_in("Flow Label", cur, 127 + comment=f"{fl_type=} has Flow Label:" + cur) 128 + except CmdExitFailure: 129 + # Probably does not support this flow type 130 + pass 131 + 132 + 133 + def test_rss_flow_label_6only(cfg): 134 + """ 135 + Test interactions with IPv4 flow types. It should not be possible to set 136 + IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also 137 + not appear in the IPv4 "current config". 
138 + """ 139 + 140 + with ksft_raises(CmdExitFailure) as cm: 141 + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl") 142 + ksft_in("Invalid argument", cm.exception.cmd.stderr) 143 + 144 + _check_v4_flow_types(cfg) 145 + 146 + # Try to enable Flow Labels and check again, in case it leaks thru 147 + initial = _ethtool_get_cfg(cfg, "udp6") 148 + changed = initial.replace("l", "") if "l" in initial else initial + "l" 149 + 150 + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}") 151 + restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") 152 + 153 + _check_v4_flow_types(cfg) 154 + restore.exec() 155 + _check_v4_flow_types(cfg) 156 + 157 + 158 + def main() -> None: 159 + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: 160 + ksft_run([test_rss_flow_label, 161 + test_rss_flow_label_6only], 162 + args=(cfg, )) 163 + ksft_exit() 164 + 165 + 166 + if __name__ == "__main__": 167 + main()