Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests net: add UDP GRO fraglist + bpf self-tests

When NETIF_F_GRO_FRAGLIST is enabled and bpf_skb_change_proto() is used,
check that UDP packets and TCP packets are successfully delivered to user
space. If wrong UDP packets are delivered, udpgso_bench_rx will exit
with "Initial byte out of range".

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Lina Wang <lina.wang@mediatek.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Lina Wang and committed by
David S. Miller
edae34a3 cf3ab8d4

+403
+3
tools/testing/selftests/net/Makefile
··· 25 25 TEST_PROGS += amt.sh 26 26 TEST_PROGS += unicast_extensions.sh 27 27 TEST_PROGS += udpgro_fwd.sh 28 + TEST_PROGS += udpgro_frglist.sh 28 29 TEST_PROGS += veth.sh 29 30 TEST_PROGS += ioam6.sh 30 31 TEST_PROGS += gro.sh ··· 61 60 62 61 KSFT_KHDR_INSTALL := 1 63 62 include ../lib.mk 63 + 64 + include bpf/Makefile 64 65 65 66 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma 66 67 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
+14
tools/testing/selftests/net/bpf/Makefile
# SPDX-License-Identifier: GPL-2.0
#
# Builds the BPF object listed in TEST_CUSTOM_PROGS with clang.

CLANG ?= clang
CCINCLUDE += -I../../bpf
CCINCLUDE += -I../../../../../usr/include/

TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o
all: $(TEST_CUSTOM_PROGS)

# Compile a C source into a BPF object.  The target lives in a sub-directory
# of $(OUTPUT) that is not guaranteed to exist (e.g. when OUTPUT points
# outside the source tree), so create it before clang tries to write there.
$(OUTPUT)/%.o: %.c
	mkdir -p $(@D)
	$(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@

# NOTE(review): if a Makefile that includes this one already defines a recipe
# for `clean`, make will warn and use only one of them - confirm with the
# including Makefile.
clean:
	rm -f $(TEST_CUSTOM_PROGS)
+285
tools/testing/selftests/net/bpf/nat6to4.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* 3 + * This code is taken from the Android Open Source Project and the author 4 + * (Maciej Żenczykowski) has gave permission to relicense it under the 5 + * GPLv2. Therefore this program is free software; 6 + * You can redistribute it and/or modify it under the terms of the GNU 7 + * General Public License version 2 as published by the Free Software 8 + * Foundation 9 + 10 + * The original headers, including the original license headers, are 11 + * included below for completeness. 12 + * 13 + * Copyright (C) 2019 The Android Open Source Project 14 + * 15 + * Licensed under the Apache License, Version 2.0 (the "License"); 16 + * you may not use this file except in compliance with the License. 17 + * You may obtain a copy of the License at 18 + * 19 + * http://www.apache.org/licenses/LICENSE-2.0 20 + * 21 + * Unless required by applicable law or agreed to in writing, software 22 + * distributed under the License is distributed on an "AS IS" BASIS, 23 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 24 + * See the License for the specific language governing permissions and 25 + * limitations under the License. 
26 + */ 27 + #include <linux/bpf.h> 28 + #include <linux/if.h> 29 + #include <linux/if_ether.h> 30 + #include <linux/if_packet.h> 31 + #include <linux/in.h> 32 + #include <linux/in6.h> 33 + #include <linux/ip.h> 34 + #include <linux/ipv6.h> 35 + #include <linux/pkt_cls.h> 36 + #include <linux/swab.h> 37 + #include <stdbool.h> 38 + #include <stdint.h> 39 + 40 + 41 + #include <linux/udp.h> 42 + 43 + #include <bpf/bpf_helpers.h> 44 + #include <bpf/bpf_endian.h> 45 + 46 + #define IP_DF 0x4000 // Flag: "Don't Fragment" 47 + 48 + SEC("schedcls/ingress6/nat_6") 49 + int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb) 50 + { 51 + const int l2_header_size = sizeof(struct ethhdr); 52 + void *data = (void *)(long)skb->data; 53 + const void *data_end = (void *)(long)skb->data_end; 54 + const struct ethhdr * const eth = data; // used iff is_ethernet 55 + const struct ipv6hdr * const ip6 = (void *)(eth + 1); 56 + 57 + // Require ethernet dst mac address to be our unicast address. 58 + if (skb->pkt_type != PACKET_HOST) 59 + return TC_ACT_OK; 60 + 61 + // Must be meta-ethernet IPv6 frame 62 + if (skb->protocol != bpf_htons(ETH_P_IPV6)) 63 + return TC_ACT_OK; 64 + 65 + // Must have (ethernet and) ipv6 header 66 + if (data + l2_header_size + sizeof(*ip6) > data_end) 67 + return TC_ACT_OK; 68 + 69 + // Ethertype - if present - must be IPv6 70 + if (eth->h_proto != bpf_htons(ETH_P_IPV6)) 71 + return TC_ACT_OK; 72 + 73 + // IP version must be 6 74 + if (ip6->version != 6) 75 + return TC_ACT_OK; 76 + // Maximum IPv6 payload length that can be translated to IPv4 77 + if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr)) 78 + return TC_ACT_OK; 79 + switch (ip6->nexthdr) { 80 + case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 81 + case IPPROTO_UDP: // address means there is no need to update their checksums. 
82 + case IPPROTO_GRE: // We do not need to bother looking at GRE/ESP headers, 83 + case IPPROTO_ESP: // since there is never a checksum to update. 84 + break; 85 + default: // do not know how to handle anything else 86 + return TC_ACT_OK; 87 + } 88 + 89 + struct ethhdr eth2; // used iff is_ethernet 90 + 91 + eth2 = *eth; // Copy over the ethernet header (src/dst mac) 92 + eth2.h_proto = bpf_htons(ETH_P_IP); // But replace the ethertype 93 + 94 + struct iphdr ip = { 95 + .version = 4, // u4 96 + .ihl = sizeof(struct iphdr) / sizeof(__u32), // u4 97 + .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4), // u8 98 + .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)), // u16 99 + .id = 0, // u16 100 + .frag_off = bpf_htons(IP_DF), // u16 101 + .ttl = ip6->hop_limit, // u8 102 + .protocol = ip6->nexthdr, // u8 103 + .check = 0, // u16 104 + .saddr = 0x0201a8c0, // u32 105 + .daddr = 0x0101a8c0, // u32 106 + }; 107 + 108 + // Calculate the IPv4 one's complement checksum of the IPv4 header. 109 + __wsum sum4 = 0; 110 + 111 + for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i) 112 + sum4 += ((__u16 *)&ip)[i]; 113 + 114 + // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4 115 + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE 116 + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 117 + ip.check = (__u16)~sum4; // sum4 cannot be zero, so this is never 0xFFFF 118 + 119 + // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header. 120 + __wsum sum6 = 0; 121 + // We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits) 122 + for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i) 123 + sum6 += ~((__u16 *)ip6)[i]; // note the bitwise negation 124 + 125 + // Note that there is no L4 checksum update: we are relying on the checksum neutrality 126 + // of the ipv6 address chosen by netd's ClatdController. 
127 + 128 + // Packet mutations begin - point of no return, but if this first modification fails 129 + // the packet is probably still pristine, so let clatd handle it. 130 + if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0)) 131 + return TC_ACT_OK; 132 + bpf_csum_update(skb, sum6); 133 + 134 + data = (void *)(long)skb->data; 135 + data_end = (void *)(long)skb->data_end; 136 + if (data + l2_header_size + sizeof(struct iphdr) > data_end) 137 + return TC_ACT_SHOT; 138 + 139 + struct ethhdr *new_eth = data; 140 + 141 + // Copy over the updated ethernet header 142 + *new_eth = eth2; 143 + 144 + // Copy over the new ipv4 header. 145 + *(struct iphdr *)(new_eth + 1) = ip; 146 + return bpf_redirect(skb->ifindex, BPF_F_INGRESS); 147 + } 148 + 149 + SEC("schedcls/egress4/snat4") 150 + int sched_cls_egress4_snat4_prog(struct __sk_buff *skb) 151 + { 152 + const int l2_header_size = sizeof(struct ethhdr); 153 + void *data = (void *)(long)skb->data; 154 + const void *data_end = (void *)(long)skb->data_end; 155 + const struct ethhdr *const eth = data; // used iff is_ethernet 156 + const struct iphdr *const ip4 = (void *)(eth + 1); 157 + 158 + // Must be meta-ethernet IPv4 frame 159 + if (skb->protocol != bpf_htons(ETH_P_IP)) 160 + return TC_ACT_OK; 161 + 162 + // Must have ipv4 header 163 + if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end) 164 + return TC_ACT_OK; 165 + 166 + // Ethertype - if present - must be IPv4 167 + if (eth->h_proto != bpf_htons(ETH_P_IP)) 168 + return TC_ACT_OK; 169 + 170 + // IP version must be 4 171 + if (ip4->version != 4) 172 + return TC_ACT_OK; 173 + 174 + // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header 175 + if (ip4->ihl != 5) 176 + return TC_ACT_OK; 177 + 178 + // Maximum IPv6 payload length that can be translated to IPv4 179 + if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr)) 180 + return TC_ACT_OK; 181 + 182 + // Calculate the IPv4 one's complement checksum of the IPv4 header. 
183 + __wsum sum4 = 0; 184 + 185 + for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i) 186 + sum4 += ((__u16 *)ip4)[i]; 187 + 188 + // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4 189 + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE 190 + sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16 191 + // for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF 192 + if (sum4 != 0xFFFF) 193 + return TC_ACT_OK; 194 + 195 + // Minimum IPv4 total length is the size of the header 196 + if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4)) 197 + return TC_ACT_OK; 198 + 199 + // We are incapable of dealing with IPv4 fragments 200 + if (ip4->frag_off & ~bpf_htons(IP_DF)) 201 + return TC_ACT_OK; 202 + 203 + switch (ip4->protocol) { 204 + case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6 205 + case IPPROTO_GRE: // address means there is no need to update their checksums. 206 + case IPPROTO_ESP: // We do not need to bother looking at GRE/ESP headers, 207 + break; // since there is never a checksum to update. 208 + 209 + case IPPROTO_UDP: // See above comment, but must also have UDP header... 210 + if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end) 211 + return TC_ACT_OK; 212 + const struct udphdr *uh = (const struct udphdr *)(ip4 + 1); 213 + // If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the 214 + // checksum. Otherwise the network or more likely the NAT64 gateway might 215 + // drop the packet because in most cases IPv6/UDP packets with a zero checksum 216 + // are invalid. See RFC 6935. 
TODO: calculate checksum via bpf_csum_diff() 217 + if (!uh->check) 218 + return TC_ACT_OK; 219 + break; 220 + 221 + default: // do not know how to handle anything else 222 + return TC_ACT_OK; 223 + } 224 + struct ethhdr eth2; // used iff is_ethernet 225 + 226 + eth2 = *eth; // Copy over the ethernet header (src/dst mac) 227 + eth2.h_proto = bpf_htons(ETH_P_IPV6); // But replace the ethertype 228 + 229 + struct ipv6hdr ip6 = { 230 + .version = 6, // __u8:4 231 + .priority = ip4->tos >> 4, // __u8:4 232 + .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0}, // __u8[3] 233 + .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20), // __be16 234 + .nexthdr = ip4->protocol, // __u8 235 + .hop_limit = ip4->ttl, // __u8 236 + }; 237 + ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); 238 + ip6.saddr.in6_u.u6_addr32[1] = 0; 239 + ip6.saddr.in6_u.u6_addr32[2] = 0; 240 + ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1); 241 + ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8); 242 + ip6.daddr.in6_u.u6_addr32[1] = 0; 243 + ip6.daddr.in6_u.u6_addr32[2] = 0; 244 + ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2); 245 + 246 + // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header. 247 + __wsum sum6 = 0; 248 + // We'll end up with a non-zero sum due to ip6.version == 6 249 + for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i) 250 + sum6 += ((__u16 *)&ip6)[i]; 251 + 252 + // Packet mutations begin - point of no return, but if this first modification fails 253 + // the packet is probably still pristine, so let clatd handle it. 254 + if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0)) 255 + return TC_ACT_OK; 256 + 257 + // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet. 258 + // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload, 259 + // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum. 260 + // However, we've already verified the ipv4 checksum is correct and thus 0. 
261 + // Thus we only need to add the ipv6 header's sum. 262 + // 263 + // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error 264 + // (-ENOTSUPP) if it isn't. So we just ignore the return code (see above for more details). 265 + bpf_csum_update(skb, sum6); 266 + 267 + // bpf_skb_change_proto() invalidates all pointers - reload them. 268 + data = (void *)(long)skb->data; 269 + data_end = (void *)(long)skb->data_end; 270 + 271 + // I cannot think of any valid way for this error condition to trigger, however I do 272 + // believe the explicit check is required to keep the in kernel ebpf verifier happy. 273 + if (data + l2_header_size + sizeof(ip6) > data_end) 274 + return TC_ACT_SHOT; 275 + 276 + struct ethhdr *new_eth = data; 277 + 278 + // Copy over the updated ethernet header 279 + *new_eth = eth2; 280 + // Copy over the new ipv4 header. 281 + *(struct ipv6hdr *)(new_eth + 1) = ip6; 282 + return TC_ACT_OK; 283 + } 284 + 285 + char _license[] SEC("license") = ("GPL");
+101
tools/testing/selftests/net/udpgro_frglist.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Run a series of udpgro benchmarks
#
# Usage:
#   udpgro_frglist.sh                 run the built-in IPv6 TCP and UDP cases
#   udpgro_frglist.sh <tx> rx <rx>    run one case; the literal word 'rx'
#                                     separates udpgso_bench_tx arguments from
#                                     udpgso_bench_rx arguments

readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"

# BPF objects, relative to the directory the script is run from.  The same
# variables are used for the existence checks and for loading, so the two
# cannot drift apart.
readonly XDP_DUMMY_OBJ="../bpf/xdp_dummy.o"
readonly NAT6TO4_OBJ="bpf/nat6to4.o"

# EXIT trap: interrupt background jobs and remove the peer namespace if it
# is still listed.
cleanup() {
	local -r jobs="$(jobs -p)"
	local -r ns="$(ip netns list|grep "$PEER_NS")"

	[ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null
	[ -n "$ns" ] && ip netns del $ns 2>/dev/null
}
trap cleanup EXIT

# Set up a veth pair (veth0 here, veth1 in the peer namespace), attach the
# XDP dummy and the nat6to4 tc programs to veth1, then run one receiver in
# the peer namespace and one sender here.
run_one() {
	# use 'rx' as separator between sender args and receiver args
	local -r all="$@"
	local -r tx_args=${all%rx*}
	local rx_args=${all#*rx}



	ip netns add "${PEER_NS}"
	ip -netns "${PEER_NS}" link set lo up
	ip link add type veth
	ip link set dev veth0 up
	ip addr add dev veth0 192.168.1.2/24
	ip addr add dev veth0 2001:db8::2/64 nodad

	ip link set dev veth1 netns "${PEER_NS}"
	ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
	ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
	ip -netns "${PEER_NS}" link set dev veth1 up
	ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on


	ip -n "${PEER_NS}" link set veth1 xdp object "${XDP_DUMMY_OBJ}" section xdp_dummy
	tc -n "${PEER_NS}" qdisc add dev veth1 clsact
	tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file "${NAT6TO4_OBJ}" section schedcls/ingress6/nat_6  direct-action
	tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file "${NAT6TO4_OBJ}" section schedcls/egress4/snat4 direct-action
	echo ${rx_args}
	ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &

	# Hack: let bg programs complete the startup
	sleep 0.1
	./udpgso_bench_tx ${tx_args}
}

# Re-run this script through in_netns.sh with the __subprocess marker so
# that run_one is executed by the re-invoked script.
run_in_netns() {
	local -r args=$@
	echo ${args}
	./in_netns.sh $0 __subprocess ${args}
}

run_udp() {
	local -r args=$@

	echo "udp gso - over veth touching data"
	run_in_netns ${args} -u -S 0 rx -4 -v

	echo "udp gso and gro - over veth touching data"
	run_in_netns ${args} -S 0 rx -4 -G
}

run_tcp() {
	local -r args=$@

	echo "tcp - over veth touching data"
	run_in_netns ${args} -t rx -4 -t
}

run_all() {
	local -r core_args="-l 4"
	local -r ipv4_args="${core_args} -4 -D 192.168.1.1"
	local -r ipv6_args="${core_args} -6 -D 2001:db8::1"

	echo "ipv6"
	run_tcp "${ipv6_args}"
	run_udp "${ipv6_args}"
}

if [ ! -f "${XDP_DUMMY_OBJ}" ]; then
	echo "Missing xdp_dummy helper. Build bpf selftest first"
	exit -1
fi

if [ ! -f "${NAT6TO4_OBJ}" ]; then
	echo "Missing nat6to4 helper. Build bpf/nat6to4.o selftest first"
	exit -1
fi

if [[ $# -eq 0 ]]; then
	run_all
elif [[ $1 == "__subprocess" ]]; then
	shift
	run_one $@
else
	run_in_netns $@
fi