Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/net: test sk_filter support for SKF_NET_OFF on frags

Verify that a classic BPF Linux socket filter correctly matches
packet contents, including when the accessed contents reside in an
skb_frag.

1. Open a SOCK_RAW socket with a classic BPF filter on UDP dport 8000.
2. Open a tap device with IFF_NAPI_FRAGS to inject skbs with frags.
3. Send a packet for which the UDP header is in frag[0].
4. Receive this packet to demonstrate that the socket accepted it.

Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/r/20250408132833.195491-3-willemdebruijn.kernel@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Willem de Bruijn and committed by
Alexei Starovoitov
fcd7132c d4bac028

+277
+1
tools/testing/selftests/net/.gitignore
··· 39 39 sk_bind_sendto_listen 40 40 sk_connect_zero_addr 41 41 sk_so_peek_off 42 + skf_net_off 42 43 socket 43 44 so_incoming_cpu 44 45 so_netns_cookie
+2
tools/testing/selftests/net/Makefile
··· 106 106 TEST_PROGS += busy_poll_test.sh 107 107 TEST_GEN_PROGS += proc_net_pktgen 108 108 TEST_PROGS += lwt_dst_cache_ref_loop.sh 109 + TEST_PROGS += skf_net_off.sh 110 + TEST_GEN_FILES += skf_net_off 109 111 110 112 # YNL files, must be before "include ..lib.mk" 111 113 YNL_GEN_FILES := busy_poller netlink-dumps
+244
tools/testing/selftests/net/skf_net_off.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + /* Open a tun device. 4 + * 5 + * [modifications: use IFF_NAPI_FRAGS, add sk filter] 6 + * 7 + * Expects the device to have been configured previously, e.g.: 8 + * sudo ip tuntap add name tap1 mode tap 9 + * sudo ip link set tap1 up 10 + * sudo ip link set dev tap1 addr 02:00:00:00:00:01 11 + * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad 12 + * 13 + * And to avoid premature pskb_may_pull: 14 + * 15 + * sudo ethtool -K tap1 gro off 16 + * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux' 17 + */ 18 + 19 + #define _GNU_SOURCE 20 + 21 + #include <arpa/inet.h> 22 + #include <errno.h> 23 + #include <error.h> 24 + #include <fcntl.h> 25 + #include <getopt.h> 26 + #include <linux/filter.h> 27 + #include <linux/if.h> 28 + #include <linux/if_packet.h> 29 + #include <linux/if_tun.h> 30 + #include <linux/ipv6.h> 31 + #include <netinet/if_ether.h> 32 + #include <netinet/in.h> 33 + #include <netinet/ip.h> 34 + #include <netinet/ip6.h> 35 + #include <netinet/udp.h> 36 + #include <poll.h> 37 + #include <signal.h> 38 + #include <stdbool.h> 39 + #include <stddef.h> 40 + #include <stdio.h> 41 + #include <stdlib.h> 42 + #include <string.h> 43 + #include <sys/ioctl.h> 44 + #include <sys/socket.h> 45 + #include <sys/poll.h> 46 + #include <sys/types.h> 47 + #include <sys/uio.h> 48 + #include <unistd.h> 49 + 50 + static bool cfg_do_filter; 51 + static bool cfg_do_frags; 52 + static int cfg_dst_port = 8000; 53 + static char *cfg_ifname; 54 + 55 + static int tun_open(const char *tun_name) 56 + { 57 + struct ifreq ifr = {0}; 58 + int fd, ret; 59 + 60 + fd = open("/dev/net/tun", O_RDWR); 61 + if (fd == -1) 62 + error(1, errno, "open /dev/net/tun"); 63 + 64 + ifr.ifr_flags = IFF_TAP; 65 + if (cfg_do_frags) 66 + ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS; 67 + 68 + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1); 69 + 70 + ret = ioctl(fd, TUNSETIFF, &ifr); 71 + if (ret) 72 + error(1, ret, "ioctl TUNSETIFF"); 73 + 74 + return 
fd; 75 + } 76 + 77 + static void sk_set_filter(int fd) 78 + { 79 + const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt); 80 + const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest); 81 + 82 + /* Filter UDP packets with destination port cfg_dst_port */ 83 + struct sock_filter filter_code[] = { 84 + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), 85 + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), 86 + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto), 87 + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2), 88 + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport), 89 + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0), 90 + BPF_STMT(BPF_RET + BPF_K, 0), 91 + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), 92 + }; 93 + 94 + struct sock_fprog filter = { 95 + sizeof(filter_code) / sizeof(filter_code[0]), 96 + filter_code, 97 + }; 98 + 99 + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter))) 100 + error(1, errno, "setsockopt attach filter"); 101 + } 102 + 103 + static int raw_open(void) 104 + { 105 + int fd; 106 + 107 + fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP); 108 + if (fd == -1) 109 + error(1, errno, "socket raw (udp)"); 110 + 111 + if (cfg_do_filter) 112 + sk_set_filter(fd); 113 + 114 + return fd; 115 + } 116 + 117 + static void tun_write(int fd) 118 + { 119 + const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 }; 120 + const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }; 121 + struct tun_pi pi = {0}; 122 + struct ipv6hdr ip6h = {0}; 123 + struct udphdr uh = {0}; 124 + struct ethhdr eth = {0}; 125 + uint32_t payload; 126 + struct iovec iov[5]; 127 + int ret; 128 + 129 + pi.proto = htons(ETH_P_IPV6); 130 + 131 + memcpy(eth.h_source, eth_src, sizeof(eth_src)); 132 + memcpy(eth.h_dest, eth_dst, sizeof(eth_dst)); 133 + eth.h_proto = htons(ETH_P_IPV6); 134 + 135 + ip6h.version = 6; 136 + ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t)); 137 + 
ip6h.nexthdr = IPPROTO_UDP; 138 + ip6h.hop_limit = 8; 139 + if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1) 140 + error(1, errno, "inet_pton src"); 141 + if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1) 142 + error(1, errno, "inet_pton src"); 143 + 144 + uh.source = htons(8000); 145 + uh.dest = htons(cfg_dst_port); 146 + uh.len = ip6h.payload_len; 147 + uh.check = 0; 148 + 149 + payload = htonl(0xABABABAB); /* Covered in IPv6 length */ 150 + 151 + iov[0].iov_base = &pi; 152 + iov[0].iov_len = sizeof(pi); 153 + iov[1].iov_base = &eth; 154 + iov[1].iov_len = sizeof(eth); 155 + iov[2].iov_base = &ip6h; 156 + iov[2].iov_len = sizeof(ip6h); 157 + iov[3].iov_base = &uh; 158 + iov[3].iov_len = sizeof(uh); 159 + iov[4].iov_base = &payload; 160 + iov[4].iov_len = sizeof(payload); 161 + 162 + ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0])); 163 + if (ret <= 0) 164 + error(1, errno, "writev"); 165 + } 166 + 167 + static void raw_read(int fd) 168 + { 169 + struct timeval tv = { .tv_usec = 100 * 1000 }; 170 + struct msghdr msg = {0}; 171 + struct iovec iov[2]; 172 + struct udphdr uh; 173 + uint32_t payload[2]; 174 + int ret; 175 + 176 + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) 177 + error(1, errno, "setsockopt rcvtimeo udp"); 178 + 179 + iov[0].iov_base = &uh; 180 + iov[0].iov_len = sizeof(uh); 181 + 182 + iov[1].iov_base = payload; 183 + iov[1].iov_len = sizeof(payload); 184 + 185 + msg.msg_iov = iov; 186 + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); 187 + 188 + ret = recvmsg(fd, &msg, 0); 189 + if (ret <= 0) 190 + error(1, errno, "read raw"); 191 + if (ret != sizeof(uh) + sizeof(payload[0])) 192 + error(1, errno, "read raw: len=%d\n", ret); 193 + 194 + fprintf(stderr, "raw recv: 0x%x\n", payload[0]); 195 + } 196 + 197 + static void parse_opts(int argc, char **argv) 198 + { 199 + int c; 200 + 201 + while ((c = getopt(argc, argv, "fFi:")) != -1) { 202 + switch (c) { 203 + case 'f': 204 + cfg_do_filter = true; 205 + printf("bpf filter 
enabled\n"); 206 + break; 207 + case 'F': 208 + cfg_do_frags = true; 209 + printf("napi frags mode enabled\n"); 210 + break; 211 + case 'i': 212 + cfg_ifname = optarg; 213 + break; 214 + default: 215 + error(1, 0, "unknown option %c", optopt); 216 + break; 217 + } 218 + } 219 + 220 + if (!cfg_ifname) 221 + error(1, 0, "must specify tap interface name (-i)"); 222 + } 223 + 224 + int main(int argc, char **argv) 225 + { 226 + int fdt, fdr; 227 + 228 + parse_opts(argc, argv); 229 + 230 + fdr = raw_open(); 231 + fdt = tun_open(cfg_ifname); 232 + 233 + tun_write(fdt); 234 + raw_read(fdr); 235 + 236 + if (close(fdt)) 237 + error(1, errno, "close tun"); 238 + if (close(fdr)) 239 + error(1, errno, "close udp"); 240 + 241 + fprintf(stderr, "OK\n"); 242 + return 0; 243 + } 244 +
+30
tools/testing/selftests/net/skf_net_off.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Run skf_net_off in a temporary network namespace against a tap device:
# once without a filter, once with the filter on a linear skb, and once
# with the filter on an skb with frags. Exits non-zero if any run fails.

readonly NS="ns-$(mktemp -u XXXXXX)"

cleanup() {
	ip netns del "$NS"
}

ip netns add "$NS"
trap cleanup EXIT

ip -netns "$NS" link set lo up
ip -netns "$NS" tuntap add name tap1 mode tap
ip -netns "$NS" link set tap1 up
ip -netns "$NS" link set dev tap1 addr 02:00:00:00:00:01
ip -netns "$NS" -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
ip netns exec "$NS" ethtool -K tap1 gro off

# disable early demux, else udp_v6_early_demux pulls udp header into linear
ip netns exec "$NS" sysctl -w net.ipv4.ip_early_demux=0

# Accumulate failures: without this, only the status of the last run
# would decide the exit status of the script.
ret=0

echo "no filter"
ip netns exec "$NS" ./skf_net_off -i tap1 || ret=1

echo "filter, linear skb (-f)"
ip netns exec "$NS" ./skf_net_off -i tap1 -f || ret=1

echo "filter, fragmented skb (-f) (-F)"
ip netns exec "$NS" ./skf_net_off -i tap1 -f -F || ret=1

exit $ret