Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/net: add csum offload test

Test NIC hardware checksum offload:

- Rx + Tx
- IPv4 + IPv6
- TCP + UDP

Optional features:

- zero checksum 0xFFFF
- checksum disable 0x0000
- transport encap headers
- randomization

See file header for detailed comments.

Expected results differ depending on NIC features:

- CHECKSUM_UNNECESSARY vs CHECKSUM_COMPLETE
- NETIF_F_HW_CSUM (csum_start/csum_off) vs NETIF_F_IP(V6)_CSUM

Signed-off-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/r/20221128140210.553391-1-willemdebruijn.kernel@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Willem de Bruijn and committed by
Jakub Kicinski
91a7de85 5cb0c51f

+988
+1
tools/testing/selftests/net/.gitignore
··· 1 1 # SPDX-License-Identifier: GPL-2.0-only 2 2 bind_bhash 3 + csum 3 4 cmsg_sender 4 5 fin_ack_lat 5 6 gro
+1
tools/testing/selftests/net/Makefile
··· 74 74 TEST_GEN_PROGS += so_incoming_cpu 75 75 TEST_PROGS += sctp_vrf.sh 76 76 TEST_GEN_FILES += sctp_hello 77 + TEST_GEN_FILES += csum 77 78 78 79 TEST_FILES := settings 79 80
+986
tools/testing/selftests/net/csum.c
// SPDX-License-Identifier: GPL-2.0

/* Test hardware checksum offload: Rx + Tx, IPv4 + IPv6, TCP + UDP.
 *
 * The test runs on two machines to exercise the NIC. For this reason it
 * is not integrated in kselftests.
 *
 *     CMD=$((./csum -[46] -[tu] -S $SADDR -D $DADDR -[RT] -r 1 $EXTRA_ARGS))
 *
 * Rx:
 *
 * The sender sends packets with a known checksum field using PF_INET(6)
 * SOCK_RAW sockets.
 *
 * good packet: $CMD [-t]
 * bad packet:  $CMD [-t] -E
 *
 * The receiver reads UDP packets with a UDP socket. This is not an
 * option for TCP packets ('-t'). Optionally insert an iptables filter
 * to avoid these entering the real protocol stack.
 *
 * The receiver also reads all packets with a PF_PACKET socket, to
 * observe whether both good and bad packets arrive on the host. And to
 * read the optional TP_STATUS_CSUM_VALID bit. This requires setting
 * option PACKET_AUXDATA, and works only for CHECKSUM_UNNECESSARY.
 *
 * Tx:
 *
 * The sender needs to build CHECKSUM_PARTIAL packets to exercise tx
 * checksum offload.
 *
 * The sender can send packets with a UDP socket.
 *
 * Optionally crafts a packet that sums up to zero to verify that the
 * device writes negative zero 0xFFFF in this case to distinguish from
 * 0x0000 (checksum disabled), as required by RFC 768. Hit this case
 * by choosing a specific source port.
 *
 * good packet: $CMD -U
 * zero csum:   $CMD -U -Z
 *
 * The sender can also build packets with PF_PACKET with PACKET_VNET_HDR,
 * to cover more protocols. PF_PACKET requires passing src and dst mac
 * addresses.
 *
 * good packet: $CMD -s $smac -d $dmac -P [-t]
 *
 * Argument '-z' sends UDP packets with a 0x0000 checksum disabled field,
 * to verify that the NIC passes these packets unmodified.
 *
 * Argument '-e' adds a transport mode encapsulation header between
 * network and transport header. This will fail for devices that parse
 * headers. Should work on devices that implement protocol agnostic tx
 * checksum offload (NETIF_F_HW_CSUM).
 *
 * Argument '-r $SEED' optionally randomizes header, payload and length
 * to increase coverage between packets sent. SEED 1 further chooses a
 * different seed for each run (and logs this for reproducibility). It
 * is advised to enable this for extra coverage in continuous testing.
 *
 * Remaining arguments:
 *
 *   -i $IFNAME  device used for PF_PACKET send and receive (default eth0)
 *   -l $LEN     payload length in bytes (default 100, max MAX_PAYLOAD_LEN)
 *   -L $SEC     receive timeout in seconds (default 2)
 *   -n $NUM     number of packets to send / expect (default 4)
 */

#define _GNU_SOURCE

#include <arpa/inet.h>
#include <asm/byteorder.h>
#include <errno.h>
#include <error.h>
#include <linux/filter.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
#include <linux/virtio_net.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/if_ether.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <poll.h>
#include <sched.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>

static bool cfg_bad_csum;
static int cfg_family = PF_INET6;
static int cfg_num_pkt = 4;
static bool cfg_do_rx = true;
static bool cfg_do_tx = true;
static bool cfg_encap;
static char *cfg_ifname = "eth0";
static char *cfg_mac_dst;
static char *cfg_mac_src;
static int cfg_proto = IPPROTO_UDP;
static int cfg_payload_char = 'a';
static int cfg_payload_len = 100;
static uint16_t cfg_port_dst = 34000;
static uint16_t cfg_port_src = 33000;
static uint16_t cfg_port_src_encap = 33001;
static unsigned int cfg_random_seed;
static int cfg_rcvbuf = 1 << 22; /* be able to queue large cfg_num_pkt */
static bool cfg_send_pfpacket;
static bool cfg_send_udp;
static int cfg_timeout_ms = 2000;
static bool cfg_zero_disable; /* skip checksum: set to zero (udp only) */
static bool cfg_zero_sum; /* create packet that adds up to zero */

static struct sockaddr_in cfg_daddr4 = {.sin_family = AF_INET};
static struct sockaddr_in cfg_saddr4 = {.sin_family = AF_INET};
static struct sockaddr_in6 cfg_daddr6 = {.sin6_family = AF_INET6};
static struct sockaddr_in6 cfg_saddr6 = {.sin6_family = AF_INET6};

#define ENC_HEADER_LEN	(sizeof(struct udphdr) + sizeof(struct udp_encap_hdr))
#define MAX_HEADER_LEN	(sizeof(struct ipv6hdr) + ENC_HEADER_LEN + sizeof(struct tcphdr))
#define MAX_PAYLOAD_LEN 1024

/* Trivial demo encap. Stand-in for transport layer protocols like ESP or PSP */
struct udp_encap_hdr {
	uint8_t nexthdr;
	uint8_t padding[3];
};

/* Ipaddrs, for pseudo csum. Global var is ugly, pass through funcs was worse.
 *
 * Points at the saddr field of the IP header most recently built or parsed;
 * checksum() reads saddr and daddr contiguously from here.
 */
static void *iph_addr_p;

/* Return the current wall clock time in milliseconds. */
static unsigned long gettimeofday_ms(void)
{
	struct timeval tv;

	gettimeofday(&tv, NULL);
	return (tv.tv_sec * 1000UL) + (tv.tv_usec / 1000UL);
}

/* Add @len bytes at @data to the running one's complement sum @sum.
 *
 * The 16-bit words are read in memory order and the result is not folded,
 * so the caller can keep accumulating. @data may be NULL when @len is 0.
 */
static uint32_t checksum_nofold(char *data, size_t len, uint32_t sum)
{
	uint16_t word;
	size_t i;

	for (i = 0; i + 1 < len; i += 2) {
		/* memcpy: @data is not guaranteed to be 2-byte aligned */
		memcpy(&word, data + i, sizeof(word));
		sum += word;
	}

	if (len & 1) {
		/* Pad the trailing byte with a zero byte in memory order, so
		 * that the sum is also correct on big-endian hosts.
		 */
		word = 0;
		memcpy(&word, data + len - 1, 1);
		sum += word;
	}

	return sum;
}

/* Sum @len bytes at @data on top of @sum, fold the carries into 16 bits and
 * return the one's complement of the result.
 */
static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
{
	sum = checksum_nofold(data, len, sum);

	while (sum > 0xFFFF)
		sum = (sum & 0xFFFF) + (sum >> 16);

	return ~sum;
}

/* Compute the transport checksum for the @len byte transport header plus
 * payload at @th, including the pseudo header for protocol @proto.
 *
 * The addresses for the pseudo header are read through iph_addr_p, so an
 * IP header must have been built or parsed before calling this.
 */
static uint16_t checksum(void *th, uint16_t proto, size_t len)
{
	uint32_t sum;
	int alen;

	/* saddr + daddr: 2 * 16B for IPv6, 2 * 4B for IPv4 */
	alen = cfg_family == PF_INET6 ? 32 : 8;

	sum = checksum_nofold(iph_addr_p, alen, 0);
	sum += htons(proto);
	sum += htons(len);

	/* With CHECKSUM_PARTIAL kernel expects non-inverted pseudo csum */
	if (cfg_do_tx && cfg_send_pfpacket)
		return ~checksum_fold(NULL, 0, sum);
	else
		return checksum_fold(th, len, sum);
}

/* Write an IPv4 header at @_iph for a packet carrying @len bytes of
 * protocol @proto. Returns a pointer to the byte after the header.
 */
static void *build_packet_ipv4(void *_iph, uint8_t proto, unsigned int len)
{
	struct iphdr *iph = _iph;

	memset(iph, 0, sizeof(*iph));

	iph->version = 4;
	iph->ihl = 5;
	iph->ttl = 8;
	iph->protocol = proto;
	iph->saddr = cfg_saddr4.sin_addr.s_addr;
	iph->daddr = cfg_daddr4.sin_addr.s_addr;
	iph->tot_len = htons(sizeof(*iph) + len);
	/* iph->check is still zero from the memset at this point */
	iph->check = checksum_fold(iph, sizeof(*iph), 0);

	iph_addr_p = &iph->saddr;

	return iph + 1;
}

/* Write an IPv6 header at @_ip6h for a packet carrying @len bytes of
 * protocol @proto. Returns a pointer to the byte after the header.
 */
static void *build_packet_ipv6(void *_ip6h, uint8_t proto, unsigned int len)
{
	struct ipv6hdr *ip6h = _ip6h;

	memset(ip6h, 0, sizeof(*ip6h));

	ip6h->version = 6;
	ip6h->payload_len = htons(len);
	ip6h->nexthdr = proto;
	ip6h->hop_limit = 64;
	ip6h->saddr = cfg_saddr6.sin6_addr;
	ip6h->daddr = cfg_daddr6.sin6_addr;

	iph_addr_p = &ip6h->saddr;

	return ip6h + 1;
}

/* Write a UDP header at @_uh for cfg_payload_len bytes of payload that
 * already follow it in the buffer. Returns a pointer to the payload.
 *
 * May update cfg_port_src when cfg_zero_sum is set.
 */
static void *build_packet_udp(void *_uh)
{
	struct udphdr *uh = _uh;

	uh->source = htons(cfg_port_src);
	uh->dest = htons(cfg_port_dst);
	uh->len = htons(sizeof(*uh) + cfg_payload_len);
	uh->check = 0;

	/* choose source port so that uh->check adds up to zero */
	if (cfg_zero_sum) {
		uh->source = 0;
		uh->source = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len);

		fprintf(stderr, "tx: changing sport: %hu -> %hu\n",
			cfg_port_src, ntohs(uh->source));
		cfg_port_src = ntohs(uh->source);
	}

	if (cfg_zero_disable)
		uh->check = 0;
	else
		uh->check = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len);

	if (cfg_bad_csum)
		uh->check = ~uh->check;

	fprintf(stderr, "tx: sending checksum: 0x%x\n", uh->check);
	return uh + 1;
}

/* Write the TCP header fields the test depends on at @_th, for
 * cfg_payload_len bytes of payload that already follow it in the buffer.
 * Other header fields keep the buffer's fill pattern.
 * Returns a pointer to the payload.
 */
static void *build_packet_tcp(void *_th)
{
	struct tcphdr *th = _th;

	th->source = htons(cfg_port_src);
	th->dest = htons(cfg_port_dst);
	th->doff = 5;
	th->check = 0;

	th->check = checksum(th, IPPROTO_TCP, sizeof(*th) + cfg_payload_len);

	if (cfg_bad_csum)
		th->check = ~th->check;

	fprintf(stderr, "tx: sending checksum: 0x%x\n", th->check);
	return th + 1;
}

/* Write an outer UDP header and demo encap header at @_uh, followed by an
 * inner TCP header. Returns a pointer to the payload.
 */
static char *build_packet_udp_encap(void *_uh)
{
	struct udphdr *uh = _uh;
	struct udp_encap_hdr *eh = (void *)(uh + 1);

	/* outer dst == inner dst, to simplify BPF filter
	 * outer src != inner src, to demultiplex on recv
	 */
	uh->dest = htons(cfg_port_dst);
	uh->source = htons(cfg_port_src_encap);
	uh->check = 0;
	uh->len = htons(sizeof(*uh) +
			sizeof(*eh) +
			sizeof(struct tcphdr) +
			cfg_payload_len);

	eh->nexthdr = IPPROTO_TCP;

	return build_packet_tcp(eh + 1);
}

/* Build one packet in @buf of size @max_len according to the cfg_* settings.
 *
 * Returns a pointer to the first byte to transmit and stores the number of
 * bytes to transmit in @len. With cfg_send_udp this is only the payload.
 */
static char *build_packet(char *buf, int max_len, int *len)
{
	uint8_t proto;
	char *off;
	int tlen;

	/* Fill the whole buffer first: payload and any header fields that
	 * the builders below do not explicitly set take this pattern.
	 */
	if (cfg_random_seed) {
		int *buf32 = (void *)buf;
		size_t i;

		for (i = 0; i < (max_len / sizeof(int)); i++)
			buf32[i] = rand();
	} else {
		memset(buf, cfg_payload_char, max_len);
	}

	if (cfg_proto == IPPROTO_UDP)
		tlen = sizeof(struct udphdr) + cfg_payload_len;
	else
		tlen = sizeof(struct tcphdr) + cfg_payload_len;

	if (cfg_encap) {
		proto = IPPROTO_UDP;
		tlen += ENC_HEADER_LEN;
	} else {
		proto = cfg_proto;
	}

	if (cfg_family == PF_INET)
		off = build_packet_ipv4(buf, proto, tlen);
	else
		off = build_packet_ipv6(buf, proto, tlen);

	if (cfg_encap)
		off = build_packet_udp_encap(off);
	else if (cfg_proto == IPPROTO_UDP)
		off = build_packet_udp(off);
	else
		off = build_packet_tcp(off);

	/* only pass the payload, but still compute headers for cfg_zero_sum */
	if (cfg_send_udp) {
		*len = cfg_payload_len;
		return off;
	}

	*len = off - buf + cfg_payload_len;
	return buf;
}

/* Open a cfg_family socket of type @ipproto and protocol @protocol, bound
 * to the configured source and connected to the configured destination.
 * Exits on failure.
 */
static int open_inet(int ipproto, int protocol)
{
	int fd;

	fd = socket(cfg_family, ipproto, protocol);
	if (fd == -1)
		error(1, errno, "socket inet");

	if (cfg_family == PF_INET6) {
		/* may have been updated by cfg_zero_sum */
		cfg_saddr6.sin6_port = htons(cfg_port_src);

		if (bind(fd, (void *)&cfg_saddr6, sizeof(cfg_saddr6)))
			error(1, errno, "bind dgram 6");
		if (connect(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6)))
			error(1, errno, "connect dgram 6");
	} else {
		/* may have been updated by cfg_zero_sum */
		cfg_saddr4.sin_port = htons(cfg_port_src);

		if (bind(fd, (void *)&cfg_saddr4, sizeof(cfg_saddr4)))
			error(1, errno, "bind dgram 4");
		if (connect(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4)))
			error(1, errno, "connect dgram 4");
	}

	return fd;
}

/* Open a PF_PACKET transmit socket with PACKET_VNET_HDR enabled.
 * Exits on failure.
 */
static int open_packet(void)
{
	int fd, one = 1;

	fd = socket(PF_PACKET, SOCK_RAW, 0);
	if (fd == -1)
		error(1, errno, "socket packet");

	if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
		error(1, errno, "setsockopt packet_vnet_ndr");

	return fd;
}

/* Write @len bytes at @buf to connected socket @fd. Exits on failure or
 * on a short write.
 */
static void send_inet(int fd, const char *buf, int len)
{
	int ret;

	ret = write(fd, buf, len);
	if (ret == -1)
		error(1, errno, "write");
	if (ret != len)
		error(1, 0, "write: %d", ret);
}

/* Parse mac address string @str (aa:bb:cc:dd:ee:ff) into the six bytes at
 * @eth. Exits if the string cannot be parsed.
 */
static void eth_str_to_addr(const char *str, unsigned char *eth)
{
	if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		   &eth[0], &eth[1], &eth[2], &eth[3], &eth[4], &eth[5]) != 6)
		error(1, 0, "cannot parse mac addr %s", str);
}

/* Send the @len byte network layer packet at @buf on PF_PACKET socket @fd.
 *
 * Prepends a virtio_net_hdr that requests checksum offload and an Ethernet
 * header built from cfg_mac_src and cfg_mac_dst. Exits on failure.
 */
static void send_packet(int fd, const char *buf, int len)
{
	struct virtio_net_hdr vh = {0};
	struct sockaddr_ll addr = {0};
	struct msghdr msg = {0};
	struct ethhdr eth;
	struct iovec iov[3];
	int ret;

	addr.sll_family = AF_PACKET;
	addr.sll_halen = ETH_ALEN;
	addr.sll_ifindex = if_nametoindex(cfg_ifname);
	if (!addr.sll_ifindex)
		error(1, errno, "if_nametoindex %s", cfg_ifname);

	/* csum_start: offset of the transport header in the frame */
	vh.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
	if (cfg_family == PF_INET6) {
		vh.csum_start = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
		addr.sll_protocol = htons(ETH_P_IPV6);
	} else {
		vh.csum_start = sizeof(struct ethhdr) + sizeof(struct iphdr);
		addr.sll_protocol = htons(ETH_P_IP);
	}

	if (cfg_encap)
		vh.csum_start += ENC_HEADER_LEN;

	/* csum_offset: offset of the checksum field from csum_start */
	if (cfg_proto == IPPROTO_TCP) {
		vh.csum_offset = __builtin_offsetof(struct tcphdr, check);
		vh.hdr_len = vh.csum_start + sizeof(struct tcphdr);
	} else {
		vh.csum_offset = __builtin_offsetof(struct udphdr, check);
		vh.hdr_len = vh.csum_start + sizeof(struct udphdr);
	}

	eth_str_to_addr(cfg_mac_src, eth.h_source);
	eth_str_to_addr(cfg_mac_dst, eth.h_dest);
	eth.h_proto = addr.sll_protocol;

	iov[0].iov_base = &vh;
	iov[0].iov_len = sizeof(vh);

	iov[1].iov_base = &eth;
	iov[1].iov_len = sizeof(eth);

	iov[2].iov_base = (void *)buf;
	iov[2].iov_len = len;

	msg.msg_iov = iov;
	msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]);

	msg.msg_name = &addr;
	msg.msg_namelen = sizeof(addr);

	ret = sendmsg(fd, &msg, 0);
	if (ret == -1)
		error(1, errno, "sendmsg packet");
	/* a short send is not a syscall failure: errno is stale here */
	if (ret != (int)(sizeof(vh) + sizeof(eth)) + len)
		error(1, 0, "sendmsg packet: %u", ret);
}

/* Open a UDP socket bound to the configured destination address and port.
 * Exits on failure.
 */
static int recv_prepare_udp(void)
{
	int fd;

	fd = socket(cfg_family, SOCK_DGRAM, 0);
	if (fd == -1)
		error(1, errno, "socket r");

	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
		       &cfg_rcvbuf, sizeof(cfg_rcvbuf)))
		error(1, errno, "setsockopt SO_RCVBUF r");

	if (cfg_family == PF_INET6) {
		if (bind(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6)))
			error(1, errno, "bind r");
	} else {
		if (bind(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4)))
			error(1, errno, "bind r");
	}

	return fd;
}

/* Filter out all traffic that is not cfg_proto with our destination port.
 *
 * Otherwise background noise may cause PF_PACKET receive queue overflow,
 * dropping the expected packets and failing the test.
 *
 * @off_nexthdr and @off_dport are byte offsets from the start of the
 * network header to the next-protocol field and the destination port.
 */
static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport)
{
	struct sock_filter filter[] = {
		/* accept only packets addressed to this host */
		BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
		/* with the expected (outer) transport protocol */
		BPF_STMT(BPF_LD  + BPF_B   + BPF_ABS, off_nexthdr),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_encap ? IPPROTO_UDP : cfg_proto, 0, 2),
		/* and the expected destination port */
		BPF_STMT(BPF_LD  + BPF_H   + BPF_ABS, off_dport),
		BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_port_dst, 1, 0),
		BPF_STMT(BPF_RET + BPF_K, 0),		/* drop */
		BPF_STMT(BPF_RET + BPF_K, 0xFFFF),	/* accept */
	};
	struct sock_fprog prog = {};

	prog.filter = filter;
	prog.len = sizeof(filter) / sizeof(struct sock_filter);
	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
		error(1, errno, "setsockopt filter");
}

/* Attach the receive filter to @fd with offsets for cfg_family. */
static void recv_prepare_packet_filter(int fd)
{
	const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */

	if (cfg_family == AF_INET)
		__recv_prepare_packet_filter(fd, offsetof(struct iphdr, protocol),
					     sizeof(struct iphdr) + off_dport);
	else
		__recv_prepare_packet_filter(fd, offsetof(struct ipv6hdr, nexthdr),
					     sizeof(struct ipv6hdr) + off_dport);
}

/* Bind PF_PACKET socket @fd to cfg_ifname and the ethertype of cfg_family.
 * Exits on failure.
 */
static void recv_prepare_packet_bind(int fd)
{
	struct sockaddr_ll laddr = {0};

	laddr.sll_family = AF_PACKET;

	if (cfg_family == PF_INET)
		laddr.sll_protocol = htons(ETH_P_IP);
	else
		laddr.sll_protocol = htons(ETH_P_IPV6);

	laddr.sll_ifindex = if_nametoindex(cfg_ifname);
	if (!laddr.sll_ifindex)
		error(1, errno, "if_nametoindex %s", cfg_ifname);

	if (bind(fd, (void *)&laddr, sizeof(laddr)))
		error(1, errno, "bind pf_packet");
}

/* Open and configure the PF_PACKET receive socket. Exits on failure. */
static int recv_prepare_packet(void)
{
	int fd, one = 1;

	fd = socket(PF_PACKET, SOCK_DGRAM, 0);
	if (fd == -1)
		error(1, errno, "socket p");

	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
		       &cfg_rcvbuf, sizeof(cfg_rcvbuf)))
		error(1, errno, "setsockopt SO_RCVBUF p");

	/* enable auxdata to recv checksum status (valid vs unknown) */
	if (setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one)))
		error(1, errno, "setsockopt auxdata");

	/* install filter to restrict packet flow to match */
	recv_prepare_packet_filter(fd);

	/* bind to address family to start packet flow */
	recv_prepare_packet_bind(fd);

	return fd;
}

/* Drain all datagrams queued on UDP socket @fd without blocking.
 * Returns the number of datagrams read.
 */
static int recv_udp(int fd)
{
	static char buf[MAX_PAYLOAD_LEN];
	int ret, count = 0;

	while (1) {
		ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
		if (ret == -1 && errno == EAGAIN)
			break;
		if (ret == -1)
			error(1, errno, "recv r");

		fprintf(stderr, "rx: udp: len=%u\n", ret);
		count++;
	}

	return count;
}

/* Verify the transport checksum over the @len bytes at @th.
 *
 * @sport and @csum_field are the source port (host order) and the raw
 * checksum field of the packet; @sport is used for logging only.
 *
 * Returns 0 if the result matches the expectation set by cfg_bad_csum and
 * cfg_zero_sum, 1 otherwise.
 */
static int recv_verify_csum(void *th, int len, uint16_t sport, uint16_t csum_field)
{
	uint16_t csum;

	csum = checksum(th, cfg_proto, len);

	fprintf(stderr, "rx: pkt: sport=%hu len=%u csum=0x%hx verify=0x%hx\n",
		sport, len, csum_field, csum);

	/* csum must be zero unless cfg_bad_csum indicates bad csum */
	if (csum && !cfg_bad_csum) {
		fprintf(stderr, "pkt: bad csum\n");
		return 1;
	} else if (cfg_bad_csum && !csum) {
		fprintf(stderr, "pkt: good csum, while bad expected\n");
		return 1;
	}

	if (cfg_zero_sum && csum_field != 0xFFFF) {
		fprintf(stderr, "pkt: zero csum: field should be 0xFFFF, is 0x%hx\n", csum_field);
		return 1;
	}

	return 0;
}

/* Verify a TCP segment of @len bytes at @th.
 * Returns -1 if the packet does not match the test flow, else the result
 * of recv_verify_csum().
 */
static int recv_verify_packet_tcp(void *th, int len)
{
	struct tcphdr *tcph = th;

	if (len < sizeof(*tcph) || tcph->dest != htons(cfg_port_dst))
		return -1;

	return recv_verify_csum(th, len, ntohs(tcph->source), tcph->check);
}

/* Verify the demo encap header at @th and the TCP segment that follows.
 * Returns -1 if the packet does not match the test flow.
 */
static int recv_verify_packet_udp_encap(void *th, int len)
{
	struct udp_encap_hdr *eh = th;

	if (len < sizeof(*eh) || eh->nexthdr != IPPROTO_TCP)
		return -1;

	return recv_verify_packet_tcp(eh + 1, len - sizeof(*eh));
}

/* Verify a UDP datagram of @len bytes at @th. Datagrams sent from
 * cfg_port_src_encap are treated as encapsulated TCP.
 * Returns -1 if the packet does not match the test flow.
 */
static int recv_verify_packet_udp(void *th, int len)
{
	struct udphdr *udph = th;

	if (len < sizeof(*udph))
		return -1;

	if (udph->dest != htons(cfg_port_dst))
		return -1;

	if (udph->source == htons(cfg_port_src_encap))
		return recv_verify_packet_udp_encap(udph + 1,
						    len - sizeof(*udph));

	return recv_verify_csum(th, len, ntohs(udph->source), udph->check);
}

/* Verify an IPv4 packet at @nh, of which @len bytes were received.
 * Returns -1 if the packet does not match the test flow.
 */
static int recv_verify_packet_ipv4(void *nh, int len)
{
	struct iphdr *iph = nh;
	uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
	uint16_t ip_len;

	if (len < sizeof(*iph) || iph->protocol != proto)
		return -1;

	/* Trust the length in the IP header over the received length:
	 * short frames may carry link layer padding, which is not covered
	 * by the transport checksum.
	 */
	ip_len = ntohs(iph->tot_len);
	if (ip_len > len || ip_len < sizeof(*iph))
		return -1;

	len = ip_len;
	iph_addr_p = &iph->saddr;

	if (proto == IPPROTO_TCP)
		return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
	else
		return recv_verify_packet_udp(iph + 1, len - sizeof(*iph));
}

/* Verify an IPv6 packet at @nh, of which @len bytes were received.
 * Returns -1 if the packet does not match the test flow.
 */
static int recv_verify_packet_ipv6(void *nh, int len)
{
	struct ipv6hdr *ip6h = nh;
	uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
	uint16_t ip_len;

	if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
		return -1;

	/* See recv_verify_packet_ipv4: ignore any link layer padding.
	 * payload_len excludes the IPv6 header itself.
	 */
	ip_len = ntohs(ip6h->payload_len);
	if (ip_len > len - sizeof(*ip6h))
		return -1;

	iph_addr_p = &ip6h->saddr;

	if (proto == IPPROTO_TCP)
		return recv_verify_packet_tcp(ip6h + 1, ip_len);
	else
		return recv_verify_packet_udp(ip6h + 1, ip_len);
}

/* return whether auxdata includes TP_STATUS_CSUM_VALID
 *
 * Exits if the control data is truncated, holds anything other than
 * PACKET_AUXDATA, or holds no auxdata at all.
 */
static bool recv_verify_packet_csum(struct msghdr *msg)
{
	struct tpacket_auxdata *aux = NULL;
	struct cmsghdr *cm;

	if (msg->msg_flags & MSG_CTRUNC)
		error(1, 0, "cmsg: truncated");

	for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) {
		if (cm->cmsg_level != SOL_PACKET ||
		    cm->cmsg_type != PACKET_AUXDATA)
			error(1, 0, "cmsg: level=%d type=%d\n",
			      cm->cmsg_level, cm->cmsg_type);

		/* casts: the type of cmsg_len differs between C libraries */
		if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata)))
			error(1, 0, "cmsg: len=%lu expected=%lu",
			      (unsigned long)cm->cmsg_len,
			      (unsigned long)CMSG_LEN(sizeof(struct tpacket_auxdata)));

		aux = (void *)CMSG_DATA(cm);
	}

	if (!aux)
		error(1, 0, "cmsg: no auxdata");

	return aux->tp_status & TP_STATUS_CSUM_VALID;
}

/* Drain all packets queued on PF_PACKET socket @fd without blocking and
 * verify each one that matches the test flow.
 *
 * Returns the number of matching packets. Exits if any of them had an
 * unexpected checksum or checksum validation status.
 */
static int recv_packet(int fd)
{
	static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
	unsigned long total = 0, bad_csums = 0, bad_validations = 0;
	/* union: ensure the control buffer is aligned for struct cmsghdr */
	union {
		char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
		struct cmsghdr align;
	} ctrl;
	struct msghdr msg = {0};
	struct iovec iov;
	int len, ret;

	iov.iov_base = _buf;
	iov.iov_len = sizeof(_buf);

	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	msg.msg_control = ctrl.buf;

	while (1) {
		/* both fields are overwritten by each recvmsg call */
		msg.msg_controllen = sizeof(ctrl.buf);
		msg.msg_flags = 0;

		len = recvmsg(fd, &msg, MSG_DONTWAIT);
		if (len == -1 && errno == EAGAIN)
			break;
		if (len == -1)
			error(1, errno, "recv p");

		if (cfg_family == PF_INET6)
			ret = recv_verify_packet_ipv6(_buf, len);
		else
			ret = recv_verify_packet_ipv4(_buf, len);

		if (ret == -1 /* skip: non-matching */)
			continue;

		total++;
		if (ret == 1)
			bad_csums++;

		/* Fail if kernel returns valid for known bad csum.
		 * Do not fail if kernel does not validate a good csum:
		 * Absence of validation does not imply invalid.
		 */
		if (recv_verify_packet_csum(&msg) && cfg_bad_csum) {
			fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n");
			bad_validations++;
		}
	}

	if (bad_csums || bad_validations)
		error(1, 0, "rx: errors at pf_packet: total=%lu bad_csums=%lu bad_valids=%lu\n",
		      total, bad_csums, bad_validations);

	return total;
}

/* Parse the command line into the cfg_* variables and validate the
 * combination of options. Exits on any invalid input.
 */
static void parse_args(int argc, char *const argv[])
{
	const char *daddr = NULL, *saddr = NULL;
	int c;

	while ((c = getopt(argc, argv, "46d:D:eEi:l:L:n:r:PRs:S:tTuUzZ")) != -1) {
		switch (c) {
		case '4':
			cfg_family = PF_INET;
			break;
		case '6':
			cfg_family = PF_INET6;
			break;
		case 'd':
			cfg_mac_dst = optarg;
			break;
		case 'D':
			daddr = optarg;
			break;
		case 'e':
			cfg_encap = true;
			break;
		case 'E':
			cfg_bad_csum = true;
			break;
		case 'i':
			cfg_ifname = optarg;
			break;
		case 'l':
			cfg_payload_len = strtol(optarg, NULL, 0);
			break;
		case 'L':
			/* argument is in seconds */
			cfg_timeout_ms = strtol(optarg, NULL, 0) * 1000;
			break;
		case 'n':
			cfg_num_pkt = strtol(optarg, NULL, 0);
			break;
		case 'r':
			cfg_random_seed = strtol(optarg, NULL, 0);
			break;
		case 'P':
			cfg_send_pfpacket = true;
			break;
		case 'R':
			/* only Rx: used with two machine tests */
			cfg_do_tx = false;
			break;
		case 's':
			cfg_mac_src = optarg;
			break;
		case 'S':
			saddr = optarg;
			break;
		case 't':
			cfg_proto = IPPROTO_TCP;
			break;
		case 'T':
			/* only Tx: used with two machine tests */
			cfg_do_rx = false;
			break;
		case 'u':
			cfg_proto = IPPROTO_UDP;
			break;
		case 'U':
			/* send using real udp socket,
			 * to exercise tx checksum offload
			 */
			cfg_send_udp = true;
			break;
		case 'z':
			cfg_zero_disable = true;
			break;
		case 'Z':
			cfg_zero_sum = true;
			break;
		default:
			error(1, 0, "unknown arg %c", c);
		}
	}

	if (!daddr || !saddr)
		error(1, 0, "Must pass -D <daddr> and -S <saddr>");

	if (cfg_do_tx && cfg_send_pfpacket && (!cfg_mac_src || !cfg_mac_dst))
		error(1, 0, "Transmit with pf_packet requires mac addresses");

	/* a negative length would corrupt the length fields and offsets */
	if (cfg_payload_len < 0)
		error(1, 0, "Payload length must not be negative");
	if (cfg_payload_len > MAX_PAYLOAD_LEN)
		error(1, 0, "Payload length exceeds max");

	if (cfg_num_pkt < 0)
		error(1, 0, "Packet count must not be negative");

	if (cfg_proto != IPPROTO_UDP && (cfg_zero_sum || cfg_zero_disable))
		error(1, 0, "Only UDP supports zero csum");

	if (cfg_zero_sum && !cfg_send_udp)
		error(1, 0, "Zero checksum conversion requires -U for tx csum offload");
	if (cfg_zero_sum && cfg_bad_csum)
		error(1, 0, "Cannot combine zero checksum conversion and invalid checksum");
	if (cfg_zero_sum && cfg_random_seed)
		error(1, 0, "Cannot combine zero checksum conversion with randomization");

	if (cfg_family == PF_INET6) {
		cfg_saddr6.sin6_port = htons(cfg_port_src);
		cfg_daddr6.sin6_port = htons(cfg_port_dst);

		if (inet_pton(cfg_family, daddr, &cfg_daddr6.sin6_addr) != 1)
			error(1, errno, "Cannot parse ipv6 -D");
		if (inet_pton(cfg_family, saddr, &cfg_saddr6.sin6_addr) != 1)
			error(1, errno, "Cannot parse ipv6 -S");
	} else {
		cfg_saddr4.sin_port = htons(cfg_port_src);
		cfg_daddr4.sin_port = htons(cfg_port_dst);

		if (inet_pton(cfg_family, daddr, &cfg_daddr4.sin_addr) != 1)
			error(1, errno, "Cannot parse ipv4 -D");
		if (inet_pton(cfg_family, saddr, &cfg_saddr4.sin_addr) != 1)
			error(1, errno, "Cannot parse ipv4 -S");
	}

	if (cfg_do_tx && cfg_random_seed) {
		/* special case: time-based seed */
		if (cfg_random_seed == 1)
			cfg_random_seed = (unsigned int)gettimeofday_ms();
		srand(cfg_random_seed);
		fprintf(stderr, "randomization seed: %u\n", cfg_random_seed);
	}
}

/* Build and send cfg_num_pkt packets over the socket type selected by
 * cfg_send_pfpacket / cfg_send_udp (default: raw IP socket).
 */
static void do_tx(void)
{
	static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
	char *buf;
	int fd, len, i;

	/* build before opening the socket: cfg_zero_sum may change
	 * cfg_port_src, which open_inet() binds to
	 */
	buf = build_packet(_buf, sizeof(_buf), &len);

	if (cfg_send_pfpacket)
		fd = open_packet();
	else if (cfg_send_udp)
		fd = open_inet(SOCK_DGRAM, 0);
	else
		fd = open_inet(SOCK_RAW, IPPROTO_RAW);

	for (i = 0; i < cfg_num_pkt; i++) {
		if (cfg_send_pfpacket)
			send_packet(fd, buf, len);
		else
			send_inet(fd, buf, len);

		/* randomize each packet individually to increase coverage */
		if (cfg_random_seed) {
			cfg_payload_len = rand() % MAX_PAYLOAD_LEN;
			buf = build_packet(_buf, sizeof(_buf), &len);
		}
	}

	if (close(fd))
		error(1, errno, "close tx");
}

/* Receive on PF_PACKET socket @fdp and UDP socket @fdr until the timeout
 * cfg_timeout_ms expires, then check the packet counts against the
 * expectation. Closes both sockets. Exits on failure.
 */
static void do_rx(int fdp, int fdr)
{
	unsigned long count_udp = 0, count_pkt = 0;
	long tleft, tstop;
	struct pollfd pfd;

	tstop = gettimeofday_ms() + cfg_timeout_ms;
	tleft = cfg_timeout_ms;

	do {
		pfd.events = POLLIN;
		pfd.fd = fdp;
		if (poll(&pfd, 1, tleft) == -1)
			error(1, errno, "poll");

		if (pfd.revents & POLLIN)
			count_pkt += recv_packet(fdp);

		if (cfg_proto == IPPROTO_UDP)
			count_udp += recv_udp(fdr);

		tleft = tstop - gettimeofday_ms();
	} while (tleft > 0);

	if (close(fdr))
		error(1, errno, "close r");
	if (close(fdp))
		error(1, errno, "close p");

	if (count_pkt < (unsigned long)cfg_num_pkt)
		error(1, 0, "rx: missing packets at pf_packet: %lu < %u",
		      count_pkt, cfg_num_pkt);

	/* the UDP socket must see good packets, and only good packets */
	if (cfg_proto == IPPROTO_UDP) {
		if (cfg_bad_csum && count_udp)
			error(1, 0, "rx: unexpected packets at udp");
		if (!cfg_bad_csum && !count_udp)
			error(1, 0, "rx: missing packets at udp");
	}
}

int main(int argc, char *const argv[])
{
	int fdp = -1, fdr = -1; /* -1 to silence -Wmaybe-uninitialized */

	parse_args(argc, argv);

	/* open receive sockets before transmitting */
	if (cfg_do_rx) {
		fdp = recv_prepare_packet();
		fdr = recv_prepare_udp();
	}

	if (cfg_do_tx)
		do_tx();

	if (cfg_do_rx)
		do_rx(fdp, fdr);

	fprintf(stderr, "OK\n");
	return 0;
}