Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v5.9-rc2 1295 lines 31 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* Copyright(c) 2017 - 2018 Intel Corporation. */ 3 4#include <asm/barrier.h> 5#include <errno.h> 6#include <getopt.h> 7#include <libgen.h> 8#include <linux/bpf.h> 9#include <linux/compiler.h> 10#include <linux/if_link.h> 11#include <linux/if_xdp.h> 12#include <linux/if_ether.h> 13#include <linux/ip.h> 14#include <linux/udp.h> 15#include <arpa/inet.h> 16#include <locale.h> 17#include <net/ethernet.h> 18#include <net/if.h> 19#include <poll.h> 20#include <pthread.h> 21#include <signal.h> 22#include <stdbool.h> 23#include <stdio.h> 24#include <stdlib.h> 25#include <string.h> 26#include <sys/mman.h> 27#include <sys/resource.h> 28#include <sys/socket.h> 29#include <sys/types.h> 30#include <time.h> 31#include <unistd.h> 32 33#include <bpf/libbpf.h> 34#include <bpf/xsk.h> 35#include <bpf/bpf.h> 36#include "xdpsock.h" 37 38#ifndef SOL_XDP 39#define SOL_XDP 283 40#endif 41 42#ifndef AF_XDP 43#define AF_XDP 44 44#endif 45 46#ifndef PF_XDP 47#define PF_XDP AF_XDP 48#endif 49 50#define NUM_FRAMES (4 * 1024) 51#define MIN_PKT_SIZE 64 52 53#define DEBUG_HEXDUMP 0 54 55typedef __u64 u64; 56typedef __u32 u32; 57typedef __u16 u16; 58typedef __u8 u8; 59 60static unsigned long prev_time; 61 62enum benchmark_type { 63 BENCH_RXDROP = 0, 64 BENCH_TXONLY = 1, 65 BENCH_L2FWD = 2, 66}; 67 68static enum benchmark_type opt_bench = BENCH_RXDROP; 69static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; 70static const char *opt_if = ""; 71static int opt_ifindex; 72static int opt_queue; 73static unsigned long opt_duration; 74static unsigned long start_time; 75static bool benchmark_done; 76static u32 opt_batch_size = 64; 77static int opt_pkt_count; 78static u16 opt_pkt_size = MIN_PKT_SIZE; 79static u32 opt_pkt_fill_pattern = 0x12345678; 80static bool opt_extra_stats; 81static int opt_poll; 82static int opt_interval = 1; 83static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; 84static u32 opt_umem_flags; 85static int opt_unaligned_chunks; 86static int opt_mmap_flags; 87static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; 88static int opt_timeout = 1000; 89static bool opt_need_wakeup = true; 90static u32 opt_num_xsks = 1; 91static u32 prog_id; 92 93struct xsk_umem_info { 94 struct xsk_ring_prod fq; 95 struct xsk_ring_cons cq; 96 struct xsk_umem *umem; 97 void *buffer; 98}; 99 100struct xsk_socket_info { 101 struct xsk_ring_cons rx; 102 struct xsk_ring_prod tx; 103 struct xsk_umem_info *umem; 104 struct xsk_socket *xsk; 105 unsigned long rx_npkts; 106 unsigned long tx_npkts; 107 unsigned long rx_dropped_npkts; 108 unsigned long rx_invalid_npkts; 109 unsigned long tx_invalid_npkts; 110 unsigned long rx_full_npkts; 111 unsigned long rx_fill_empty_npkts; 112 unsigned long tx_empty_npkts; 113 unsigned long prev_rx_npkts; 114 unsigned long prev_tx_npkts; 115 unsigned long prev_rx_dropped_npkts; 116 unsigned long prev_rx_invalid_npkts; 117 unsigned long prev_tx_invalid_npkts; 118 unsigned long prev_rx_full_npkts; 119 unsigned long prev_rx_fill_empty_npkts; 120 unsigned long prev_tx_empty_npkts; 121 u32 outstanding_tx; 122}; 123 124static int num_socks; 125struct xsk_socket_info *xsks[MAX_SOCKS]; 126 127static unsigned long get_nsecs(void) 128{ 129 struct timespec ts; 130 131 clock_gettime(CLOCK_MONOTONIC, &ts); 132 return ts.tv_sec * 1000000000UL + ts.tv_nsec; 133} 134 135static void print_benchmark(bool running) 136{ 137 const char *bench_str = "INVALID"; 138 139 if (opt_bench == BENCH_RXDROP) 140 bench_str = "rxdrop"; 141 else if (opt_bench == BENCH_TXONLY) 142 bench_str = "txonly"; 143 else if (opt_bench == BENCH_L2FWD) 144 bench_str = "l2fwd"; 145 146 printf("%s:%d %s ", opt_if, opt_queue, bench_str); 147 if (opt_xdp_flags & XDP_FLAGS_SKB_MODE) 148 printf("xdp-skb "); 149 else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE) 150 printf("xdp-drv "); 151 else 152 printf(" "); 153 154 if (opt_poll) 155 printf("poll() "); 156 157 if (running) { 158 printf("running..."); 159 fflush(stdout); 160 } 161} 162 163static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk) 164{ 165 struct xdp_statistics stats; 166 socklen_t optlen; 167 int err; 168 169 optlen = sizeof(stats); 170 err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); 171 if (err) 172 return err; 173 174 if (optlen == sizeof(struct xdp_statistics)) { 175 xsk->rx_dropped_npkts = stats.rx_dropped; 176 xsk->rx_invalid_npkts = stats.rx_invalid_descs; 177 xsk->tx_invalid_npkts = stats.tx_invalid_descs; 178 xsk->rx_full_npkts = stats.rx_ring_full; 179 xsk->rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs; 180 xsk->tx_empty_npkts = stats.tx_ring_empty_descs; 181 return 0; 182 } 183 184 return -EINVAL; 185} 186 187static void dump_stats(void) 188{ 189 unsigned long now = get_nsecs(); 190 long dt = now - prev_time; 191 int i; 192 193 prev_time = now; 194 195 for (i = 0; i < num_socks && xsks[i]; i++) { 196 char *fmt = "%-15s %'-11.0f %'-11lu\n"; 197 double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps, 198 tx_invalid_pps, tx_empty_pps; 199 200 rx_pps = (xsks[i]->rx_npkts - xsks[i]->prev_rx_npkts) * 201 1000000000. / dt; 202 tx_pps = (xsks[i]->tx_npkts - xsks[i]->prev_tx_npkts) * 203 1000000000. / dt; 204 205 printf("\n sock%d@", i); 206 print_benchmark(false); 207 printf("\n"); 208 209 printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts", 210 dt / 1000000000.); 211 printf(fmt, "rx", rx_pps, xsks[i]->rx_npkts); 212 printf(fmt, "tx", tx_pps, xsks[i]->tx_npkts); 213 214 xsks[i]->prev_rx_npkts = xsks[i]->rx_npkts; 215 xsks[i]->prev_tx_npkts = xsks[i]->tx_npkts; 216 217 if (opt_extra_stats) { 218 if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) { 219 dropped_pps = (xsks[i]->rx_dropped_npkts - 220 xsks[i]->prev_rx_dropped_npkts) * 1000000000. / dt; 221 rx_invalid_pps = (xsks[i]->rx_invalid_npkts - 222 xsks[i]->prev_rx_invalid_npkts) * 1000000000. / dt; 223 tx_invalid_pps = (xsks[i]->tx_invalid_npkts - 224 xsks[i]->prev_tx_invalid_npkts) * 1000000000. / dt; 225 full_pps = (xsks[i]->rx_full_npkts - 226 xsks[i]->prev_rx_full_npkts) * 1000000000. / dt; 227 fill_empty_pps = (xsks[i]->rx_fill_empty_npkts - 228 xsks[i]->prev_rx_fill_empty_npkts) 229 * 1000000000. / dt; 230 tx_empty_pps = (xsks[i]->tx_empty_npkts - 231 xsks[i]->prev_tx_empty_npkts) * 1000000000. / dt; 232 233 printf(fmt, "rx dropped", dropped_pps, 234 xsks[i]->rx_dropped_npkts); 235 printf(fmt, "rx invalid", rx_invalid_pps, 236 xsks[i]->rx_invalid_npkts); 237 printf(fmt, "tx invalid", tx_invalid_pps, 238 xsks[i]->tx_invalid_npkts); 239 printf(fmt, "rx queue full", full_pps, 240 xsks[i]->rx_full_npkts); 241 printf(fmt, "fill ring empty", fill_empty_pps, 242 xsks[i]->rx_fill_empty_npkts); 243 printf(fmt, "tx ring empty", tx_empty_pps, 244 xsks[i]->tx_empty_npkts); 245 246 xsks[i]->prev_rx_dropped_npkts = xsks[i]->rx_dropped_npkts; 247 xsks[i]->prev_rx_invalid_npkts = xsks[i]->rx_invalid_npkts; 248 xsks[i]->prev_tx_invalid_npkts = xsks[i]->tx_invalid_npkts; 249 xsks[i]->prev_rx_full_npkts = xsks[i]->rx_full_npkts; 250 xsks[i]->prev_rx_fill_empty_npkts = xsks[i]->rx_fill_empty_npkts; 251 xsks[i]->prev_tx_empty_npkts = xsks[i]->tx_empty_npkts; 252 } else { 253 printf("%-15s\n", "Error retrieving extra stats"); 254 } 255 } 256 } 257} 258 259static bool is_benchmark_done(void) 260{ 261 if (opt_duration > 0) { 262 unsigned long dt = (get_nsecs() - start_time); 263 264 if (dt >= opt_duration) 265 benchmark_done = true; 266 } 267 return benchmark_done; 268} 269 270static void *poller(void *arg) 271{ 272 (void)arg; 273 while (!is_benchmark_done()) { 274 sleep(opt_interval); 275 dump_stats(); 276 } 277 278 return NULL; 279} 280 281static void remove_xdp_program(void) 282{ 283 u32 curr_prog_id = 0; 284 285 if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { 286 printf("bpf_get_link_xdp_id failed\n"); 287 exit(EXIT_FAILURE); 288 } 289 if (prog_id == curr_prog_id) 290 bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); 291 else if (!curr_prog_id) 292 printf("couldn't find a prog id on a given interface\n"); 293 else 294 printf("program on interface changed, not removing\n"); 295} 296 297static void int_exit(int sig) 298{ 299 benchmark_done = true; 300} 301 302static void xdpsock_cleanup(void) 303{ 304 struct xsk_umem *umem = xsks[0]->umem->umem; 305 int i; 306 307 dump_stats(); 308 for (i = 0; i < num_socks; i++) 309 xsk_socket__delete(xsks[i]->xsk); 310 (void)xsk_umem__delete(umem); 311 remove_xdp_program(); 312} 313 314static void __exit_with_error(int error, const char *file, const char *func, 315 int line) 316{ 317 fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func, 318 line, error, strerror(error)); 319 dump_stats(); 320 remove_xdp_program(); 321 exit(EXIT_FAILURE); 322} 323 324#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \ 325 __LINE__) 326static void swap_mac_addresses(void *data) 327{ 328 struct ether_header *eth = (struct ether_header *)data; 329 struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost; 330 struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost; 331 struct ether_addr tmp; 332 333 tmp = *src_addr; 334 *src_addr = *dst_addr; 335 *dst_addr = tmp; 336} 337 338static void hex_dump(void *pkt, size_t length, u64 addr) 339{ 340 const unsigned char *address = (unsigned char *)pkt; 341 const unsigned char *line = address; 342 size_t line_size = 32; 343 unsigned char c; 344 char buf[32]; 345 int i = 0; 346 347 if (!DEBUG_HEXDUMP) 348 return; 349 350 sprintf(buf, "addr=%llu", addr); 351 printf("length = %zu\n", length); 352 printf("%s | ", buf); 353 while (length-- > 0) { 354 printf("%02X ", *address++); 355 if (!(++i % line_size) || (length == 0 && i % line_size)) { 356 if (length == 0) { 357 while (i++ % line_size) 358 printf("__ "); 359 } 360 printf(" | "); /* right close */ 361 while (line < address) { 362 c = *line++; 363 printf("%c", (c < 33 || c == 255) ? 0x2E : c); 364 } 365 printf("\n"); 366 if (length > 0) 367 printf("%s | ", buf); 368 } 369 } 370 printf("\n"); 371} 372 373static void *memset32_htonl(void *dest, u32 val, u32 size) 374{ 375 u32 *ptr = (u32 *)dest; 376 int i; 377 378 val = htonl(val); 379 380 for (i = 0; i < (size & (~0x3)); i += 4) 381 ptr[i >> 2] = val; 382 383 for (; i < size; i++) 384 ((char *)dest)[i] = ((char *)&val)[i & 3]; 385 386 return dest; 387} 388 389/* 390 * This function code has been taken from 391 * Linux kernel lib/checksum.c 392 */ 393static inline unsigned short from32to16(unsigned int x) 394{ 395 /* add up 16-bit and 16-bit for 16+c bit */ 396 x = (x & 0xffff) + (x >> 16); 397 /* add up carry.. */ 398 x = (x & 0xffff) + (x >> 16); 399 return x; 400} 401 402/* 403 * This function code has been taken from 404 * Linux kernel lib/checksum.c 405 */ 406static unsigned int do_csum(const unsigned char *buff, int len) 407{ 408 unsigned int result = 0; 409 int odd; 410 411 if (len <= 0) 412 goto out; 413 odd = 1 & (unsigned long)buff; 414 if (odd) { 415#ifdef __LITTLE_ENDIAN 416 result += (*buff << 8); 417#else 418 result = *buff; 419#endif 420 len--; 421 buff++; 422 } 423 if (len >= 2) { 424 if (2 & (unsigned long)buff) { 425 result += *(unsigned short *)buff; 426 len -= 2; 427 buff += 2; 428 } 429 if (len >= 4) { 430 const unsigned char *end = buff + 431 ((unsigned int)len & ~3); 432 unsigned int carry = 0; 433 434 do { 435 unsigned int w = *(unsigned int *)buff; 436 437 buff += 4; 438 result += carry; 439 result += w; 440 carry = (w > result); 441 } while (buff < end); 442 result += carry; 443 result = (result & 0xffff) + (result >> 16); 444 } 445 if (len & 2) { 446 result += *(unsigned short *)buff; 447 buff += 2; 448 } 449 } 450 if (len & 1) 451#ifdef __LITTLE_ENDIAN 452 result += *buff; 453#else 454 result += (*buff << 8); 455#endif 456 result = from32to16(result); 457 if (odd) 458 result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); 459out: 460 return result; 461} 462 463__sum16 ip_fast_csum(const void *iph, unsigned int ihl); 464 465/* 466 * This is a version of ip_compute_csum() optimized for IP headers, 467 * which always checksum on 4 octet boundaries. 468 * This function code has been taken from 469 * Linux kernel lib/checksum.c 470 */ 471__sum16 ip_fast_csum(const void *iph, unsigned int ihl) 472{ 473 return (__force __sum16)~do_csum(iph, ihl * 4); 474} 475 476/* 477 * Fold a partial checksum 478 * This function code has been taken from 479 * Linux kernel include/asm-generic/checksum.h 480 */ 481static inline __sum16 csum_fold(__wsum csum) 482{ 483 u32 sum = (__force u32)csum; 484 485 sum = (sum & 0xffff) + (sum >> 16); 486 sum = (sum & 0xffff) + (sum >> 16); 487 return (__force __sum16)~sum; 488} 489 490/* 491 * This function code has been taken from 492 * Linux kernel lib/checksum.c 493 */ 494static inline u32 from64to32(u64 x) 495{ 496 /* add up 32-bit and 32-bit for 32+c bit */ 497 x = (x & 0xffffffff) + (x >> 32); 498 /* add up carry.. */ 499 x = (x & 0xffffffff) + (x >> 32); 500 return (u32)x; 501} 502 503__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, 504 __u32 len, __u8 proto, __wsum sum); 505 506/* 507 * This function code has been taken from 508 * Linux kernel lib/checksum.c 509 */ 510__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, 511 __u32 len, __u8 proto, __wsum sum) 512{ 513 unsigned long long s = (__force u32)sum; 514 515 s += (__force u32)saddr; 516 s += (__force u32)daddr; 517#ifdef __BIG_ENDIAN__ 518 s += proto + len; 519#else 520 s += (proto + len) << 8; 521#endif 522 return (__force __wsum)from64to32(s); 523} 524 525/* 526 * This function has been taken from 527 * Linux kernel include/asm-generic/checksum.h 528 */ 529static inline __sum16 530csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, 531 __u8 proto, __wsum sum) 532{ 533 return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); 534} 535 536static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, 537 u8 proto, u16 *udp_pkt) 538{ 539 u32 csum = 0; 540 u32 cnt = 0; 541 542 /* udp hdr and data */ 543 for (; cnt < len; cnt += 2) 544 csum += udp_pkt[cnt >> 1]; 545 546 return csum_tcpudp_magic(saddr, daddr, len, proto, csum); 547} 548 549#define ETH_FCS_SIZE 4 550 551#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ 552 sizeof(struct udphdr)) 553 554#define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE) 555#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) 556#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) 557#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) 558 559static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; 560 561static void gen_eth_hdr_data(void) 562{ 563 struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + 564 sizeof(struct ethhdr) + 565 sizeof(struct iphdr)); 566 struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + 567 sizeof(struct ethhdr)); 568 struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; 569 570 /* ethernet header */ 571 memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); 572 memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); 573 eth_hdr->h_proto = htons(ETH_P_IP); 574 575 /* IP header */ 576 ip_hdr->version = IPVERSION; 577 ip_hdr->ihl = 0x5; /* 20 byte header */ 578 ip_hdr->tos = 0x0; 579 ip_hdr->tot_len = htons(IP_PKT_SIZE); 580 ip_hdr->id = 0; 581 ip_hdr->frag_off = 0; 582 ip_hdr->ttl = IPDEFTTL; 583 ip_hdr->protocol = IPPROTO_UDP; 584 ip_hdr->saddr = htonl(0x0a0a0a10); 585 ip_hdr->daddr = htonl(0x0a0a0a20); 586 587 /* IP header checksum */ 588 ip_hdr->check = 0; 589 ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl); 590 591 /* UDP header */ 592 udp_hdr->source = htons(0x1000); 593 udp_hdr->dest = htons(0x1000); 594 udp_hdr->len = htons(UDP_PKT_SIZE); 595 596 /* UDP data */ 597 memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern, 598 UDP_PKT_DATA_SIZE); 599 600 /* UDP header checksum */ 601 udp_hdr->check = 0; 602 udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, 603 IPPROTO_UDP, (u16 *)udp_hdr); 604} 605 606static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr) 607{ 608 memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, 609 PKT_SIZE); 610} 611 612static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) 613{ 614 struct xsk_umem_info *umem; 615 struct xsk_umem_config cfg = { 616 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, 617 .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, 618 .frame_size = opt_xsk_frame_size, 619 .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, 620 .flags = opt_umem_flags 621 }; 622 int ret; 623 624 umem = calloc(1, sizeof(*umem)); 625 if (!umem) 626 exit_with_error(errno); 627 628 ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, 629 &cfg); 630 if (ret) 631 exit_with_error(-ret); 632 633 umem->buffer = buffer; 634 return umem; 635} 636 637static void xsk_populate_fill_ring(struct xsk_umem_info *umem) 638{ 639 int ret, i; 640 u32 idx; 641 642 ret = xsk_ring_prod__reserve(&umem->fq, 643 XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx); 644 if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) 645 exit_with_error(-ret); 646 for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) 647 *xsk_ring_prod__fill_addr(&umem->fq, idx++) = 648 i * opt_xsk_frame_size; 649 xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); 650} 651 652static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, 653 bool rx, bool tx) 654{ 655 struct xsk_socket_config cfg; 656 struct xsk_socket_info *xsk; 657 struct xsk_ring_cons *rxr; 658 struct xsk_ring_prod *txr; 659 int ret; 660 661 xsk = calloc(1, sizeof(*xsk)); 662 if (!xsk) 663 exit_with_error(errno); 664 665 xsk->umem = umem; 666 cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; 667 cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; 668 if (opt_num_xsks > 1) 669 cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; 670 else 671 cfg.libbpf_flags = 0; 672 cfg.xdp_flags = opt_xdp_flags; 673 cfg.bind_flags = opt_xdp_bind_flags; 674 675 rxr = rx ? &xsk->rx : NULL; 676 txr = tx ? &xsk->tx : NULL; 677 ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem, 678 rxr, txr, &cfg); 679 if (ret) 680 exit_with_error(-ret); 681 682 ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags); 683 if (ret) 684 exit_with_error(-ret); 685 686 return xsk; 687} 688 689static struct option long_options[] = { 690 {"rxdrop", no_argument, 0, 'r'}, 691 {"txonly", no_argument, 0, 't'}, 692 {"l2fwd", no_argument, 0, 'l'}, 693 {"interface", required_argument, 0, 'i'}, 694 {"queue", required_argument, 0, 'q'}, 695 {"poll", no_argument, 0, 'p'}, 696 {"xdp-skb", no_argument, 0, 'S'}, 697 {"xdp-native", no_argument, 0, 'N'}, 698 {"interval", required_argument, 0, 'n'}, 699 {"zero-copy", no_argument, 0, 'z'}, 700 {"copy", no_argument, 0, 'c'}, 701 {"frame-size", required_argument, 0, 'f'}, 702 {"no-need-wakeup", no_argument, 0, 'm'}, 703 {"unaligned", no_argument, 0, 'u'}, 704 {"shared-umem", no_argument, 0, 'M'}, 705 {"force", no_argument, 0, 'F'}, 706 {"duration", required_argument, 0, 'd'}, 707 {"batch-size", required_argument, 0, 'b'}, 708 {"tx-pkt-count", required_argument, 0, 'C'}, 709 {"tx-pkt-size", required_argument, 0, 's'}, 710 {"tx-pkt-pattern", required_argument, 0, 'P'}, 711 {"extra-stats", no_argument, 0, 'x'}, 712 {0, 0, 0, 0} 713}; 714 715static void usage(const char *prog) 716{ 717 const char *str = 718 " Usage: %s [OPTIONS]\n" 719 " Options:\n" 720 " -r, --rxdrop Discard all incoming packets (default)\n" 721 " -t, --txonly Only send packets\n" 722 " -l, --l2fwd MAC swap L2 forwarding\n" 723 " -i, --interface=n Run on interface n\n" 724 " -q, --queue=n Use queue n (default 0)\n" 725 " -p, --poll Use poll syscall\n" 726 " -S, --xdp-skb=n Use XDP skb-mod\n" 727 " -N, --xdp-native=n Enforce XDP native mode\n" 728 " -n, --interval=n Specify statistics update interval (default 1 sec).\n" 729 " -z, --zero-copy Force zero-copy mode.\n" 730 " -c, --copy Force copy mode.\n" 731 " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n" 732 " -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n" 733 " -u, --unaligned Enable unaligned chunk placement\n" 734 " -M, --shared-umem Enable XDP_SHARED_UMEM\n" 735 " -F, --force Force loading the XDP prog\n" 736 " -d, --duration=n Duration in secs to run command.\n" 737 " Default: forever.\n" 738 " -b, --batch-size=n Batch size for sending or receiving\n" 739 " packets. Default: %d\n" 740 " -C, --tx-pkt-count=n Number of packets to send.\n" 741 " Default: Continuous packets.\n" 742 " -s, --tx-pkt-size=n Transmit packet size.\n" 743 " (Default: %d bytes)\n" 744 " Min size: %d, Max size %d.\n" 745 " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n" 746 " -x, --extra-stats Display extra statistics.\n" 747 "\n"; 748 fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, 749 opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, 750 XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern); 751 752 exit(EXIT_FAILURE); 753} 754 755static void parse_command_line(int argc, char **argv) 756{ 757 int option_index, c; 758 759 opterr = 0; 760 761 for (;;) { 762 c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:x", 763 long_options, &option_index); 764 if (c == -1) 765 break; 766 767 switch (c) { 768 case 'r': 769 opt_bench = BENCH_RXDROP; 770 break; 771 case 't': 772 opt_bench = BENCH_TXONLY; 773 break; 774 case 'l': 775 opt_bench = BENCH_L2FWD; 776 break; 777 case 'i': 778 opt_if = optarg; 779 break; 780 case 'q': 781 opt_queue = atoi(optarg); 782 break; 783 case 'p': 784 opt_poll = 1; 785 break; 786 case 'S': 787 opt_xdp_flags |= XDP_FLAGS_SKB_MODE; 788 opt_xdp_bind_flags |= XDP_COPY; 789 break; 790 case 'N': 791 /* default, set below */ 792 break; 793 case 'n': 794 opt_interval = atoi(optarg); 795 break; 796 case 'z': 797 opt_xdp_bind_flags |= XDP_ZEROCOPY; 798 break; 799 case 'c': 800 opt_xdp_bind_flags |= XDP_COPY; 801 break; 802 case 'u': 803 opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; 804 opt_unaligned_chunks = 1; 805 opt_mmap_flags = MAP_HUGETLB; 806 break; 807 case 'F': 808 opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; 809 break; 810 case 'f': 811 opt_xsk_frame_size = atoi(optarg); 812 break; 813 case 'm': 814 opt_need_wakeup = false; 815 opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP; 816 break; 817 case 'M': 818 opt_num_xsks = MAX_SOCKS; 819 break; 820 case 'd': 821 opt_duration = atoi(optarg); 822 opt_duration *= 1000000000; 823 break; 824 case 'b': 825 opt_batch_size = atoi(optarg); 826 break; 827 case 'C': 828 opt_pkt_count = atoi(optarg); 829 break; 830 case 's': 831 opt_pkt_size = atoi(optarg); 832 if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) || 833 opt_pkt_size < MIN_PKT_SIZE) { 834 fprintf(stderr, 835 "ERROR: Invalid frame size %d\n", 836 opt_pkt_size); 837 usage(basename(argv[0])); 838 } 839 break; 840 case 'P': 841 opt_pkt_fill_pattern = strtol(optarg, NULL, 16); 842 break; 843 case 'x': 844 opt_extra_stats = 1; 845 break; 846 default: 847 usage(basename(argv[0])); 848 } 849 } 850 851 if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE)) 852 opt_xdp_flags |= XDP_FLAGS_DRV_MODE; 853 854 opt_ifindex = if_nametoindex(opt_if); 855 if (!opt_ifindex) { 856 fprintf(stderr, "ERROR: interface \"%s\" does not exist\n", 857 opt_if); 858 usage(basename(argv[0])); 859 } 860 861 if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) && 862 !opt_unaligned_chunks) { 863 fprintf(stderr, "--frame-size=%d is not a power of two\n", 864 opt_xsk_frame_size); 865 usage(basename(argv[0])); 866 } 867} 868 869static void kick_tx(struct xsk_socket_info *xsk) 870{ 871 int ret; 872 873 ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); 874 if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || 875 errno == EBUSY || errno == ENETDOWN) 876 return; 877 exit_with_error(errno); 878} 879 880static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, 881 struct pollfd *fds) 882{ 883 struct xsk_umem_info *umem = xsk->umem; 884 u32 idx_cq = 0, idx_fq = 0; 885 unsigned int rcvd; 886 size_t ndescs; 887 888 if (!xsk->outstanding_tx) 889 return; 890 891 if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) 892 kick_tx(xsk); 893 894 ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size : 895 xsk->outstanding_tx; 896 897 /* re-add completed Tx buffers */ 898 rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq); 899 if (rcvd > 0) { 900 unsigned int i; 901 int ret; 902 903 ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); 904 while (ret != rcvd) { 905 if (ret < 0) 906 exit_with_error(-ret); 907 if (xsk_ring_prod__needs_wakeup(&umem->fq)) 908 ret = poll(fds, num_socks, opt_timeout); 909 ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); 910 } 911 912 for (i = 0; i < rcvd; i++) 913 *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = 914 *xsk_ring_cons__comp_addr(&umem->cq, idx_cq++); 915 916 xsk_ring_prod__submit(&xsk->umem->fq, rcvd); 917 xsk_ring_cons__release(&xsk->umem->cq, rcvd); 918 xsk->outstanding_tx -= rcvd; 919 xsk->tx_npkts += rcvd; 920 } 921} 922 923static inline void complete_tx_only(struct xsk_socket_info *xsk, 924 int batch_size) 925{ 926 unsigned int rcvd; 927 u32 idx; 928 929 if (!xsk->outstanding_tx) 930 return; 931 932 if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) 933 kick_tx(xsk); 934 935 rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); 936 if (rcvd > 0) { 937 xsk_ring_cons__release(&xsk->umem->cq, rcvd); 938 xsk->outstanding_tx -= rcvd; 939 xsk->tx_npkts += rcvd; 940 } 941} 942 943static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds) 944{ 945 unsigned int rcvd, i; 946 u32 idx_rx = 0, idx_fq = 0; 947 int ret; 948 949 rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); 950 if (!rcvd) { 951 if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) 952 ret = poll(fds, num_socks, opt_timeout); 953 return; 954 } 955 956 ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); 957 while (ret != rcvd) { 958 if (ret < 0) 959 exit_with_error(-ret); 960 if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) 961 ret = poll(fds, num_socks, opt_timeout); 962 ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); 963 } 964 965 for (i = 0; i < rcvd; i++) { 966 u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; 967 u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; 968 u64 orig = xsk_umem__extract_addr(addr); 969 970 addr = xsk_umem__add_offset_to_addr(addr); 971 char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); 972 973 hex_dump(pkt, len, addr); 974 *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; 975 } 976 977 xsk_ring_prod__submit(&xsk->umem->fq, rcvd); 978 xsk_ring_cons__release(&xsk->rx, rcvd); 979 xsk->rx_npkts += rcvd; 980} 981 982static void rx_drop_all(void) 983{ 984 struct pollfd fds[MAX_SOCKS] = {}; 985 int i, ret; 986 987 for (i = 0; i < num_socks; i++) { 988 fds[i].fd = xsk_socket__fd(xsks[i]->xsk); 989 fds[i].events = POLLIN; 990 } 991 992 for (;;) { 993 if (opt_poll) { 994 ret = poll(fds, num_socks, opt_timeout); 995 if (ret <= 0) 996 continue; 997 } 998 999 for (i = 0; i < num_socks; i++) 1000 rx_drop(xsks[i], fds); 1001 1002 if (benchmark_done) 1003 break; 1004 } 1005} 1006 1007static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size) 1008{ 1009 u32 idx; 1010 unsigned int i; 1011 1012 while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < 1013 batch_size) { 1014 complete_tx_only(xsk, batch_size); 1015 } 1016 1017 for (i = 0; i < batch_size; i++) { 1018 struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, 1019 idx + i); 1020 tx_desc->addr = (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; 1021 tx_desc->len = PKT_SIZE; 1022 } 1023 1024 xsk_ring_prod__submit(&xsk->tx, batch_size); 1025 xsk->outstanding_tx += batch_size; 1026 frame_nb += batch_size; 1027 frame_nb %= NUM_FRAMES; 1028 complete_tx_only(xsk, batch_size); 1029} 1030 1031static inline int get_batch_size(int pkt_cnt) 1032{ 1033 if (!opt_pkt_count) 1034 return opt_batch_size; 1035 1036 if (pkt_cnt + opt_batch_size <= opt_pkt_count) 1037 return opt_batch_size; 1038 1039 return opt_pkt_count - pkt_cnt; 1040} 1041 1042static void complete_tx_only_all(void) 1043{ 1044 bool pending; 1045 int i; 1046 1047 do { 1048 pending = false; 1049 for (i = 0; i < num_socks; i++) { 1050 if (xsks[i]->outstanding_tx) { 1051 complete_tx_only(xsks[i], opt_batch_size); 1052 pending = !!xsks[i]->outstanding_tx; 1053 } 1054 } 1055 } while (pending); 1056} 1057 1058static void tx_only_all(void) 1059{ 1060 struct pollfd fds[MAX_SOCKS] = {}; 1061 u32 frame_nb[MAX_SOCKS] = {}; 1062 int pkt_cnt = 0; 1063 int i, ret; 1064 1065 for (i = 0; i < num_socks; i++) { 1066 fds[0].fd = xsk_socket__fd(xsks[i]->xsk); 1067 fds[0].events = POLLOUT; 1068 } 1069 1070 while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { 1071 int batch_size = get_batch_size(pkt_cnt); 1072 1073 if (opt_poll) { 1074 ret = poll(fds, num_socks, opt_timeout); 1075 if (ret <= 0) 1076 continue; 1077 1078 if (!(fds[0].revents & POLLOUT)) 1079 continue; 1080 } 1081 1082 for (i = 0; i < num_socks; i++) 1083 tx_only(xsks[i], frame_nb[i], batch_size); 1084 1085 pkt_cnt += batch_size; 1086 1087 if (benchmark_done) 1088 break; 1089 } 1090 1091 if (opt_pkt_count) 1092 complete_tx_only_all(); 1093} 1094 1095static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) 1096{ 1097 unsigned int rcvd, i; 1098 u32 idx_rx = 0, idx_tx = 0; 1099 int ret; 1100 1101 complete_tx_l2fwd(xsk, fds); 1102 1103 rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); 1104 if (!rcvd) { 1105 if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) 1106 ret = poll(fds, num_socks, opt_timeout); 1107 return; 1108 } 1109 1110 ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); 1111 while (ret != rcvd) { 1112 if (ret < 0) 1113 exit_with_error(-ret); 1114 if (xsk_ring_prod__needs_wakeup(&xsk->tx)) 1115 kick_tx(xsk); 1116 ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); 1117 } 1118 1119 for (i = 0; i < rcvd; i++) { 1120 u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; 1121 u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; 1122 u64 orig = addr; 1123 1124 addr = xsk_umem__add_offset_to_addr(addr); 1125 char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); 1126 1127 swap_mac_addresses(pkt); 1128 1129 hex_dump(pkt, len, addr); 1130 xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig; 1131 xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len; 1132 } 1133 1134 xsk_ring_prod__submit(&xsk->tx, rcvd); 1135 xsk_ring_cons__release(&xsk->rx, rcvd); 1136 1137 xsk->rx_npkts += rcvd; 1138 xsk->outstanding_tx += rcvd; 1139} 1140 1141static void l2fwd_all(void) 1142{ 1143 struct pollfd fds[MAX_SOCKS] = {}; 1144 int i, ret; 1145 1146 for (i = 0; i < num_socks; i++) { 1147 fds[i].fd = xsk_socket__fd(xsks[i]->xsk); 1148 fds[i].events = POLLOUT | POLLIN; 1149 } 1150 1151 for (;;) { 1152 if (opt_poll) { 1153 ret = poll(fds, num_socks, opt_timeout); 1154 if (ret <= 0) 1155 continue; 1156 } 1157 1158 for (i = 0; i < num_socks; i++) 1159 l2fwd(xsks[i], fds); 1160 1161 if (benchmark_done) 1162 break; 1163 } 1164} 1165 1166static void load_xdp_program(char **argv, struct bpf_object **obj) 1167{ 1168 struct bpf_prog_load_attr prog_load_attr = { 1169 .prog_type = BPF_PROG_TYPE_XDP, 1170 }; 1171 char xdp_filename[256]; 1172 int prog_fd; 1173 1174 snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]); 1175 prog_load_attr.file = xdp_filename; 1176 1177 if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd)) 1178 exit(EXIT_FAILURE); 1179 if (prog_fd < 0) { 1180 fprintf(stderr, "ERROR: no program found: %s\n", 1181 strerror(prog_fd)); 1182 exit(EXIT_FAILURE); 1183 } 1184 1185 if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) { 1186 fprintf(stderr, "ERROR: link set xdp fd failed\n"); 1187 exit(EXIT_FAILURE); 1188 } 1189} 1190 1191static void enter_xsks_into_map(struct bpf_object *obj) 1192{ 1193 struct bpf_map *map; 1194 int i, xsks_map; 1195 1196 map = bpf_object__find_map_by_name(obj, "xsks_map"); 1197 xsks_map = bpf_map__fd(map); 1198 if (xsks_map < 0) { 1199 fprintf(stderr, "ERROR: no xsks map found: %s\n", 1200 strerror(xsks_map)); 1201 exit(EXIT_FAILURE); 1202 } 1203 1204 for (i = 0; i < num_socks; i++) { 1205 int fd = xsk_socket__fd(xsks[i]->xsk); 1206 int key, ret; 1207 1208 key = i; 1209 ret = bpf_map_update_elem(xsks_map, &key, &fd, 0); 1210 if (ret) { 1211 fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i); 1212 exit(EXIT_FAILURE); 1213 } 1214 } 1215} 1216 1217int main(int argc, char **argv) 1218{ 1219 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; 1220 bool rx = false, tx = false; 1221 struct xsk_umem_info *umem; 1222 struct bpf_object *obj; 1223 pthread_t pt; 1224 int i, ret; 1225 void *bufs; 1226 1227 parse_command_line(argc, argv); 1228 1229 if (setrlimit(RLIMIT_MEMLOCK, &r)) { 1230 fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", 1231 strerror(errno)); 1232 exit(EXIT_FAILURE); 1233 } 1234 1235 if (opt_num_xsks > 1) 1236 load_xdp_program(argv, &obj); 1237 1238 /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */ 1239 bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size, 1240 PROT_READ | PROT_WRITE, 1241 MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0); 1242 if (bufs == MAP_FAILED) { 1243 printf("ERROR: mmap failed\n"); 1244 exit(EXIT_FAILURE); 1245 } 1246 1247 /* Create sockets... */ 1248 umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size); 1249 if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) { 1250 rx = true; 1251 xsk_populate_fill_ring(umem); 1252 } 1253 if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY) 1254 tx = true; 1255 for (i = 0; i < opt_num_xsks; i++) 1256 xsks[num_socks++] = xsk_configure_socket(umem, rx, tx); 1257 1258 if (opt_bench == BENCH_TXONLY) { 1259 gen_eth_hdr_data(); 1260 1261 for (i = 0; i < NUM_FRAMES; i++) 1262 gen_eth_frame(umem, i * opt_xsk_frame_size); 1263 } 1264 1265 if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY) 1266 enter_xsks_into_map(obj); 1267 1268 signal(SIGINT, int_exit); 1269 signal(SIGTERM, int_exit); 1270 signal(SIGABRT, int_exit); 1271 1272 setlocale(LC_ALL, ""); 1273 1274 ret = pthread_create(&pt, NULL, poller, NULL); 1275 if (ret) 1276 exit_with_error(ret); 1277 1278 prev_time = get_nsecs(); 1279 start_time = prev_time; 1280 1281 if (opt_bench == BENCH_RXDROP) 1282 rx_drop_all(); 1283 else if (opt_bench == BENCH_TXONLY) 1284 tx_only_all(); 1285 else 1286 l2fwd_all(); 1287 1288 benchmark_done = true; 1289 1290 pthread_join(pt, NULL); 1291 1292 xdpsock_cleanup(); 1293 1294 return 0; 1295}