Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/bpf: add xdp noinline test

Add a large, semi-artificial XDP test with 18 functions to stress-test
the BPF call verification logic.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

authored by

Alexei Starovoitov and committed by
Daniel Borkmann
b0b04fc4 3bc35c63

+916 -1
+2 -1
tools/testing/selftests/bpf/Makefile
··· 18 18 TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ 19 19 test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ 20 20 sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ 21 - test_l4lb_noinline.o 21 + test_l4lb_noinline.o test_xdp_noinline.o 22 22 23 23 TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ 24 24 test_offload.py ··· 54 54 -Wno-compare-distinct-pointer-types 55 55 56 56 $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline 57 + $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline 57 58 58 59 %.o: %.c 59 60 $(CLANG) $(CLANG_FLAGS) \
+81
tools/testing/selftests/bpf/test_progs.c
··· 257 257 test_l4lb(file2); 258 258 } 259 259 260 + static void test_xdp_noinline(void) 261 + { 262 + const char *file = "./test_xdp_noinline.o"; 263 + unsigned int nr_cpus = bpf_num_possible_cpus(); 264 + struct vip key = {.protocol = 6}; 265 + struct vip_meta { 266 + __u32 flags; 267 + __u32 vip_num; 268 + } value = {.vip_num = VIP_NUM}; 269 + __u32 stats_key = VIP_NUM; 270 + struct vip_stats { 271 + __u64 bytes; 272 + __u64 pkts; 273 + } stats[nr_cpus]; 274 + struct real_definition { 275 + union { 276 + __be32 dst; 277 + __be32 dstv6[4]; 278 + }; 279 + __u8 flags; 280 + } real_def = {.dst = MAGIC_VAL}; 281 + __u32 ch_key = 11, real_num = 3; 282 + __u32 duration, retval, size; 283 + int err, i, prog_fd, map_fd; 284 + __u64 bytes = 0, pkts = 0; 285 + struct bpf_object *obj; 286 + char buf[128]; 287 + u32 *magic = (u32 *)buf; 288 + 289 + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); 290 + if (err) { 291 + error_cnt++; 292 + return; 293 + } 294 + 295 + map_fd = bpf_find_map(__func__, obj, "vip_map"); 296 + if (map_fd < 0) 297 + goto out; 298 + bpf_map_update_elem(map_fd, &key, &value, 0); 299 + 300 + map_fd = bpf_find_map(__func__, obj, "ch_rings"); 301 + if (map_fd < 0) 302 + goto out; 303 + bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); 304 + 305 + map_fd = bpf_find_map(__func__, obj, "reals"); 306 + if (map_fd < 0) 307 + goto out; 308 + bpf_map_update_elem(map_fd, &real_num, &real_def, 0); 309 + 310 + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), 311 + buf, &size, &retval, &duration); 312 + CHECK(err || errno || retval != 1 || size != 54 || 313 + *magic != MAGIC_VAL, "ipv4", 314 + "err %d errno %d retval %d size %d magic %x\n", 315 + err, errno, retval, size, *magic); 316 + 317 + err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), 318 + buf, &size, &retval, &duration); 319 + CHECK(err || errno || retval != 1 || size != 74 || 320 + *magic != MAGIC_VAL, "ipv6", 321 + "err %d errno %d retval %d size %d 
magic %x\n", 322 + err, errno, retval, size, *magic); 323 + 324 + map_fd = bpf_find_map(__func__, obj, "stats"); 325 + if (map_fd < 0) 326 + goto out; 327 + bpf_map_lookup_elem(map_fd, &stats_key, stats); 328 + for (i = 0; i < nr_cpus; i++) { 329 + bytes += stats[i].bytes; 330 + pkts += stats[i].pkts; 331 + } 332 + if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) { 333 + error_cnt++; 334 + printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts); 335 + } 336 + out: 337 + bpf_object__close(obj); 338 + } 339 + 260 340 static void test_tcp_estats(void) 261 341 { 262 342 const char *file = "./test_tcp_estats.o"; ··· 846 766 test_pkt_access(); 847 767 test_xdp(); 848 768 test_l4lb_all(); 769 + test_xdp_noinline(); 849 770 test_tcp_estats(); 850 771 test_bpf_obj_id(); 851 772 test_pkt_md_access();
+833
tools/testing/selftests/bpf/test_xdp_noinline.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + // Copyright (c) 2017 Facebook 3 + #include <stddef.h> 4 + #include <stdbool.h> 5 + #include <string.h> 6 + #include <linux/pkt_cls.h> 7 + #include <linux/bpf.h> 8 + #include <linux/in.h> 9 + #include <linux/if_ether.h> 10 + #include <linux/ip.h> 11 + #include <linux/ipv6.h> 12 + #include <linux/icmp.h> 13 + #include <linux/icmpv6.h> 14 + #include <linux/tcp.h> 15 + #include <linux/udp.h> 16 + #include "bpf_helpers.h" 17 + 18 + #define bpf_printk(fmt, ...) \ 19 + ({ \ 20 + char ____fmt[] = fmt; \ 21 + bpf_trace_printk(____fmt, sizeof(____fmt), \ 22 + ##__VA_ARGS__); \ 23 + }) 24 + 25 + static __u32 rol32(__u32 word, unsigned int shift) 26 + { 27 + return (word << shift) | (word >> ((-shift) & 31)); 28 + } 29 + 30 + /* copy paste of jhash from kernel sources to make sure llvm 31 + * can compile it into valid sequence of bpf instructions 32 + */ 33 + #define __jhash_mix(a, b, c) \ 34 + { \ 35 + a -= c; a ^= rol32(c, 4); c += b; \ 36 + b -= a; b ^= rol32(a, 6); a += c; \ 37 + c -= b; c ^= rol32(b, 8); b += a; \ 38 + a -= c; a ^= rol32(c, 16); c += b; \ 39 + b -= a; b ^= rol32(a, 19); a += c; \ 40 + c -= b; c ^= rol32(b, 4); b += a; \ 41 + } 42 + 43 + #define __jhash_final(a, b, c) \ 44 + { \ 45 + c ^= b; c -= rol32(b, 14); \ 46 + a ^= c; a -= rol32(c, 11); \ 47 + b ^= a; b -= rol32(a, 25); \ 48 + c ^= b; c -= rol32(b, 16); \ 49 + a ^= c; a -= rol32(c, 4); \ 50 + b ^= a; b -= rol32(a, 14); \ 51 + c ^= b; c -= rol32(b, 24); \ 52 + } 53 + 54 + #define JHASH_INITVAL 0xdeadbeef 55 + 56 + typedef unsigned int u32; 57 + 58 + static __attribute__ ((noinline)) 59 + u32 jhash(const void *key, u32 length, u32 initval) 60 + { 61 + u32 a, b, c; 62 + const unsigned char *k = key; 63 + 64 + a = b = c = JHASH_INITVAL + length + initval; 65 + 66 + while (length > 12) { 67 + a += *(u32 *)(k); 68 + b += *(u32 *)(k + 4); 69 + c += *(u32 *)(k + 8); 70 + __jhash_mix(a, b, c); 71 + length -= 12; 72 + k += 12; 73 + } 74 + switch (length) { 75 + 
case 12: c += (u32)k[11]<<24; 76 + case 11: c += (u32)k[10]<<16; 77 + case 10: c += (u32)k[9]<<8; 78 + case 9: c += k[8]; 79 + case 8: b += (u32)k[7]<<24; 80 + case 7: b += (u32)k[6]<<16; 81 + case 6: b += (u32)k[5]<<8; 82 + case 5: b += k[4]; 83 + case 4: a += (u32)k[3]<<24; 84 + case 3: a += (u32)k[2]<<16; 85 + case 2: a += (u32)k[1]<<8; 86 + case 1: a += k[0]; 87 + __jhash_final(a, b, c); 88 + case 0: /* Nothing left to add */ 89 + break; 90 + } 91 + 92 + return c; 93 + } 94 + 95 + static __attribute__ ((noinline)) 96 + u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) 97 + { 98 + a += initval; 99 + b += initval; 100 + c += initval; 101 + __jhash_final(a, b, c); 102 + return c; 103 + } 104 + 105 + static __attribute__ ((noinline)) 106 + u32 jhash_2words(u32 a, u32 b, u32 initval) 107 + { 108 + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); 109 + } 110 + 111 + struct flow_key { 112 + union { 113 + __be32 src; 114 + __be32 srcv6[4]; 115 + }; 116 + union { 117 + __be32 dst; 118 + __be32 dstv6[4]; 119 + }; 120 + union { 121 + __u32 ports; 122 + __u16 port16[2]; 123 + }; 124 + __u8 proto; 125 + }; 126 + 127 + struct packet_description { 128 + struct flow_key flow; 129 + __u8 flags; 130 + }; 131 + 132 + struct ctl_value { 133 + union { 134 + __u64 value; 135 + __u32 ifindex; 136 + __u8 mac[6]; 137 + }; 138 + }; 139 + 140 + struct vip_definition { 141 + union { 142 + __be32 vip; 143 + __be32 vipv6[4]; 144 + }; 145 + __u16 port; 146 + __u16 family; 147 + __u8 proto; 148 + }; 149 + 150 + struct vip_meta { 151 + __u32 flags; 152 + __u32 vip_num; 153 + }; 154 + 155 + struct real_pos_lru { 156 + __u32 pos; 157 + __u64 atime; 158 + }; 159 + 160 + struct real_definition { 161 + union { 162 + __be32 dst; 163 + __be32 dstv6[4]; 164 + }; 165 + __u8 flags; 166 + }; 167 + 168 + struct lb_stats { 169 + __u64 v2; 170 + __u64 v1; 171 + }; 172 + 173 + struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = { 174 + .type = BPF_MAP_TYPE_HASH, 175 + 
.key_size = sizeof(struct vip_definition), 176 + .value_size = sizeof(struct vip_meta), 177 + .max_entries = 512, 178 + .map_flags = 0, 179 + }; 180 + 181 + struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = { 182 + .type = BPF_MAP_TYPE_LRU_HASH, 183 + .key_size = sizeof(struct flow_key), 184 + .value_size = sizeof(struct real_pos_lru), 185 + .max_entries = 300, 186 + .map_flags = 1U << 1, 187 + }; 188 + 189 + struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = { 190 + .type = BPF_MAP_TYPE_ARRAY, 191 + .key_size = sizeof(__u32), 192 + .value_size = sizeof(__u32), 193 + .max_entries = 12 * 655, 194 + .map_flags = 0, 195 + }; 196 + 197 + struct bpf_map_def __attribute__ ((section("maps"), used)) reals = { 198 + .type = BPF_MAP_TYPE_ARRAY, 199 + .key_size = sizeof(__u32), 200 + .value_size = sizeof(struct real_definition), 201 + .max_entries = 40, 202 + .map_flags = 0, 203 + }; 204 + 205 + struct bpf_map_def __attribute__ ((section("maps"), used)) stats = { 206 + .type = BPF_MAP_TYPE_PERCPU_ARRAY, 207 + .key_size = sizeof(__u32), 208 + .value_size = sizeof(struct lb_stats), 209 + .max_entries = 515, 210 + .map_flags = 0, 211 + }; 212 + 213 + struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = { 214 + .type = BPF_MAP_TYPE_ARRAY, 215 + .key_size = sizeof(__u32), 216 + .value_size = sizeof(struct ctl_value), 217 + .max_entries = 16, 218 + .map_flags = 0, 219 + }; 220 + 221 + struct eth_hdr { 222 + unsigned char eth_dest[6]; 223 + unsigned char eth_source[6]; 224 + unsigned short eth_proto; 225 + }; 226 + 227 + static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) 228 + { 229 + __u64 off = sizeof(struct eth_hdr); 230 + if (is_ipv6) { 231 + off += sizeof(struct ipv6hdr); 232 + if (is_icmp) 233 + off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr); 234 + } else { 235 + off += sizeof(struct iphdr); 236 + if (is_icmp) 237 + off += sizeof(struct icmphdr) + sizeof(struct iphdr); 238 + } 239 + return off; 240 + } 
241 + 242 + static __attribute__ ((noinline)) 243 + bool parse_udp(void *data, void *data_end, 244 + bool is_ipv6, struct packet_description *pckt) 245 + { 246 + 247 + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); 248 + __u64 off = calc_offset(is_ipv6, is_icmp); 249 + struct udphdr *udp; 250 + udp = data + off; 251 + 252 + if (udp + 1 > data_end) 253 + return 0; 254 + if (!is_icmp) { 255 + pckt->flow.port16[0] = udp->source; 256 + pckt->flow.port16[1] = udp->dest; 257 + } else { 258 + pckt->flow.port16[0] = udp->dest; 259 + pckt->flow.port16[1] = udp->source; 260 + } 261 + return 1; 262 + } 263 + 264 + static __attribute__ ((noinline)) 265 + bool parse_tcp(void *data, void *data_end, 266 + bool is_ipv6, struct packet_description *pckt) 267 + { 268 + 269 + bool is_icmp = !((pckt->flags & (1 << 0)) == 0); 270 + __u64 off = calc_offset(is_ipv6, is_icmp); 271 + struct tcphdr *tcp; 272 + 273 + tcp = data + off; 274 + if (tcp + 1 > data_end) 275 + return 0; 276 + if (tcp->syn) 277 + pckt->flags |= (1 << 1); 278 + if (!is_icmp) { 279 + pckt->flow.port16[0] = tcp->source; 280 + pckt->flow.port16[1] = tcp->dest; 281 + } else { 282 + pckt->flow.port16[0] = tcp->dest; 283 + pckt->flow.port16[1] = tcp->source; 284 + } 285 + return 1; 286 + } 287 + 288 + static __attribute__ ((noinline)) 289 + bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, 290 + struct packet_description *pckt, 291 + struct real_definition *dst, __u32 pkt_bytes) 292 + { 293 + struct eth_hdr *new_eth; 294 + struct eth_hdr *old_eth; 295 + struct ipv6hdr *ip6h; 296 + __u32 ip_suffix; 297 + void *data_end; 298 + void *data; 299 + 300 + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) 301 + return 0; 302 + data = (void *)(long)xdp->data; 303 + data_end = (void *)(long)xdp->data_end; 304 + new_eth = data; 305 + ip6h = data + sizeof(struct eth_hdr); 306 + old_eth = data + sizeof(struct ipv6hdr); 307 + if (new_eth + 1 > data_end || 308 + old_eth + 1 > data_end || ip6h + 1 > data_end) 309 + 
return 0; 310 + memcpy(new_eth->eth_dest, cval->mac, 6); 311 + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); 312 + new_eth->eth_proto = 56710; 313 + ip6h->version = 6; 314 + ip6h->priority = 0; 315 + memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl)); 316 + 317 + ip6h->nexthdr = IPPROTO_IPV6; 318 + ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0]; 319 + ip6h->payload_len = 320 + __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr)); 321 + ip6h->hop_limit = 4; 322 + 323 + ip6h->saddr.in6_u.u6_addr32[0] = 1; 324 + ip6h->saddr.in6_u.u6_addr32[1] = 2; 325 + ip6h->saddr.in6_u.u6_addr32[2] = 3; 326 + ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; 327 + memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); 328 + return 1; 329 + } 330 + 331 + static __attribute__ ((noinline)) 332 + bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, 333 + struct packet_description *pckt, 334 + struct real_definition *dst, __u32 pkt_bytes) 335 + { 336 + 337 + __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]); 338 + struct eth_hdr *new_eth; 339 + struct eth_hdr *old_eth; 340 + __u16 *next_iph_u16; 341 + struct iphdr *iph; 342 + __u32 csum = 0; 343 + void *data_end; 344 + void *data; 345 + 346 + ip_suffix <<= 15; 347 + ip_suffix ^= pckt->flow.src; 348 + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) 349 + return 0; 350 + data = (void *)(long)xdp->data; 351 + data_end = (void *)(long)xdp->data_end; 352 + new_eth = data; 353 + iph = data + sizeof(struct eth_hdr); 354 + old_eth = data + sizeof(struct iphdr); 355 + if (new_eth + 1 > data_end || 356 + old_eth + 1 > data_end || iph + 1 > data_end) 357 + return 0; 358 + memcpy(new_eth->eth_dest, cval->mac, 6); 359 + memcpy(new_eth->eth_source, old_eth->eth_dest, 6); 360 + new_eth->eth_proto = 8; 361 + iph->version = 4; 362 + iph->ihl = 5; 363 + iph->frag_off = 0; 364 + iph->protocol = IPPROTO_IPIP; 365 + iph->check = 0; 366 + iph->tos = 1; 367 + iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr)); 368 
+ /* don't update iph->daddr, since it will overwrite old eth_proto 369 + * and multiple iterations of bpf_prog_run() will fail 370 + */ 371 + 372 + iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst; 373 + iph->ttl = 4; 374 + 375 + next_iph_u16 = (__u16 *) iph; 376 + #pragma clang loop unroll(full) 377 + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) 378 + csum += *next_iph_u16++; 379 + iph->check = ~((csum & 0xffff) + (csum >> 16)); 380 + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) 381 + return 0; 382 + return 1; 383 + } 384 + 385 + static __attribute__ ((noinline)) 386 + bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) 387 + { 388 + struct eth_hdr *new_eth; 389 + struct eth_hdr *old_eth; 390 + 391 + old_eth = *data; 392 + new_eth = *data + sizeof(struct ipv6hdr); 393 + memcpy(new_eth->eth_source, old_eth->eth_source, 6); 394 + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); 395 + if (inner_v4) 396 + new_eth->eth_proto = 8; 397 + else 398 + new_eth->eth_proto = 56710; 399 + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) 400 + return 0; 401 + *data = (void *)(long)xdp->data; 402 + *data_end = (void *)(long)xdp->data_end; 403 + return 1; 404 + } 405 + 406 + static __attribute__ ((noinline)) 407 + bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) 408 + { 409 + struct eth_hdr *new_eth; 410 + struct eth_hdr *old_eth; 411 + 412 + old_eth = *data; 413 + new_eth = *data + sizeof(struct iphdr); 414 + memcpy(new_eth->eth_source, old_eth->eth_source, 6); 415 + memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); 416 + new_eth->eth_proto = 8; 417 + if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) 418 + return 0; 419 + *data = (void *)(long)xdp->data; 420 + *data_end = (void *)(long)xdp->data_end; 421 + return 1; 422 + } 423 + 424 + static __attribute__ ((noinline)) 425 + int swap_mac_and_send(void *data, void *data_end) 426 + { 427 + unsigned char tmp_mac[6]; 428 + struct eth_hdr *eth; 429 
+ 430 + eth = data; 431 + memcpy(tmp_mac, eth->eth_source, 6); 432 + memcpy(eth->eth_source, eth->eth_dest, 6); 433 + memcpy(eth->eth_dest, tmp_mac, 6); 434 + return XDP_TX; 435 + } 436 + 437 + static __attribute__ ((noinline)) 438 + int send_icmp_reply(void *data, void *data_end) 439 + { 440 + struct icmphdr *icmp_hdr; 441 + __u16 *next_iph_u16; 442 + __u32 tmp_addr = 0; 443 + struct iphdr *iph; 444 + __u32 csum1 = 0; 445 + __u32 csum = 0; 446 + __u64 off = 0; 447 + 448 + if (data + sizeof(struct eth_hdr) 449 + + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end) 450 + return XDP_DROP; 451 + off += sizeof(struct eth_hdr); 452 + iph = data + off; 453 + off += sizeof(struct iphdr); 454 + icmp_hdr = data + off; 455 + icmp_hdr->type = 0; 456 + icmp_hdr->checksum += 0x0007; 457 + iph->ttl = 4; 458 + tmp_addr = iph->daddr; 459 + iph->daddr = iph->saddr; 460 + iph->saddr = tmp_addr; 461 + iph->check = 0; 462 + next_iph_u16 = (__u16 *) iph; 463 + #pragma clang loop unroll(full) 464 + for (int i = 0; i < sizeof(struct iphdr) >> 1; i++) 465 + csum += *next_iph_u16++; 466 + iph->check = ~((csum & 0xffff) + (csum >> 16)); 467 + return swap_mac_and_send(data, data_end); 468 + } 469 + 470 + static __attribute__ ((noinline)) 471 + int send_icmp6_reply(void *data, void *data_end) 472 + { 473 + struct icmp6hdr *icmp_hdr; 474 + struct ipv6hdr *ip6h; 475 + __be32 tmp_addr[4]; 476 + __u64 off = 0; 477 + 478 + if (data + sizeof(struct eth_hdr) 479 + + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end) 480 + return XDP_DROP; 481 + off += sizeof(struct eth_hdr); 482 + ip6h = data + off; 483 + off += sizeof(struct ipv6hdr); 484 + icmp_hdr = data + off; 485 + icmp_hdr->icmp6_type = 129; 486 + icmp_hdr->icmp6_cksum -= 0x0001; 487 + ip6h->hop_limit = 4; 488 + memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16); 489 + memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16); 490 + memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16); 491 + return 
swap_mac_and_send(data, data_end); 492 + } 493 + 494 + static __attribute__ ((noinline)) 495 + int parse_icmpv6(void *data, void *data_end, __u64 off, 496 + struct packet_description *pckt) 497 + { 498 + struct icmp6hdr *icmp_hdr; 499 + struct ipv6hdr *ip6h; 500 + 501 + icmp_hdr = data + off; 502 + if (icmp_hdr + 1 > data_end) 503 + return XDP_DROP; 504 + if (icmp_hdr->icmp6_type == 128) 505 + return send_icmp6_reply(data, data_end); 506 + if (icmp_hdr->icmp6_type != 3) 507 + return XDP_PASS; 508 + off += sizeof(struct icmp6hdr); 509 + ip6h = data + off; 510 + if (ip6h + 1 > data_end) 511 + return XDP_DROP; 512 + pckt->flow.proto = ip6h->nexthdr; 513 + pckt->flags |= (1 << 0); 514 + memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16); 515 + memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16); 516 + return -1; 517 + } 518 + 519 + static __attribute__ ((noinline)) 520 + int parse_icmp(void *data, void *data_end, __u64 off, 521 + struct packet_description *pckt) 522 + { 523 + struct icmphdr *icmp_hdr; 524 + struct iphdr *iph; 525 + 526 + icmp_hdr = data + off; 527 + if (icmp_hdr + 1 > data_end) 528 + return XDP_DROP; 529 + if (icmp_hdr->type == 8) 530 + return send_icmp_reply(data, data_end); 531 + if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4)) 532 + return XDP_PASS; 533 + off += sizeof(struct icmphdr); 534 + iph = data + off; 535 + if (iph + 1 > data_end) 536 + return XDP_DROP; 537 + if (iph->ihl != 5) 538 + return XDP_DROP; 539 + pckt->flow.proto = iph->protocol; 540 + pckt->flags |= (1 << 0); 541 + pckt->flow.src = iph->daddr; 542 + pckt->flow.dst = iph->saddr; 543 + return -1; 544 + } 545 + 546 + static __attribute__ ((noinline)) 547 + __u32 get_packet_hash(struct packet_description *pckt, 548 + bool hash_16bytes) 549 + { 550 + if (hash_16bytes) 551 + return jhash_2words(jhash(pckt->flow.srcv6, 16, 12), 552 + pckt->flow.ports, 24); 553 + else 554 + return jhash_2words(pckt->flow.src, pckt->flow.ports, 555 + 24); 556 + } 557 + 558 + __attribute__ 
((noinline)) 559 + static bool get_packet_dst(struct real_definition **real, 560 + struct packet_description *pckt, 561 + struct vip_meta *vip_info, 562 + bool is_ipv6, void *lru_map) 563 + { 564 + struct real_pos_lru new_dst_lru = { }; 565 + bool hash_16bytes = is_ipv6; 566 + __u32 *real_pos, hash, key; 567 + __u64 cur_time; 568 + 569 + if (vip_info->flags & (1 << 2)) 570 + hash_16bytes = 1; 571 + if (vip_info->flags & (1 << 3)) { 572 + pckt->flow.port16[0] = pckt->flow.port16[1]; 573 + memset(pckt->flow.srcv6, 0, 16); 574 + } 575 + hash = get_packet_hash(pckt, hash_16bytes); 576 + if (hash != 0x358459b7 /* jhash of ipv4 packet */ && 577 + hash != 0x2f4bc6bb /* jhash of ipv6 packet */) 578 + return 0; 579 + key = 2 * vip_info->vip_num + hash % 2; 580 + real_pos = bpf_map_lookup_elem(&ch_rings, &key); 581 + if (!real_pos) 582 + return 0; 583 + key = *real_pos; 584 + *real = bpf_map_lookup_elem(&reals, &key); 585 + if (!(*real)) 586 + return 0; 587 + if (!(vip_info->flags & (1 << 1))) { 588 + __u32 conn_rate_key = 512 + 2; 589 + struct lb_stats *conn_rate_stats = 590 + bpf_map_lookup_elem(&stats, &conn_rate_key); 591 + 592 + if (!conn_rate_stats) 593 + return 1; 594 + cur_time = bpf_ktime_get_ns(); 595 + if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) { 596 + conn_rate_stats->v1 = 1; 597 + conn_rate_stats->v2 = cur_time; 598 + } else { 599 + conn_rate_stats->v1 += 1; 600 + if (conn_rate_stats->v1 >= 1) 601 + return 1; 602 + } 603 + if (pckt->flow.proto == IPPROTO_UDP) 604 + new_dst_lru.atime = cur_time; 605 + new_dst_lru.pos = key; 606 + bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0); 607 + } 608 + return 1; 609 + } 610 + 611 + __attribute__ ((noinline)) 612 + static void connection_table_lookup(struct real_definition **real, 613 + struct packet_description *pckt, 614 + void *lru_map) 615 + { 616 + 617 + struct real_pos_lru *dst_lru; 618 + __u64 cur_time; 619 + __u32 key; 620 + 621 + dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow); 622 + if 
(!dst_lru) 623 + return; 624 + if (pckt->flow.proto == IPPROTO_UDP) { 625 + cur_time = bpf_ktime_get_ns(); 626 + if (cur_time - dst_lru->atime > 300000) 627 + return; 628 + dst_lru->atime = cur_time; 629 + } 630 + key = dst_lru->pos; 631 + *real = bpf_map_lookup_elem(&reals, &key); 632 + } 633 + 634 + /* don't believe your eyes! 635 + * below function has 6 arguments whereas bpf and llvm allow maximum of 5 636 + * but since it's _static_ llvm can optimize one argument away 637 + */ 638 + __attribute__ ((noinline)) 639 + static int process_l3_headers_v6(struct packet_description *pckt, 640 + __u8 *protocol, __u64 off, 641 + __u16 *pkt_bytes, void *data, 642 + void *data_end) 643 + { 644 + struct ipv6hdr *ip6h; 645 + __u64 iph_len; 646 + int action; 647 + 648 + ip6h = data + off; 649 + if (ip6h + 1 > data_end) 650 + return XDP_DROP; 651 + iph_len = sizeof(struct ipv6hdr); 652 + *protocol = ip6h->nexthdr; 653 + pckt->flow.proto = *protocol; 654 + *pkt_bytes = __builtin_bswap16(ip6h->payload_len); 655 + off += iph_len; 656 + if (*protocol == 45) { 657 + return XDP_DROP; 658 + } else if (*protocol == 59) { 659 + action = parse_icmpv6(data, data_end, off, pckt); 660 + if (action >= 0) 661 + return action; 662 + } else { 663 + memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16); 664 + memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16); 665 + } 666 + return -1; 667 + } 668 + 669 + __attribute__ ((noinline)) 670 + static int process_l3_headers_v4(struct packet_description *pckt, 671 + __u8 *protocol, __u64 off, 672 + __u16 *pkt_bytes, void *data, 673 + void *data_end) 674 + { 675 + struct iphdr *iph; 676 + __u64 iph_len; 677 + int action; 678 + 679 + iph = data + off; 680 + if (iph + 1 > data_end) 681 + return XDP_DROP; 682 + if (iph->ihl != 5) 683 + return XDP_DROP; 684 + *protocol = iph->protocol; 685 + pckt->flow.proto = *protocol; 686 + *pkt_bytes = __builtin_bswap16(iph->tot_len); 687 + off += 20; 688 + if (iph->frag_off & 65343) 689 + return XDP_DROP; 690 
+ if (*protocol == IPPROTO_ICMP) { 691 + action = parse_icmp(data, data_end, off, pckt); 692 + if (action >= 0) 693 + return action; 694 + } else { 695 + pckt->flow.src = iph->saddr; 696 + pckt->flow.dst = iph->daddr; 697 + } 698 + return -1; 699 + } 700 + 701 + __attribute__ ((noinline)) 702 + static int process_packet(void *data, __u64 off, void *data_end, 703 + bool is_ipv6, struct xdp_md *xdp) 704 + { 705 + 706 + struct real_definition *dst = NULL; 707 + struct packet_description pckt = { }; 708 + struct vip_definition vip = { }; 709 + struct lb_stats *data_stats; 710 + struct eth_hdr *eth = data; 711 + void *lru_map = &lru_cache; 712 + struct vip_meta *vip_info; 713 + __u32 lru_stats_key = 513; 714 + __u32 mac_addr_pos = 0; 715 + __u32 stats_key = 512; 716 + struct ctl_value *cval; 717 + __u16 pkt_bytes; 718 + __u64 iph_len; 719 + __u8 protocol; 720 + __u32 vip_num; 721 + int action; 722 + 723 + if (is_ipv6) 724 + action = process_l3_headers_v6(&pckt, &protocol, off, 725 + &pkt_bytes, data, data_end); 726 + else 727 + action = process_l3_headers_v4(&pckt, &protocol, off, 728 + &pkt_bytes, data, data_end); 729 + if (action >= 0) 730 + return action; 731 + protocol = pckt.flow.proto; 732 + if (protocol == IPPROTO_TCP) { 733 + if (!parse_tcp(data, data_end, is_ipv6, &pckt)) 734 + return XDP_DROP; 735 + } else if (protocol == IPPROTO_UDP) { 736 + if (!parse_udp(data, data_end, is_ipv6, &pckt)) 737 + return XDP_DROP; 738 + } else { 739 + return XDP_TX; 740 + } 741 + 742 + if (is_ipv6) 743 + memcpy(vip.vipv6, pckt.flow.dstv6, 16); 744 + else 745 + vip.vip = pckt.flow.dst; 746 + vip.port = pckt.flow.port16[1]; 747 + vip.proto = pckt.flow.proto; 748 + vip_info = bpf_map_lookup_elem(&vip_map, &vip); 749 + if (!vip_info) { 750 + vip.port = 0; 751 + vip_info = bpf_map_lookup_elem(&vip_map, &vip); 752 + if (!vip_info) 753 + return XDP_PASS; 754 + if (!(vip_info->flags & (1 << 4))) 755 + pckt.flow.port16[1] = 0; 756 + } 757 + if (data_end - data > 1400) 758 + return 
XDP_DROP; 759 + data_stats = bpf_map_lookup_elem(&stats, &stats_key); 760 + if (!data_stats) 761 + return XDP_DROP; 762 + data_stats->v1 += 1; 763 + if (!dst) { 764 + if (vip_info->flags & (1 << 0)) 765 + pckt.flow.port16[0] = 0; 766 + if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1))) 767 + connection_table_lookup(&dst, &pckt, lru_map); 768 + if (dst) 769 + goto out; 770 + if (pckt.flow.proto == IPPROTO_TCP) { 771 + struct lb_stats *lru_stats = 772 + bpf_map_lookup_elem(&stats, &lru_stats_key); 773 + 774 + if (!lru_stats) 775 + return XDP_DROP; 776 + if (pckt.flags & (1 << 1)) 777 + lru_stats->v1 += 1; 778 + else 779 + lru_stats->v2 += 1; 780 + } 781 + if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map)) 782 + return XDP_DROP; 783 + data_stats->v2 += 1; 784 + } 785 + out: 786 + cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos); 787 + if (!cval) 788 + return XDP_DROP; 789 + if (dst->flags & (1 << 0)) { 790 + if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes)) 791 + return XDP_DROP; 792 + } else { 793 + if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes)) 794 + return XDP_DROP; 795 + } 796 + vip_num = vip_info->vip_num; 797 + data_stats = bpf_map_lookup_elem(&stats, &vip_num); 798 + if (!data_stats) 799 + return XDP_DROP; 800 + data_stats->v1 += 1; 801 + data_stats->v2 += pkt_bytes; 802 + 803 + data = (void *)(long)xdp->data; 804 + data_end = (void *)(long)xdp->data_end; 805 + if (data + 4 > data_end) 806 + return XDP_DROP; 807 + *(u32 *)data = dst->dst; 808 + return XDP_DROP; 809 + } 810 + 811 + __attribute__ ((section("xdp-test"), used)) 812 + int balancer_ingress(struct xdp_md *ctx) 813 + { 814 + void *data = (void *)(long)ctx->data; 815 + void *data_end = (void *)(long)ctx->data_end; 816 + struct eth_hdr *eth = data; 817 + __u32 eth_proto; 818 + __u32 nh_off; 819 + 820 + nh_off = sizeof(struct eth_hdr); 821 + if (data + nh_off > data_end) 822 + return XDP_DROP; 823 + eth_proto = eth->eth_proto; 824 + if (eth_proto == 8) 825 + return 
process_packet(data, nh_off, data_end, 0, ctx); 826 + else if (eth_proto == 56710) 827 + return process_packet(data, nh_off, data_end, 1, ctx); 828 + else 829 + return XDP_DROP; 830 + } 831 + 832 + char _license[] __attribute__ ((section("license"), used)) = "GPL"; 833 + int _version __attribute__ ((section("version"), used)) = 1;