Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

samples/bpf/xdp_redirect_cpu.bpf.c at tag v5.15 — 541 lines, 12 kB (view raw)
/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include "vmlinux.h"
#include "xdp_sample.bpf.h"
#include "xdp_sample_shared.h"
#include "hash_func01.h"

/* NOTE(review): rx_cnt, nr_cpus, struct datarec, NO_TEAR_INC() and
 * swap_src_dst_mac() are not defined in this file; presumably they come
 * from the xdp_sample*.h headers included above — verify there.
 */

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_cpumap_val));
} cpu_map SEC(".maps");

/* Set of maps controlling available CPU, and for iterating through
 * selectable redirect CPUs.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
} cpus_available SEC(".maps");

/* Single entry (key 0): number of usable slots in cpus_available */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");

/* Per-CPU round-robin cursor into cpus_available (used by prognum2);
 * PERCPU so each RX CPU iterates independently without contention.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");

/* Egress device used by the xdp_cpumap/redirect program below */
struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(struct bpf_devmap_val));
	__uint(max_entries, 1);
} tx_port SEC(".maps");

/* Source MAC written into egress frames; expected to be filled in by
 * the userspace loader (global data is writable from userspace).
 */
char tx_mac_addr[ETH_ALEN];

/* Helper parse functions */

/* Parse the Ethernet header, skipping up to two VLAN tags.
 * On success stores the host-order EtherType in *eth_proto and the L3
 * header offset (from start of frame) in *l3_offset, and returns true.
 * Returns false for truncated frames or non-802.3 Ethertypes.  Every
 * advance of 'offset' is re-checked against data_end, as the BPF
 * verifier requires before each packet access.
 */
static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == bpf_htons(ETH_P_8021Q) ||
	    eth_type == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == bpf_htons(ETH_P_8021Q) ||
	    eth_type == bpf_htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = bpf_ntohs(eth_type);
	*l3_offset = offset;
	return true;
}

/* Return the UDP destination port (host order) for an IPv4/UDP packet
 * whose IP header starts at nh_off; returns 0 if the packet is too
 * short or not UDP.  Note 0 doubles as the error value, so real port 0
 * is indistinguishable from "not UDP".
 * NOTE(review): steps past sizeof(*iph) rather than iph->ihl * 4, so
 * packets carrying IPv4 options would be misparsed here.
 */
static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = bpf_ntohs(udph->dest);
	return dport;
}

/* Return the IPv4 protocol field, or 0 if the header doesn't fit */
static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

/* Return the IPv6 nexthdr field, or 0 if the header doesn't fit.
 * (Extension headers are not walked; only the first nexthdr is read.)
 */
static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

/* Baseline program: redirect every frame to the CPU stored in
 * cpus_available[0] without ever reading packet data (no cache-line
 * touch of the payload).
 */
SEC("xdp")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest = 0;
	u32 key0 = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Per-RX-CPU stats record (rec keyed by this CPU's id) */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	/* Guard against stale/bogus userspace config */
	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Like prognum0, but deliberately reads the Ethernet header first
 * (forces the packet cache line in) and drops non-802.3 Ethertypes.
 */
SEC("xdp")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest = 0;
	u32 key0 = 0;
	u16 eth_type;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
		NO_TEAR_INC(rec->dropped);
		return XDP_DROP;
	}

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Spread frames across the first *cpu_max entries of cpus_available in
 * round-robin order; the cursor lives in a PERCPU array, so each RX CPU
 * rotates through the destinations independently.
 */
SEC("xdp")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct datarec *rec;
	u32 cpu_dest = 0;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	/* Advance cursor, wrapping at *cpu_max (no atomics needed: the
	 * iterator entry is per-CPU)
	 */
	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Steer by L4 protocol: TCP -> slot 0, UDP -> slot 1, ICMP/ICMPv6 ->
 * slot 2 of cpus_available.
 * NOTE(review): because ip_proto is initialized to IPPROTO_UDP and the
 * second switch always runs, the ARP/default cpu_idx = 0 assignments in
 * the first switch are overwritten (ARP ends up in the UDP case, slot
 * 1), despite the "separate CPU" comment — confirm intent upstream.
 */
SEC("xdp")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 *cpu_lookup;
	u32 cpu_idx = 0;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Same steering as prognum3, plus an inline DDoS filter: IPv4/UDP
 * frames to port 9 (pktgen discard traffic) are dropped before
 * redirect.  The same ip_proto-initialization quirk as prognum3
 * applies to ARP frames here.
 */
SEC("xdp")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 *cpu_lookup;
	u32 cpu_idx = 0;
	u16 dest_port;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			NO_TEAR_INC(rec->dropped);
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863

/* Flow hash for IPv4: saddr + daddr folded through SuperFastHash,
 * seeded with the L4 protocol.  Addition makes it symmetric (src/dst
 * swap yields the same hash).  Returns 0 on truncated header.
 */
static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

/* IPv6 counterpart: sums all four 32-bit words of both addresses
 * (symmetric, like the IPv4 variant), seeded with nexthdr.
 */
static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
	cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The
 * hashing scheme is symmetric, meaning swapping IP src/dest still hit
 * same CPU.
 */
SEC("xdp")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	u32 key = bpf_get_smp_processor_id();
	struct ethhdr *eth = data;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key0 = 0;
	u32 *cpu_max;
	u32 cpu_hash;

	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_PASS;
	NO_TEAR_INC(rec->processed);

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	/* NOTE(review): if userspace leaves *cpu_max at 0 this is a
	 * mod-by-zero; eBPF defines the result as 0 rather than
	 * trapping — confirm against the eBPF instruction-set spec.
	 */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= nr_cpus) {
		NO_TEAR_INC(rec->issue);
		return XDP_ABORTED;
	}
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Runs on the remote CPU after cpumap delivery: swap src/dst MAC and
 * bounce the frame out via the device in tx_port slot 0.
 */
SEC("xdp_cpumap/redirect")
int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off;

	nh_off = sizeof(*eth);
	if (data + nh_off > data_end)
		return XDP_DROP;

	swap_src_dst_mac(data);
	return bpf_redirect_map(&tx_port, 0, 0);
}

/* Trivial cpumap program: pass every frame up the stack */
SEC("xdp_cpumap/pass")
int xdp_redirect_cpu_pass(struct xdp_md *ctx)
{
	return XDP_PASS;
}

/* Trivial cpumap program: drop every frame */
SEC("xdp_cpumap/drop")
int xdp_redirect_cpu_drop(struct xdp_md *ctx)
{
	return XDP_DROP;
}

/* Devmap egress program: stamp our MAC (tx_mac_addr, set by userspace)
 * as the source address before transmit.
 */
SEC("xdp_devmap/egress")
int xdp_redirect_egress_prog(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u64 nh_off;

	nh_off = sizeof(*eth);
	if (data + nh_off > data_end)
		return XDP_DROP;

	__builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";