Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

flow_dissector: implements eBPF parser

This eBPF program extracts basic/control/ip address/ports keys from
incoming packets. It supports recursive parsing for IP encapsulation,
and VLAN, along with IPv4/IPv6 and extension headers. This program is
meant to show how flow dissection and key extraction can be done in
eBPF.

Link: http://vger.kernel.org/netconf2017_files/rx_hardening_and_udp_gso.pdf
Signed-off-by: Petar Penkov <ppenkov@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Petar Penkov and committed by
Alexei Starovoitov
9c98b13c c22fbae7

+374 -1
+1 -1
tools/testing/selftests/bpf/Makefile
··· 35 35 test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \ 36 36 test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \ 37 37 get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \ 38 - test_skb_cgroup_id_kern.o 38 + test_skb_cgroup_id_kern.o bpf_flow.o 39 39 40 40 # Order correspond to 'make run_tests' order 41 41 TEST_PROGS := test_kmod.sh \
+373
tools/testing/selftests/bpf/bpf_flow.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <limits.h> 3 + #include <stddef.h> 4 + #include <stdbool.h> 5 + #include <string.h> 6 + #include <linux/pkt_cls.h> 7 + #include <linux/bpf.h> 8 + #include <linux/in.h> 9 + #include <linux/if_ether.h> 10 + #include <linux/icmp.h> 11 + #include <linux/ip.h> 12 + #include <linux/ipv6.h> 13 + #include <linux/tcp.h> 14 + #include <linux/udp.h> 15 + #include <linux/if_packet.h> 16 + #include <sys/socket.h> 17 + #include <linux/if_tunnel.h> 18 + #include <linux/mpls.h> 19 + #include "bpf_helpers.h" 20 + #include "bpf_endian.h" 21 + 22 + int _version SEC("version") = 1; 23 + #define PROG(F) SEC(#F) int bpf_func_##F 24 + 25 + /* These are the identifiers of the BPF programs that will be used in tail 26 + * calls. Name is limited to 16 characters, with the terminating character and 27 + * bpf_func_ above, we have only 6 to work with, anything after will be cropped. 28 + */ 29 + enum { 30 + IP, 31 + IPV6, 32 + IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */ 33 + IPV6FR, /* Fragmentation IPv6 Extension Header */ 34 + MPLS, 35 + VLAN, 36 + }; 37 + 38 + #define IP_MF 0x2000 39 + #define IP_OFFSET 0x1FFF 40 + #define IP6_MF 0x0001 41 + #define IP6_OFFSET 0xFFF8 42 + 43 + struct vlan_hdr { 44 + __be16 h_vlan_TCI; 45 + __be16 h_vlan_encapsulated_proto; 46 + }; 47 + 48 + struct gre_hdr { 49 + __be16 flags; 50 + __be16 proto; 51 + }; 52 + 53 + struct frag_hdr { 54 + __u8 nexthdr; 55 + __u8 reserved; 56 + __be16 frag_off; 57 + __be32 identification; 58 + }; 59 + 60 + struct bpf_map_def SEC("maps") jmp_table = { 61 + .type = BPF_MAP_TYPE_PROG_ARRAY, 62 + .key_size = sizeof(__u32), 63 + .value_size = sizeof(__u32), 64 + .max_entries = 8 65 + }; 66 + 67 + static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, 68 + __u16 hdr_size, 69 + void *buffer) 70 + { 71 + void *data_end = (void *)(long)skb->data_end; 72 + void *data = (void *)(long)skb->data; 73 + __u16 nhoff = skb->flow_keys->nhoff; 74 + __u8 *hdr; 75 + 76 + /* Verifies this variable offset does not overflow */ 77 + if (nhoff > (USHRT_MAX - hdr_size)) 78 + return NULL; 79 + 80 + hdr = data + nhoff; 81 + if (hdr + hdr_size <= data_end) 82 + return hdr; 83 + 84 + if (bpf_skb_load_bytes(skb, nhoff, buffer, hdr_size)) 85 + return NULL; 86 + 87 + return buffer; 88 + } 89 + 90 + /* Dispatches on ETHERTYPE */ 91 + static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) 92 + { 93 + struct bpf_flow_keys *keys = skb->flow_keys; 94 + 95 + keys->n_proto = proto; 96 + switch (proto) { 97 + case bpf_htons(ETH_P_IP): 98 + bpf_tail_call(skb, &jmp_table, IP); 99 + break; 100 + case bpf_htons(ETH_P_IPV6): 101 + bpf_tail_call(skb, &jmp_table, IPV6); 102 + break; 103 + case bpf_htons(ETH_P_MPLS_MC): 104 + case bpf_htons(ETH_P_MPLS_UC): 105 + bpf_tail_call(skb, &jmp_table, MPLS); 106 + break; 107 + case bpf_htons(ETH_P_8021Q): 108 + case bpf_htons(ETH_P_8021AD): 109 + bpf_tail_call(skb, &jmp_table, VLAN); 110 + break; 111 + default: 112 + /* Protocol not supported */ 113 + return BPF_DROP; 114 + } 115 + 116 + return BPF_DROP; 117 + } 118 + 119 + SEC("dissect") 120 + int dissect(struct __sk_buff *skb) 121 + { 122 + if (!skb->vlan_present) 123 + return parse_eth_proto(skb, skb->protocol); 124 + else 125 + return parse_eth_proto(skb, skb->vlan_proto); 126 + } 127 + 128 + /* Parses on IPPROTO_* */ 129 + static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) 130 + { 131 + struct bpf_flow_keys *keys = skb->flow_keys; 132 + void *data_end = (void *)(long)skb->data_end; 133 + struct icmphdr *icmp, _icmp; 134 + struct gre_hdr *gre, _gre; 135 + struct ethhdr *eth, _eth; 136 + struct tcphdr *tcp, _tcp; 137 + struct udphdr *udp, _udp; 138 + 139 + keys->ip_proto = proto; 140 + switch (proto) { 141 + case IPPROTO_ICMP: 142 + icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); 143 + if (!icmp) 144 + return BPF_DROP; 145 + return BPF_OK; 146 + case IPPROTO_IPIP: 147 + keys->is_encap = true; 148 + return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); 149 + case IPPROTO_IPV6: 150 + keys->is_encap = true; 151 + return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6)); 152 + case IPPROTO_GRE: 153 + gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); 154 + if (!gre) 155 + return BPF_DROP; 156 + 157 + if (bpf_htons(gre->flags & GRE_VERSION)) 158 + /* Only inspect standard GRE packets with version 0 */ 159 + return BPF_OK; 160 + 161 + keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */ 162 + if (GRE_IS_CSUM(gre->flags)) 163 + keys->nhoff += 4; /* Step over chksum and Padding */ 164 + if (GRE_IS_KEY(gre->flags)) 165 + keys->nhoff += 4; /* Step over key */ 166 + if (GRE_IS_SEQ(gre->flags)) 167 + keys->nhoff += 4; /* Step over sequence number */ 168 + 169 + keys->is_encap = true; 170 + 171 + if (gre->proto == bpf_htons(ETH_P_TEB)) { 172 + eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), 173 + &_eth); 174 + if (!eth) 175 + return BPF_DROP; 176 + 177 + keys->nhoff += sizeof(*eth); 178 + 179 + return parse_eth_proto(skb, eth->h_proto); 180 + } else { 181 + return parse_eth_proto(skb, gre->proto); 182 + } 183 + case IPPROTO_TCP: 184 + tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp); 185 + if (!tcp) 186 + return BPF_DROP; 187 + 188 + if (tcp->doff < 5) 189 + return BPF_DROP; 190 + 191 + if ((__u8 *)tcp + (tcp->doff << 2) > data_end) 192 + return BPF_DROP; 193 + 194 + keys->thoff = keys->nhoff; 195 + keys->sport = tcp->source; 196 + keys->dport = tcp->dest; 197 + return BPF_OK; 198 + case IPPROTO_UDP: 199 + case IPPROTO_UDPLITE: 200 + udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); 201 + if (!udp) 202 + return BPF_DROP; 203 + 204 + keys->thoff = keys->nhoff; 205 + keys->sport = udp->source; 206 + keys->dport = udp->dest; 207 + return BPF_OK; 208 + default: 209 + return BPF_DROP; 210 + } 211 + 212 + return BPF_DROP; 213 + } 214 + 215 + static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) 216 + { 217 + struct bpf_flow_keys *keys = skb->flow_keys; 218 + 219 + keys->ip_proto = nexthdr; 220 + switch (nexthdr) { 221 + case IPPROTO_HOPOPTS: 222 + case IPPROTO_DSTOPTS: 223 + bpf_tail_call(skb, &jmp_table, IPV6OP); 224 + break; 225 + case IPPROTO_FRAGMENT: 226 + bpf_tail_call(skb, &jmp_table, IPV6FR); 227 + break; 228 + default: 229 + return parse_ip_proto(skb, nexthdr); 230 + } 231 + 232 + return BPF_DROP; 233 + } 234 + 235 + PROG(IP)(struct __sk_buff *skb) 236 + { 237 + void *data_end = (void *)(long)skb->data_end; 238 + struct bpf_flow_keys *keys = skb->flow_keys; 239 + void *data = (void *)(long)skb->data; 240 + struct iphdr *iph, _iph; 241 + bool done = false; 242 + 243 + iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); 244 + if (!iph) 245 + return BPF_DROP; 246 + 247 + /* IP header cannot be smaller than 20 bytes */ 248 + if (iph->ihl < 5) 249 + return BPF_DROP; 250 + 251 + keys->addr_proto = ETH_P_IP; 252 + keys->ipv4_src = iph->saddr; 253 + keys->ipv4_dst = iph->daddr; 254 + 255 + keys->nhoff += iph->ihl << 2; 256 + if (data + keys->nhoff > data_end) 257 + return BPF_DROP; 258 + 259 + if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { 260 + keys->is_frag = true; 261 + if (iph->frag_off & bpf_htons(IP_OFFSET)) 262 + /* From second fragment on, packets do not have headers 263 + * we can parse. 264 + */ 265 + done = true; 266 + else 267 + keys->is_first_frag = true; 268 + } 269 + 270 + if (done) 271 + return BPF_OK; 272 + 273 + return parse_ip_proto(skb, iph->protocol); 274 + } 275 + 276 + PROG(IPV6)(struct __sk_buff *skb) 277 + { 278 + struct bpf_flow_keys *keys = skb->flow_keys; 279 + struct ipv6hdr *ip6h, _ip6h; 280 + 281 + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); 282 + if (!ip6h) 283 + return BPF_DROP; 284 + 285 + keys->addr_proto = ETH_P_IPV6; 286 + memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); 287 + 288 + keys->nhoff += sizeof(struct ipv6hdr); 289 + 290 + return parse_ipv6_proto(skb, ip6h->nexthdr); 291 + } 292 + 293 + PROG(IPV6OP)(struct __sk_buff *skb) 294 + { 295 + struct ipv6_opt_hdr *ip6h, _ip6h; 296 + 297 + ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); 298 + if (!ip6h) 299 + return BPF_DROP; 300 + 301 + /* hlen is in 8-octets and does not include the first 8 bytes 302 + * of the header 303 + */ 304 + skb->flow_keys->nhoff += (1 + ip6h->hdrlen) << 3; 305 + 306 + return parse_ipv6_proto(skb, ip6h->nexthdr); 307 + } 308 + 309 + PROG(IPV6FR)(struct __sk_buff *skb) 310 + { 311 + struct bpf_flow_keys *keys = skb->flow_keys; 312 + struct frag_hdr *fragh, _fragh; 313 + 314 + fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); 315 + if (!fragh) 316 + return BPF_DROP; 317 + 318 + keys->nhoff += sizeof(*fragh); 319 + keys->is_frag = true; 320 + if (!(fragh->frag_off & bpf_htons(IP6_OFFSET))) 321 + keys->is_first_frag = true; 322 + 323 + return parse_ipv6_proto(skb, fragh->nexthdr); 324 + } 325 + 326 + PROG(MPLS)(struct __sk_buff *skb) 327 + { 328 + struct mpls_label *mpls, _mpls; 329 + 330 + mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); 331 + if (!mpls) 332 + return BPF_DROP; 333 + 334 + return BPF_OK; 335 + } 336 + 337 + PROG(VLAN)(struct __sk_buff *skb) 338 + { 339 + struct bpf_flow_keys *keys = skb->flow_keys; 340 + struct vlan_hdr *vlan, _vlan; 341 + __be16 proto; 342 + 343 + /* Peek back to see if single or double-tagging */ 344 + if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(proto), &proto, 345 + sizeof(proto))) 346 + return BPF_DROP; 347 + 348 + /* Account for double-tagging */ 349 + if (proto == bpf_htons(ETH_P_8021AD)) { 350 + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); 351 + if (!vlan) 352 + return BPF_DROP; 353 + 354 + if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) 355 + return BPF_DROP; 356 + 357 + keys->nhoff += sizeof(*vlan); 358 + } 359 + 360 + vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); 361 + if (!vlan) 362 + return BPF_DROP; 363 + 364 + keys->nhoff += sizeof(*vlan); 365 + /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ 366 + if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || 367 + vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) 368 + return BPF_DROP; 369 + 370 + return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); 371 + } 372 + 373 + char __license[] SEC("license") = "GPL";