Merge branch 'fix-bpf_redirect' · tjh.dev/kernel@79774d6

+15

include/linux/netdevice.h

··· 3354 3354 bool is_skb_forwardable(const struct net_device *dev, 3355 3355 const struct sk_buff *skb); 3356 3356 3357 + static __always_inline int ____dev_forward_skb(struct net_device *dev, 3358 + struct sk_buff *skb) 3359 + { 3360 + if (skb_orphan_frags(skb, GFP_ATOMIC) || 3361 + unlikely(!is_skb_forwardable(dev, skb))) { 3362 + atomic_long_inc(&dev->rx_dropped); 3363 + kfree_skb(skb); 3364 + return NET_RX_DROP; 3365 + } 3366 + 3367 + skb_scrub_packet(skb, true); 3368 + skb->priority = 0; 3369 + return 0; 3370 + } 3371 + 3357 3372 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); 3358 3373 3359 3374 extern int netdev_budget;

+6 -11

net/core/dev.c

··· 1766 1766 1767 1767 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb) 1768 1768 { 1769 - if (skb_orphan_frags(skb, GFP_ATOMIC) || 1770 - unlikely(!is_skb_forwardable(dev, skb))) { 1771 - atomic_long_inc(&dev->rx_dropped); 1772 - kfree_skb(skb); 1773 - return NET_RX_DROP; 1769 + int ret = ____dev_forward_skb(dev, skb); 1770 + 1771 + if (likely(!ret)) { 1772 + skb->protocol = eth_type_trans(skb, dev); 1773 + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 1774 1774 } 1775 1775 1776 - skb_scrub_packet(skb, true); 1777 - skb->priority = 0; 1778 - skb->protocol = eth_type_trans(skb, dev); 1779 - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 1780 - 1781 - return 0; 1776 + return ret; 1782 1777 } 1783 1778 EXPORT_SYMBOL_GPL(__dev_forward_skb); 1784 1779

+60 -8

net/core/filter.c

··· 1628 1628 return dev_forward_skb(dev, skb); 1629 1629 } 1630 1630 1631 + static inline int __bpf_rx_skb_no_mac(struct net_device *dev, 1632 + struct sk_buff *skb) 1633 + { 1634 + int ret = ____dev_forward_skb(dev, skb); 1635 + 1636 + if (likely(!ret)) { 1637 + skb->dev = dev; 1638 + ret = netif_rx(skb); 1639 + } 1640 + 1641 + return ret; 1642 + } 1643 + 1631 1644 static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) 1632 1645 { 1633 1646 int ret; ··· 1658 1645 __this_cpu_dec(xmit_recursion); 1659 1646 1660 1647 return ret; 1648 + } 1649 + 1650 + static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev, 1651 + u32 flags) 1652 + { 1653 + /* skb->mac_len is not set on normal egress */ 1654 + unsigned int mlen = skb->network_header - skb->mac_header; 1655 + 1656 + __skb_pull(skb, mlen); 1657 + 1658 + /* At ingress, the mac header has already been pulled once. 1659 + * At egress, skb_postpull_rcsum has to be done in case that 1660 + * the skb is originated from ingress (i.e. a forwarded skb) 1661 + * to ensure that rcsum starts at net header. 1662 + */ 1663 + if (!skb_at_tc_ingress(skb)) 1664 + skb_postpull_rcsum(skb, skb_mac_header(skb), mlen); 1665 + skb_pop_mac_header(skb); 1666 + skb_reset_mac_len(skb); 1667 + return flags & BPF_F_INGRESS ? 1668 + __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb); 1669 + } 1670 + 1671 + static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev, 1672 + u32 flags) 1673 + { 1674 + bpf_push_mac_rcsum(skb); 1675 + return flags & BPF_F_INGRESS ? 
1676 + __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); 1677 + } 1678 + 1679 + static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, 1680 + u32 flags) 1681 + { 1682 + switch (dev->type) { 1683 + case ARPHRD_TUNNEL: 1684 + case ARPHRD_TUNNEL6: 1685 + case ARPHRD_SIT: 1686 + case ARPHRD_IPGRE: 1687 + case ARPHRD_VOID: 1688 + case ARPHRD_NONE: 1689 + return __bpf_redirect_no_mac(skb, dev, flags); 1690 + default: 1691 + return __bpf_redirect_common(skb, dev, flags); 1692 + } 1661 1693 } 1662 1694 1663 1695 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) ··· 1733 1675 return -ENOMEM; 1734 1676 } 1735 1677 1736 - bpf_push_mac_rcsum(clone); 1737 - 1738 - return flags & BPF_F_INGRESS ? 1739 - __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone); 1678 + return __bpf_redirect(clone, dev, flags); 1740 1679 } 1741 1680 1742 1681 static const struct bpf_func_proto bpf_clone_redirect_proto = { ··· 1777 1722 return -EINVAL; 1778 1723 } 1779 1724 1780 - bpf_push_mac_rcsum(skb); 1781 - 1782 - return ri->flags & BPF_F_INGRESS ? 1783 - __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); 1725 + return __bpf_redirect(skb, dev, ri->flags); 1784 1726 } 1785 1727 1786 1728 static const struct bpf_func_proto bpf_redirect_proto = {

+4

samples/bpf/Makefile

··· 27 27 hostprogs-y += test_current_task_under_cgroup 28 28 hostprogs-y += trace_event 29 29 hostprogs-y += sampleip 30 + hostprogs-y += tc_l2_redirect 30 31 31 32 test_verifier-objs := test_verifier.o libbpf.o 32 33 test_maps-objs := test_maps.o libbpf.o ··· 57 56 test_current_task_under_cgroup_user.o 58 57 trace_event-objs := bpf_load.o libbpf.o trace_event_user.o 59 58 sampleip-objs := bpf_load.o libbpf.o sampleip_user.o 59 + tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o 60 60 61 61 # Tell kbuild to always build the programs 62 62 always := $(hostprogs-y) ··· 74 72 always += trace_output_kern.o 75 73 always += tcbpf1_kern.o 76 74 always += tcbpf2_kern.o 75 + always += tc_l2_redirect_kern.o 77 76 always += lathist_kern.o 78 77 always += offwaketime_kern.o 79 78 always += spintest_kern.o ··· 114 111 HOSTLOADLIBES_test_current_task_under_cgroup += -lelf 115 112 HOSTLOADLIBES_trace_event += -lelf 116 113 HOSTLOADLIBES_sampleip += -lelf 114 + HOSTLOADLIBES_tc_l2_redirect += -l elf 117 115 118 116 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: 119 117 # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang

+173

samples/bpf/tc_l2_redirect.sh

#!/bin/bash
#
# Exercises bpf_redirect() between veth (L2) devices and ipip/ip6tnl tunnel
# devices using the programs in tc_l2_redirect_kern.o.
#
# Usage: tc_l2_redirect.sh [test_names [test_dirs]]
#   test_names  space-separated subset of "l2_to_ipip l2_to_ip6tnl"
#   test_dirs   space-separated subset of "ingress egress"
# Environment: TC and IP override the tc/ip binaries; a non-empty DEBUG
# turns on shell tracing outside of cleanup.

[[ -z $TC ]] && TC='tc'
[[ -z $IP ]] && IP='ip'

REDIRECT_USER='./tc_l2_redirect'
REDIRECT_BPF='./tc_l2_redirect_kern.o'

# Remember the host settings that the tests overwrite so cleanup can
# restore them.
RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter)
IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding)

# Build the topology shared by every test: namespaces ns1 and ns2, each
# connected to the host through a veth pair (ve1/vens1, ve2/vens2), with
# ipip and ip6tnl tunnel devices inside ns2.
#   $1  tunnel type under test ("ipip" or anything else for ip6tnl); it
#       selects which ns2 tunnel carries the return routes.
function config_common {
	local tun_type=$1

	$IP netns add ns1
	$IP netns add ns2
	$IP link add ve1 type veth peer name vens1
	$IP link add ve2 type veth peer name vens2
	$IP link set dev ve1 up
	$IP link set dev ve2 up
	$IP link set dev ve1 mtu 1500
	$IP link set dev ve2 mtu 1500
	$IP link set dev vens1 netns ns1
	$IP link set dev vens2 netns ns2

	$IP -n ns1 link set dev lo up
	$IP -n ns1 link set dev vens1 up
	$IP -n ns1 addr add 10.1.1.101/24 dev vens1
	$IP -n ns1 addr add 2401:db01::65/64 dev vens1 nodad
	$IP -n ns1 route add default via 10.1.1.1 dev vens1
	$IP -n ns1 route add default via 2401:db01::1 dev vens1

	$IP -n ns2 link set dev lo up
	$IP -n ns2 link set dev vens2 up
	$IP -n ns2 addr add 10.2.1.102/24 dev vens2
	$IP -n ns2 addr add 2401:db02::66/64 dev vens2 nodad
	$IP -n ns2 addr add 10.10.1.102 dev lo
	$IP -n ns2 addr add 2401:face::66/64 dev lo nodad
	$IP -n ns2 link add ipt2 type ipip local 10.2.1.102 remote 10.2.1.1
	$IP -n ns2 link add ip6t2 type ip6tnl mode any local 2401:db02::66 remote 2401:db02::1
	$IP -n ns2 link set dev ipt2 up
	$IP -n ns2 link set dev ip6t2 up
	$IP netns exec ns2 $TC qdisc add dev vens2 clsact
	$IP netns exec ns2 $TC filter add dev vens2 ingress bpf da obj $REDIRECT_BPF sec drop_non_tun_vip
	if [[ $tun_type == "ipip" ]]; then
		$IP -n ns2 route add 10.1.1.0/24 dev ipt2
		$IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0
		$IP netns exec ns2 sysctl -q -w net.ipv4.conf.ipt2.rp_filter=0
	else
		$IP -n ns2 route add 10.1.1.0/24 dev ip6t2
		$IP -n ns2 route add 2401:db01::/64 dev ip6t2
		$IP netns exec ns2 sysctl -q -w net.ipv4.conf.all.rp_filter=0
		$IP netns exec ns2 sysctl -q -w net.ipv4.conf.ip6t2.rp_filter=0
	fi

	$IP addr add 10.1.1.1/24 dev ve1
	$IP addr add 2401:db01::1/64 dev ve1 nodad
	$IP addr add 10.2.1.1/24 dev ve2
	$IP addr add 2401:db02::1/64 dev ve2 nodad

	$TC qdisc add dev ve2 clsact
	$TC filter add dev ve2 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_forward

	sysctl -q -w net.ipv4.conf.all.rp_filter=0
	sysctl -q -w net.ipv6.conf.all.forwarding=1
}

# Tear down everything a test may have created and restore the saved host
# sysctls.  Errors are ignored (objects may not exist); errexit is enabled
# again on the way out, so every command after the first cleanup call runs
# under 'set -e'.
function cleanup {
	set +e
	[[ -z $DEBUG ]] || set +x
	$IP netns delete ns1 >& /dev/null
	$IP netns delete ns2 >& /dev/null
	$IP link del ve1 >& /dev/null
	$IP link del ve2 >& /dev/null
	$IP link del ipt >& /dev/null
	$IP link del ip6t >& /dev/null
	sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER
	sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING
	rm -f /sys/fs/bpf/tc/globals/tun_iface
	[[ -z $DEBUG ]] || set -x
	set -e
}

# Redirect traffic for 10.10.1.102 from a veth into the host-side "ipt"
# ipip device.
#   $1  "ingress" (filter on ve1 ingress) or "egress" (filter on ve2 egress)
function l2_to_ipip {
	echo -n "l2_to_ipip $1: "

	local dir=$1

	config_common ipip

	$IP link add ipt type ipip external
	$IP link set dev ipt up
	sysctl -q -w net.ipv4.conf.ipt.rp_filter=0
	sysctl -q -w net.ipv4.conf.ipt.forwarding=1

	if [[ $dir == "egress" ]]; then
		$IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2
		$TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect
		sysctl -q -w net.ipv4.conf.ve1.forwarding=1
	else
		$TC qdisc add dev ve1 clsact
		$TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_iptun_ingress_redirect
	fi

	# Publish the tunnel device's ifindex through the pinned map.
	$REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ipt/ifindex)

	$IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null

	if [[ $dir == "egress" ]]; then
		# test direct egress to ve2 (i.e. not forwarding from
		# ve1 to ve2).
		ping -c1 10.10.1.102 >& /dev/null
	fi

	cleanup

	echo "OK"
}

# Same as l2_to_ipip, but through the host-side "ip6t" ip6tnl device and
# covering both the IPv4 and the IPv6 address on ns2's loopback.
#   $1  "ingress" or "egress"
function l2_to_ip6tnl {
	echo -n "l2_to_ip6tnl $1: "

	local dir=$1

	config_common ip6tnl

	$IP link add ip6t type ip6tnl mode any external
	$IP link set dev ip6t up
	sysctl -q -w net.ipv4.conf.ip6t.rp_filter=0
	sysctl -q -w net.ipv4.conf.ip6t.forwarding=1

	if [[ $dir == "egress" ]]; then
		$IP route add 10.10.1.0/24 via 10.2.1.102 dev ve2
		$IP route add 2401:face::/64 via 2401:db02::66 dev ve2
		$TC filter add dev ve2 egress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect
		sysctl -q -w net.ipv4.conf.ve1.forwarding=1
	else
		$TC qdisc add dev ve1 clsact
		$TC filter add dev ve1 ingress bpf da obj $REDIRECT_BPF sec l2_to_ip6tun_ingress_redirect
	fi

	# Publish the tunnel device's ifindex through the pinned map.
	$REDIRECT_USER -U /sys/fs/bpf/tc/globals/tun_iface -i $(< /sys/class/net/ip6t/ifindex)

	$IP netns exec ns1 ping -c1 10.10.1.102 >& /dev/null
	$IP netns exec ns1 ping -6 -c1 2401:face::66 >& /dev/null

	if [[ $dir == "egress" ]]; then
		# test direct egress to ve2 (i.e. not forwarding from
		# ve1 to ve2).
		ping -c1 10.10.1.102 >& /dev/null
		ping -6 -c1 2401:face::66 >& /dev/null
	fi

	cleanup

	echo "OK"
}

# A failing command aborts the script (errexit is on after the first
# cleanup).  Without this trap such an abort would leave the namespaces,
# links, pinned map and modified host sysctls behind.  The script's exit
# status is unaffected because the trap does not call exit.
trap cleanup EXIT

cleanup
test_names="l2_to_ipip l2_to_ip6tnl"
test_dirs="ingress egress"
if [[ $# -ge 2 ]]; then
	test_names=$1
	test_dirs=$2
elif [[ $# -ge 1 ]]; then
	test_names=$1
fi

for t in $test_names; do
	for d in $test_dirs; do
		$t $d
	done
done

+236

samples/bpf/tc_l2_redirect_kern.c

··· 1 + /* Copyright (c) 2016 Facebook 2 + * 3 + * This program is free software; you can redistribute it and/or 4 + * modify it under the terms of version 2 of the GNU General Public 5 + * License as published by the Free Software Foundation. 6 + */ 7 + #include <uapi/linux/bpf.h> 8 + #include <uapi/linux/if_ether.h> 9 + #include <uapi/linux/if_packet.h> 10 + #include <uapi/linux/ip.h> 11 + #include <uapi/linux/ipv6.h> 12 + #include <uapi/linux/in.h> 13 + #include <uapi/linux/tcp.h> 14 + #include <uapi/linux/filter.h> 15 + #include <uapi/linux/pkt_cls.h> 16 + #include <net/ipv6.h> 17 + #include "bpf_helpers.h" 18 + 19 + #define _htonl __builtin_bswap32 20 + 21 + #define PIN_GLOBAL_NS 2 22 + struct bpf_elf_map { 23 + __u32 type; 24 + __u32 size_key; 25 + __u32 size_value; 26 + __u32 max_elem; 27 + __u32 flags; 28 + __u32 id; 29 + __u32 pinning; 30 + }; 31 + 32 + /* copy of 'struct ethhdr' without __packed */ 33 + struct eth_hdr { 34 + unsigned char h_dest[ETH_ALEN]; 35 + unsigned char h_source[ETH_ALEN]; 36 + unsigned short h_proto; 37 + }; 38 + 39 + struct bpf_elf_map SEC("maps") tun_iface = { 40 + .type = BPF_MAP_TYPE_ARRAY, 41 + .size_key = sizeof(int), 42 + .size_value = sizeof(int), 43 + .pinning = PIN_GLOBAL_NS, 44 + .max_elem = 1, 45 + }; 46 + 47 + static __always_inline bool is_vip_addr(__be16 eth_proto, __be32 daddr) 48 + { 49 + if (eth_proto == htons(ETH_P_IP)) 50 + return (_htonl(0xffffff00) & daddr) == _htonl(0x0a0a0100); 51 + else if (eth_proto == htons(ETH_P_IPV6)) 52 + return (daddr == _htonl(0x2401face)); 53 + 54 + return false; 55 + } 56 + 57 + SEC("l2_to_iptun_ingress_forward") 58 + int _l2_to_iptun_ingress_forward(struct __sk_buff *skb) 59 + { 60 + struct bpf_tunnel_key tkey = {}; 61 + void *data = (void *)(long)skb->data; 62 + struct eth_hdr *eth = data; 63 + void *data_end = (void *)(long)skb->data_end; 64 + int key = 0, *ifindex; 65 + 66 + int ret; 67 + 68 + if (data + sizeof(*eth) > data_end) 69 + return TC_ACT_OK; 70 + 71 + ifindex = 
bpf_map_lookup_elem(&tun_iface, &key); 72 + if (!ifindex) 73 + return TC_ACT_OK; 74 + 75 + if (eth->h_proto == htons(ETH_P_IP)) { 76 + char fmt4[] = "ingress forward to ifindex:%d daddr4:%x\n"; 77 + struct iphdr *iph = data + sizeof(*eth); 78 + 79 + if (data + sizeof(*eth) + sizeof(*iph) > data_end) 80 + return TC_ACT_OK; 81 + 82 + if (iph->protocol != IPPROTO_IPIP) 83 + return TC_ACT_OK; 84 + 85 + bpf_trace_printk(fmt4, sizeof(fmt4), *ifindex, 86 + _htonl(iph->daddr)); 87 + return bpf_redirect(*ifindex, BPF_F_INGRESS); 88 + } else if (eth->h_proto == htons(ETH_P_IPV6)) { 89 + char fmt6[] = "ingress forward to ifindex:%d daddr6:%x::%x\n"; 90 + struct ipv6hdr *ip6h = data + sizeof(*eth); 91 + 92 + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) 93 + return TC_ACT_OK; 94 + 95 + if (ip6h->nexthdr != IPPROTO_IPIP && 96 + ip6h->nexthdr != IPPROTO_IPV6) 97 + return TC_ACT_OK; 98 + 99 + bpf_trace_printk(fmt6, sizeof(fmt6), *ifindex, 100 + _htonl(ip6h->daddr.s6_addr32[0]), 101 + _htonl(ip6h->daddr.s6_addr32[3])); 102 + return bpf_redirect(*ifindex, BPF_F_INGRESS); 103 + } 104 + 105 + return TC_ACT_OK; 106 + } 107 + 108 + SEC("l2_to_iptun_ingress_redirect") 109 + int _l2_to_iptun_ingress_redirect(struct __sk_buff *skb) 110 + { 111 + struct bpf_tunnel_key tkey = {}; 112 + void *data = (void *)(long)skb->data; 113 + struct eth_hdr *eth = data; 114 + void *data_end = (void *)(long)skb->data_end; 115 + int key = 0, *ifindex; 116 + 117 + int ret; 118 + 119 + if (data + sizeof(*eth) > data_end) 120 + return TC_ACT_OK; 121 + 122 + ifindex = bpf_map_lookup_elem(&tun_iface, &key); 123 + if (!ifindex) 124 + return TC_ACT_OK; 125 + 126 + if (eth->h_proto == htons(ETH_P_IP)) { 127 + char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n"; 128 + struct iphdr *iph = data + sizeof(*eth); 129 + __be32 daddr = iph->daddr; 130 + 131 + if (data + sizeof(*eth) + sizeof(*iph) > data_end) 132 + return TC_ACT_OK; 133 + 134 + if (!is_vip_addr(eth->h_proto, daddr)) 135 + return TC_ACT_OK; 
136 + 137 + bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(daddr), *ifindex); 138 + } else { 139 + return TC_ACT_OK; 140 + } 141 + 142 + tkey.tunnel_id = 10000; 143 + tkey.tunnel_ttl = 64; 144 + tkey.remote_ipv4 = 0x0a020166; /* 10.2.1.102 */ 145 + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), 0); 146 + return bpf_redirect(*ifindex, 0); 147 + } 148 + 149 + SEC("l2_to_ip6tun_ingress_redirect") 150 + int _l2_to_ip6tun_ingress_redirect(struct __sk_buff *skb) 151 + { 152 + struct bpf_tunnel_key tkey = {}; 153 + void *data = (void *)(long)skb->data; 154 + struct eth_hdr *eth = data; 155 + void *data_end = (void *)(long)skb->data_end; 156 + int key = 0, *ifindex; 157 + 158 + if (data + sizeof(*eth) > data_end) 159 + return TC_ACT_OK; 160 + 161 + ifindex = bpf_map_lookup_elem(&tun_iface, &key); 162 + if (!ifindex) 163 + return TC_ACT_OK; 164 + 165 + if (eth->h_proto == htons(ETH_P_IP)) { 166 + char fmt4[] = "e/ingress redirect daddr4:%x to ifindex:%d\n"; 167 + struct iphdr *iph = data + sizeof(*eth); 168 + 169 + if (data + sizeof(*eth) + sizeof(*iph) > data_end) 170 + return TC_ACT_OK; 171 + 172 + if (!is_vip_addr(eth->h_proto, iph->daddr)) 173 + return TC_ACT_OK; 174 + 175 + bpf_trace_printk(fmt4, sizeof(fmt4), _htonl(iph->daddr), 176 + *ifindex); 177 + } else if (eth->h_proto == htons(ETH_P_IPV6)) { 178 + char fmt6[] = "e/ingress redirect daddr6:%x to ifindex:%d\n"; 179 + struct ipv6hdr *ip6h = data + sizeof(*eth); 180 + 181 + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) 182 + return TC_ACT_OK; 183 + 184 + if (!is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0])) 185 + return TC_ACT_OK; 186 + 187 + bpf_trace_printk(fmt6, sizeof(fmt6), 188 + _htonl(ip6h->daddr.s6_addr32[0]), *ifindex); 189 + } else { 190 + return TC_ACT_OK; 191 + } 192 + 193 + tkey.tunnel_id = 10000; 194 + tkey.tunnel_ttl = 64; 195 + /* 2401:db02:0:0:0:0:0:66 */ 196 + tkey.remote_ipv6[0] = _htonl(0x2401db02); 197 + tkey.remote_ipv6[1] = 0; 198 + tkey.remote_ipv6[2] = 0; 199 + tkey.remote_ipv6[3] = 
_htonl(0x00000066); 200 + bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), BPF_F_TUNINFO_IPV6); 201 + return bpf_redirect(*ifindex, 0); 202 + } 203 + 204 + SEC("drop_non_tun_vip") 205 + int _drop_non_tun_vip(struct __sk_buff *skb) 206 + { 207 + struct bpf_tunnel_key tkey = {}; 208 + void *data = (void *)(long)skb->data; 209 + struct eth_hdr *eth = data; 210 + void *data_end = (void *)(long)skb->data_end; 211 + 212 + if (data + sizeof(*eth) > data_end) 213 + return TC_ACT_OK; 214 + 215 + if (eth->h_proto == htons(ETH_P_IP)) { 216 + struct iphdr *iph = data + sizeof(*eth); 217 + 218 + if (data + sizeof(*eth) + sizeof(*iph) > data_end) 219 + return TC_ACT_OK; 220 + 221 + if (is_vip_addr(eth->h_proto, iph->daddr)) 222 + return TC_ACT_SHOT; 223 + } else if (eth->h_proto == htons(ETH_P_IPV6)) { 224 + struct ipv6hdr *ip6h = data + sizeof(*eth); 225 + 226 + if (data + sizeof(*eth) + sizeof(*ip6h) > data_end) 227 + return TC_ACT_OK; 228 + 229 + if (is_vip_addr(eth->h_proto, ip6h->daddr.s6_addr32[0])) 230 + return TC_ACT_SHOT; 231 + } 232 + 233 + return TC_ACT_OK; 234 + } 235 + 236 + char _license[] SEC("license") = "GPL";

+73

samples/bpf/tc_l2_redirect_user.c

··· 1 + /* Copyright (c) 2016 Facebook 2 + * 3 + * This program is free software; you can redistribute it and/or 4 + * modify it under the terms of version 2 of the GNU General Public 5 + * License as published by the Free Software Foundation. 6 + */ 7 + #include <linux/unistd.h> 8 + #include <linux/bpf.h> 9 + 10 + #include <stdlib.h> 11 + #include <stdio.h> 12 + #include <unistd.h> 13 + #include <string.h> 14 + #include <errno.h> 15 + 16 + #include "libbpf.h" 17 + 18 + static void usage(void) 19 + { 20 + printf("Usage: tc_l2_ipip_redirect [...]\n"); 21 + printf(" -U <file> Update an already pinned BPF array\n"); 22 + printf(" -i <ifindex> Interface index\n"); 23 + printf(" -h Display this help\n"); 24 + } 25 + 26 + int main(int argc, char **argv) 27 + { 28 + const char *pinned_file = NULL; 29 + int ifindex = -1; 30 + int array_key = 0; 31 + int array_fd = -1; 32 + int ret = -1; 33 + int opt; 34 + 35 + while ((opt = getopt(argc, argv, "F:U:i:")) != -1) { 36 + switch (opt) { 37 + /* General args */ 38 + case 'U': 39 + pinned_file = optarg; 40 + break; 41 + case 'i': 42 + ifindex = atoi(optarg); 43 + break; 44 + default: 45 + usage(); 46 + goto out; 47 + } 48 + } 49 + 50 + if (ifindex < 0 || !pinned_file) { 51 + usage(); 52 + goto out; 53 + } 54 + 55 + array_fd = bpf_obj_get(pinned_file); 56 + if (array_fd < 0) { 57 + fprintf(stderr, "bpf_obj_get(%s): %s(%d)\n", 58 + pinned_file, strerror(errno), errno); 59 + goto out; 60 + } 61 + 62 + /* bpf_tunnel_key.remote_ipv4 expects host byte orders */ 63 + ret = bpf_update_elem(array_fd, &array_key, &ifindex, 0); 64 + if (ret) { 65 + perror("bpf_update_elem"); 66 + goto out; 67 + } 68 + 69 + out: 70 + if (array_fd != -1) 71 + close(array_fd); 72 + return ret; 73 + }