// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Cloudflare Ltd.
// Copyright (c) 2020 Isovalent, Inc.

#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
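
/* Steer flows destined to port 4321 into a local server socket bound to
 * port 1234, using bpf_sk_assign(). TCP packets that match an established
 * socket are assigned to it directly; everything else headed for port 4321
 * is handed to the listener on 127.0.0.1 (or [::1]).
 */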

/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
static inline struct bpf_sock_tuple *
get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
{
	void *data_end = (void *)(long)skb->data_end;
	void *data = (void *)(long)skb->data;
	struct bpf_sock_tuple *result;
	struct ethhdr *eth;
	__u8 proto = 0;

	eth = (struct ethhdr *)(data);
	if ((void *)(eth + 1) > data_end)
		return NULL;

	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
		struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));

		if ((void *)(iph + 1) > data_end)
			return NULL;
		if (iph->ihl != 5)
			/* Options are not supported */
			return NULL;
		proto = iph->protocol;
		*ipv4 = true;
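		/* struct bpf_sock_tuple has the same layout as the
		 * saddr/daddr pair followed by sport/dport, exactly as they
		 * sit back-to-back in the IP and transport headers, so the
		 * tuple can point straight into the packet.
		 */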
		result = (struct bpf_sock_tuple *)&iph->saddr;
	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
		struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));

		if ((void *)(ip6h + 1) > data_end)
			return NULL;
		proto = ip6h->nexthdr;
		*ipv4 = false;
		result = (struct bpf_sock_tuple *)&ip6h->saddr;
	} else {
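		/* Not IPv4/IPv6: hand back the packet start; the callers'
		 * bounds and port checks will drop or pass such traffic.
		 */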
		return (struct bpf_sock_tuple *)data;
	}

	if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
		return NULL;

	*tcp = (proto == IPPROTO_TCP);
	return result;
}

static inline int
handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
	struct bpf_sock_tuple ln = {0};
	struct bpf_sock *sk;
	size_t tuple_len;
	int ret;

	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
		return TC_ACT_SHOT;

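	/* First, try an exact match on the packet's 4-tuple. */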
	sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
	if (sk)
		goto assign;

	if (ipv4) {
		if (tuple->ipv4.dport != bpf_htons(4321))
			return TC_ACT_OK;

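		/* Steer the flow to the test server listening on
		 * 127.0.0.1:1234.
		 */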
		ln.ipv4.daddr = bpf_htonl(0x7f000001);
		ln.ipv4.dport = bpf_htons(1234);

		sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv4),
				       BPF_F_CURRENT_NETNS, 0);
	} else {
		if (tuple->ipv6.dport != bpf_htons(4321))
			return TC_ACT_OK;

		/* Upper parts of daddr are already zero. */
		ln.ipv6.daddr[3] = bpf_htonl(0x1);
		ln.ipv6.dport = bpf_htons(1234);

		sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv6),
				       BPF_F_CURRENT_NETNS, 0);
	}

	/* workaround: We can't do a single socket lookup here, because then
	 * the compiler will likely spill tuple_len to the stack. This makes it
	 * lose all bounds information in the verifier, which then rejects the
	 * call as unsafe.
	 */
	if (!sk)
		return TC_ACT_SHOT;

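	/* Whether or not the assign succeeds, drop the reference acquired by
	 * the socket lookup above.
	 */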
assign:
	ret = bpf_sk_assign(skb, sk, 0);
	bpf_sk_release(sk);
	return ret;
}

static inline int
handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
	struct bpf_sock_tuple ln = {0};
	struct bpf_sock *sk;
	size_t tuple_len;
	int ret;

	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
	if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
		return TC_ACT_SHOT;

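	/* Reuse a matching established socket directly. A matching listener
	 * is released so that the packet can instead be steered to the local
	 * server below.
	 */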
	sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
	if (sk) {
		if (sk->state != BPF_TCP_LISTEN)
			goto assign;
		bpf_sk_release(sk);
	}

	if (ipv4) {
		if (tuple->ipv4.dport != bpf_htons(4321))
			return TC_ACT_OK;

		ln.ipv4.daddr = bpf_htonl(0x7f000001);
		ln.ipv4.dport = bpf_htons(1234);

		sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv4),
					BPF_F_CURRENT_NETNS, 0);
	} else {
		if (tuple->ipv6.dport != bpf_htons(4321))
			return TC_ACT_OK;

		/* Upper parts of daddr are already zero. */
		ln.ipv6.daddr[3] = bpf_htonl(0x1);
		ln.ipv6.dport = bpf_htons(1234);

		sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv6),
					BPF_F_CURRENT_NETNS, 0);
	}

	/* workaround: We can't do a single socket lookup here, because then
	 * the compiler will likely spill tuple_len to the stack. This makes it
	 * lose all bounds information in the verifier, which then rejects the
	 * call as unsafe.
	 */
	if (!sk)
		return TC_ACT_SHOT;

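	/* The fallback lookup must have found the local listener; anything
	 * else is unexpected, so drop the packet.
	 */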
	if (sk->state != BPF_TCP_LISTEN) {
		bpf_sk_release(sk);
		return TC_ACT_SHOT;
	}

assign:
	ret = bpf_sk_assign(skb, sk, 0);
	bpf_sk_release(sk);
	return ret;
}

SEC("classifier/sk_assign_test")
int bpf_sk_assign_test(struct __sk_buff *skb)
{
	struct bpf_sock_tuple *tuple;
	bool ipv4 = false;
	bool tcp = false;
	int ret = 0;

	tuple = get_tuple(skb, &ipv4, &tcp);
	if (!tuple)
		return TC_ACT_SHOT;

	/* Note that the verifier socket return type for bpf_skc_lookup_tcp()
	 * differs from bpf_sk_lookup_udp(), so even though the C-level type is
	 * the same here, if we try to share the implementations they will
	 * fail to verify because we're crossing pointer types.
	 */
	if (tcp)
		ret = handle_tcp(skb, tuple, ipv4);
	else
		ret = handle_udp(skb, tuple, ipv4);

	return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
}
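
/* A minimal sketch of how a classifier like this is typically attached with
 * tc (the selftest harness does this programmatically; the object file and
 * device names here are illustrative):
 *
 *   clang -O2 -target bpf -c test_sk_assign.c -o test_sk_assign.o
 *   tc qdisc add dev lo clsact
 *   tc filter add dev lo ingress bpf da obj test_sk_assign.o \
 *           sec classifier/sk_assign_test
 *
 * bpf_sk_assign() requires a v5.7+ kernel, and loading/attaching the program
 * requires root privileges.
 */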