Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1#include <stdbool.h>
2#include <linux/bpf.h>
3#include <linux/errno.h>
4#include <linux/if_ether.h>
5#include <linux/pkt_cls.h>
6
7#include <bpf/bpf_helpers.h>
8#include "bpf_kfuncs.h"
9
/* Number of metadata bytes carried in front of each test packet. */
#define META_SIZE 32

/*
 * Turn a context field that stores an address as an integer (for example
 * ctx->data or ctx->data_meta) into a void pointer.
 *
 * @ctx: pointer expression for the program context; it is parenthesized so
 *       that compound expressions such as "base + 1" or "&arr[i]" bind as
 *       intended
 * @mem: name of the context member to read
 *
 * The whole expansion is parenthesized so it can be embedded in a larger
 * expression without precedence surprises.
 */
#define ctx_ptr(ctx, mem) ((void *)(unsigned long)(ctx)->mem)
13
/* Demonstrates how metadata can be passed from an XDP program to a TC program
 * using bpf_xdp_adjust_meta().
 * To test metadata support in drivers, the XDP program uses a fixed-size
 * chunk of the payload that follows the Ethernet header as metadata. The TC
 * program copies the metadata it receives into a map so that it can be
 * checked from userspace.
 */
21
22struct {
23 __uint(type, BPF_MAP_TYPE_ARRAY);
24 __uint(max_entries, 1);
25 __type(key, __u32);
26 __uint(value_size, META_SIZE);
27} test_result SEC(".maps");
28
29bool test_pass;
30
31SEC("tc")
32int ing_cls(struct __sk_buff *ctx)
33{
34 __u8 *data, *data_meta;
35 __u32 key = 0;
36
37 data_meta = ctx_ptr(ctx, data_meta);
38 data = ctx_ptr(ctx, data);
39
40 if (data_meta + META_SIZE > data)
41 return TC_ACT_SHOT;
42
43 bpf_map_update_elem(&test_result, &key, data_meta, BPF_ANY);
44
45 return TC_ACT_SHOT;
46}
47
48/* Read from metadata using bpf_dynptr_read helper */
49SEC("tc")
50int ing_cls_dynptr_read(struct __sk_buff *ctx)
51{
52 struct bpf_dynptr meta;
53 const __u32 zero = 0;
54 __u8 *dst;
55
56 dst = bpf_map_lookup_elem(&test_result, &zero);
57 if (!dst)
58 return TC_ACT_SHOT;
59
60 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
61 bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0);
62
63 return TC_ACT_SHOT;
64}
65
66/* Write to metadata using bpf_dynptr_write helper */
67SEC("tc")
68int ing_cls_dynptr_write(struct __sk_buff *ctx)
69{
70 struct bpf_dynptr data, meta;
71 __u8 *src;
72
73 bpf_dynptr_from_skb(ctx, 0, &data);
74 src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
75 if (!src)
76 return TC_ACT_SHOT;
77
78 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
79 bpf_dynptr_write(&meta, 0, src, META_SIZE, 0);
80
81 return TC_ACT_UNSPEC; /* pass */
82}
83
84/* Read from metadata using read-only dynptr slice */
85SEC("tc")
86int ing_cls_dynptr_slice(struct __sk_buff *ctx)
87{
88 struct bpf_dynptr meta;
89 const __u32 zero = 0;
90 __u8 *dst, *src;
91
92 dst = bpf_map_lookup_elem(&test_result, &zero);
93 if (!dst)
94 return TC_ACT_SHOT;
95
96 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
97 src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
98 if (!src)
99 return TC_ACT_SHOT;
100
101 __builtin_memcpy(dst, src, META_SIZE);
102
103 return TC_ACT_SHOT;
104}
105
106/* Write to metadata using writeable dynptr slice */
107SEC("tc")
108int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
109{
110 struct bpf_dynptr data, meta;
111 __u8 *src, *dst;
112
113 bpf_dynptr_from_skb(ctx, 0, &data);
114 src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
115 if (!src)
116 return TC_ACT_SHOT;
117
118 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
119 dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
120 if (!dst)
121 return TC_ACT_SHOT;
122
123 __builtin_memcpy(dst, src, META_SIZE);
124
125 return TC_ACT_UNSPEC; /* pass */
126}
127
128/* Read skb metadata in chunks from various offsets in different ways. */
129SEC("tc")
130int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
131{
132 struct bpf_dynptr meta;
133 const __u32 chunk_len = META_SIZE / 4;
134 const __u32 zero = 0;
135 __u8 *dst, *src;
136
137 dst = bpf_map_lookup_elem(&test_result, &zero);
138 if (!dst)
139 return TC_ACT_SHOT;
140
141 /* 1. Regular read */
142 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
143 bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
144 dst += chunk_len;
145
146 /* 2. Read from an offset-adjusted dynptr */
147 bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
148 bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
149 dst += chunk_len;
150
151 /* 3. Read at an offset */
152 bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0);
153 dst += chunk_len;
154
155 /* 4. Read from a slice starting at an offset */
156 src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len);
157 if (!src)
158 return TC_ACT_SHOT;
159 __builtin_memcpy(dst, src, chunk_len);
160
161 return TC_ACT_SHOT;
162}
163
164/* Write skb metadata in chunks at various offsets in different ways. */
165SEC("tc")
166int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx)
167{
168 const __u32 chunk_len = META_SIZE / 4;
169 __u8 payload[META_SIZE];
170 struct bpf_dynptr meta;
171 __u8 *dst, *src;
172
173 bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload));
174 src = payload;
175
176 /* 1. Regular write */
177 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
178 bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
179 src += chunk_len;
180
181 /* 2. Write to an offset-adjusted dynptr */
182 bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
183 bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
184 src += chunk_len;
185
186 /* 3. Write at an offset */
187 bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0);
188 src += chunk_len;
189
190 /* 4. Write to a slice starting at an offset */
191 dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len);
192 if (!dst)
193 return TC_ACT_SHOT;
194 __builtin_memcpy(dst, src, chunk_len);
195
196 return TC_ACT_UNSPEC; /* pass */
197}
198
199/* Pass an OOB offset to dynptr read, write, adjust, slice. */
200SEC("tc")
201int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx)
202{
203 struct bpf_dynptr meta;
204 __u8 md, *p;
205 int err;
206
207 err = bpf_dynptr_from_skb_meta(ctx, 0, &meta);
208 if (err)
209 goto fail;
210
211 /* read offset OOB */
212 err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0);
213 if (err != -E2BIG)
214 goto fail;
215
216 /* write offset OOB */
217 err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0);
218 if (err != -E2BIG)
219 goto fail;
220
221 /* adjust end offset OOB */
222 err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1);
223 if (err != -ERANGE)
224 goto fail;
225
226 /* adjust start offset OOB */
227 err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1);
228 if (err != -ERANGE)
229 goto fail;
230
231 /* slice offset OOB */
232 p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p));
233 if (p)
234 goto fail;
235
236 /* slice rdwr offset OOB */
237 p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p));
238 if (p)
239 goto fail;
240
241 return TC_ACT_UNSPEC;
242fail:
243 return TC_ACT_SHOT;
244}
245
246/* Reserve and clear space for metadata but don't populate it */
247SEC("xdp")
248int ing_xdp_zalloc_meta(struct xdp_md *ctx)
249{
250 struct ethhdr *eth = ctx_ptr(ctx, data);
251 __u8 *meta;
252 int ret;
253
254 /* Drop any non-test packets */
255 if (eth + 1 > ctx_ptr(ctx, data_end))
256 return XDP_DROP;
257 if (eth->h_proto != 0)
258 return XDP_DROP;
259
260 ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
261 if (ret < 0)
262 return XDP_DROP;
263
264 meta = ctx_ptr(ctx, data_meta);
265 if (meta + META_SIZE > ctx_ptr(ctx, data))
266 return XDP_DROP;
267
268 __builtin_memset(meta, 0, META_SIZE);
269
270 return XDP_PASS;
271}
272
273SEC("xdp")
274int ing_xdp(struct xdp_md *ctx)
275{
276 __u8 *data, *data_meta, *data_end, *payload;
277 struct ethhdr *eth;
278 int ret;
279
280 ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
281 if (ret < 0)
282 return XDP_DROP;
283
284 data_meta = ctx_ptr(ctx, data_meta);
285 data_end = ctx_ptr(ctx, data_end);
286 data = ctx_ptr(ctx, data);
287
288 eth = (struct ethhdr *)data;
289 payload = data + sizeof(struct ethhdr);
290
291 if (payload + META_SIZE > data_end ||
292 data_meta + META_SIZE > data)
293 return XDP_DROP;
294
295 /* The Linux networking stack may send other packets on the test
296 * interface that interfere with the test. Just drop them.
297 * The test packets can be recognized by their ethertype of zero.
298 */
299 if (eth->h_proto != 0)
300 return XDP_DROP;
301
302 __builtin_memcpy(data_meta, payload, META_SIZE);
303 return XDP_PASS;
304}
305
306/*
307 * Check that skb->data_meta..skb->data is empty if prog writes to packet
308 * _payload_ using packet pointers. Applies only to cloned skbs.
309 */
310SEC("tc")
311int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx)
312{
313 struct ethhdr *eth = ctx_ptr(ctx, data);
314
315 if (eth + 1 > ctx_ptr(ctx, data_end))
316 goto out;
317 /* Ignore non-test packets */
318 if (eth->h_proto != 0)
319 goto out;
320
321 /* Expect no metadata */
322 if (ctx->data_meta != ctx->data)
323 goto out;
324
325 /* Packet write to trigger unclone in prologue */
326 eth->h_proto = 42;
327
328 test_pass = true;
329out:
330 return TC_ACT_SHOT;
331}
332
333/*
334 * Check that skb->data_meta..skb->data is empty if prog writes to packet
335 * _metadata_ using packet pointers. Applies only to cloned skbs.
336 */
337SEC("tc")
338int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx)
339{
340 struct ethhdr *eth = ctx_ptr(ctx, data);
341 __u8 *md = ctx_ptr(ctx, data_meta);
342
343 if (eth + 1 > ctx_ptr(ctx, data_end))
344 goto out;
345 /* Ignore non-test packets */
346 if (eth->h_proto != 0)
347 goto out;
348
349 if (md + 1 > ctx_ptr(ctx, data)) {
350 /* Expect no metadata */
351 test_pass = true;
352 } else {
353 /* Metadata write to trigger unclone in prologue */
354 *md = 42;
355 }
356out:
357 return TC_ACT_SHOT;
358}
359
360/*
361 * Check that skb_meta dynptr is writable but empty if prog writes to packet
362 * _payload_ using a dynptr slice. Applies only to cloned skbs.
363 */
364SEC("tc")
365int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx)
366{
367 struct bpf_dynptr data, meta;
368 struct ethhdr *eth;
369
370 bpf_dynptr_from_skb(ctx, 0, &data);
371 eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth));
372 if (!eth)
373 goto out;
374 /* Ignore non-test packets */
375 if (eth->h_proto != 0)
376 goto out;
377
378 /* Expect no metadata */
379 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
380 if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
381 goto out;
382
383 /* Packet write to trigger unclone in prologue */
384 eth->h_proto = 42;
385
386 test_pass = true;
387out:
388 return TC_ACT_SHOT;
389}
390
391/*
392 * Check that skb_meta dynptr is writable but empty if prog writes to packet
393 * _metadata_ using a dynptr slice. Applies only to cloned skbs.
394 */
395SEC("tc")
396int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx)
397{
398 struct bpf_dynptr data, meta;
399 const struct ethhdr *eth;
400 __u8 *md;
401
402 bpf_dynptr_from_skb(ctx, 0, &data);
403 eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
404 if (!eth)
405 goto out;
406 /* Ignore non-test packets */
407 if (eth->h_proto != 0)
408 goto out;
409
410 /* Expect no metadata */
411 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
412 if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0)
413 goto out;
414
415 /* Metadata write to trigger unclone in prologue */
416 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
417 md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
418 if (md)
419 *md = 42;
420
421 test_pass = true;
422out:
423 return TC_ACT_SHOT;
424}
425
426/*
427 * Check that skb_meta dynptr is read-only before prog writes to packet payload
428 * using dynptr_write helper. Applies only to cloned skbs.
429 */
430SEC("tc")
431int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx)
432{
433 struct bpf_dynptr data, meta;
434 const struct ethhdr *eth;
435
436 bpf_dynptr_from_skb(ctx, 0, &data);
437 eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
438 if (!eth)
439 goto out;
440 /* Ignore non-test packets */
441 if (eth->h_proto != 0)
442 goto out;
443
444 /* Expect read-only metadata before unclone */
445 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
446 if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
447 goto out;
448
449 /* Helper write to payload will unclone the packet */
450 bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);
451
452 /* Expect no metadata after unclone */
453 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
454 if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0)
455 goto out;
456
457 test_pass = true;
458out:
459 return TC_ACT_SHOT;
460}
461
462/*
463 * Check that skb_meta dynptr is read-only if prog writes to packet
464 * metadata using dynptr_write helper. Applies only to cloned skbs.
465 */
466SEC("tc")
467int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx)
468{
469 struct bpf_dynptr data, meta;
470 const struct ethhdr *eth;
471
472 bpf_dynptr_from_skb(ctx, 0, &data);
473 eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
474 if (!eth)
475 goto out;
476 /* Ignore non-test packets */
477 if (eth->h_proto != 0)
478 goto out;
479
480 /* Expect read-only metadata */
481 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
482 if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE)
483 goto out;
484
485 /* Metadata write. Expect failure. */
486 bpf_dynptr_from_skb_meta(ctx, 0, &meta);
487 if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL)
488 goto out;
489
490 test_pass = true;
491out:
492 return TC_ACT_SHOT;
493}
494
495char _license[] SEC("license") = "GPL";