Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * common eBPF ELF operations.
5 *
6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
8 * Copyright (C) 2015 Huawei Inc.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation;
13 * version 2.1 of the License (not later!)
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this program; if not, see <http://www.gnu.org/licenses>
22 */
23
24#include <stdlib.h>
25#include <memory.h>
26#include <unistd.h>
27#include <asm/unistd.h>
28#include <linux/bpf.h>
29#include "bpf.h"
30#include "libbpf.h"
31#include <errno.h>
32
/*
 * Fallback syscall number for bpf().
 *
 * When building perf, unistd.h is overridden, so __NR_bpf may not be
 * provided by the headers and has to be defined explicitly here, one
 * value per supported architecture.
 */
#ifndef __NR_bpf
#  if defined(__i386__)
#    define __NR_bpf 357
#  elif defined(__x86_64__)
#    define __NR_bpf 321
#  elif defined(__aarch64__)
#    define __NR_bpf 280
#  elif defined(__sparc__)
#    define __NR_bpf 349
#  elif defined(__s390__)
#    define __NR_bpf 351
#  else
#    error __NR_bpf not defined. libbpf does not support your arch.
#  endif
#endif
52
/*
 * min - yield the smaller of two values.
 *
 * This is a plain function-like macro: whichever argument is selected is
 * evaluated twice, so do not pass expressions with side effects.
 * Only defined when no other definition of min is already in effect.
 */
#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif
56
/*
 * Convert a user-space pointer into the 64-bit integer representation used
 * for pointer-carrying fields of union bpf_attr. The pointer is first
 * narrowed to unsigned long and then widened to __u64.
 */
static inline __u64 ptr_to_u64(const void *ptr)
{
	unsigned long addr = (unsigned long)ptr;

	return (__u64)addr;
}
61
62static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
63 unsigned int size)
64{
65 return syscall(__NR_bpf, cmd, attr, size);
66}
67
68static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
69{
70 int fd;
71
72 do {
73 fd = sys_bpf(BPF_PROG_LOAD, attr, size);
74 } while (fd < 0 && errno == EAGAIN);
75
76 return fd;
77}
78
79int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
80{
81 __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0;
82 union bpf_attr attr;
83
84 memset(&attr, '\0', sizeof(attr));
85
86 attr.map_type = create_attr->map_type;
87 attr.key_size = create_attr->key_size;
88 attr.value_size = create_attr->value_size;
89 attr.max_entries = create_attr->max_entries;
90 attr.map_flags = create_attr->map_flags;
91 memcpy(attr.map_name, create_attr->name,
92 min(name_len, BPF_OBJ_NAME_LEN - 1));
93 attr.numa_node = create_attr->numa_node;
94 attr.btf_fd = create_attr->btf_fd;
95 attr.btf_key_type_id = create_attr->btf_key_type_id;
96 attr.btf_value_type_id = create_attr->btf_value_type_id;
97 attr.map_ifindex = create_attr->map_ifindex;
98 attr.inner_map_fd = create_attr->inner_map_fd;
99
100 return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
101}
102
103int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
104 int key_size, int value_size, int max_entries,
105 __u32 map_flags, int node)
106{
107 struct bpf_create_map_attr map_attr = {};
108
109 map_attr.name = name;
110 map_attr.map_type = map_type;
111 map_attr.map_flags = map_flags;
112 map_attr.key_size = key_size;
113 map_attr.value_size = value_size;
114 map_attr.max_entries = max_entries;
115 if (node >= 0) {
116 map_attr.numa_node = node;
117 map_attr.map_flags |= BPF_F_NUMA_NODE;
118 }
119
120 return bpf_create_map_xattr(&map_attr);
121}
122
123int bpf_create_map(enum bpf_map_type map_type, int key_size,
124 int value_size, int max_entries, __u32 map_flags)
125{
126 struct bpf_create_map_attr map_attr = {};
127
128 map_attr.map_type = map_type;
129 map_attr.map_flags = map_flags;
130 map_attr.key_size = key_size;
131 map_attr.value_size = value_size;
132 map_attr.max_entries = max_entries;
133
134 return bpf_create_map_xattr(&map_attr);
135}
136
137int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
138 int key_size, int value_size, int max_entries,
139 __u32 map_flags)
140{
141 struct bpf_create_map_attr map_attr = {};
142
143 map_attr.name = name;
144 map_attr.map_type = map_type;
145 map_attr.map_flags = map_flags;
146 map_attr.key_size = key_size;
147 map_attr.value_size = value_size;
148 map_attr.max_entries = max_entries;
149
150 return bpf_create_map_xattr(&map_attr);
151}
152
153int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
154 int key_size, int inner_map_fd, int max_entries,
155 __u32 map_flags, int node)
156{
157 __u32 name_len = name ? strlen(name) : 0;
158 union bpf_attr attr;
159
160 memset(&attr, '\0', sizeof(attr));
161
162 attr.map_type = map_type;
163 attr.key_size = key_size;
164 attr.value_size = 4;
165 attr.inner_map_fd = inner_map_fd;
166 attr.max_entries = max_entries;
167 attr.map_flags = map_flags;
168 memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
169
170 if (node >= 0) {
171 attr.map_flags |= BPF_F_NUMA_NODE;
172 attr.numa_node = node;
173 }
174
175 return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
176}
177
178int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
179 int key_size, int inner_map_fd, int max_entries,
180 __u32 map_flags)
181{
182 return bpf_create_map_in_map_node(map_type, name, key_size,
183 inner_map_fd, max_entries, map_flags,
184 -1);
185}
186
/*
 * alloc_zero_tailing_info - copy an array of records, zeroing each tail.
 * @orecord:           source array of @cnt records
 * @cnt:               number of records
 * @actual_rec_size:   size in bytes of each record in @orecord (and in
 *                     the returned copy)
 * @expected_rec_size: number of leading bytes of each record to keep
 *
 * For every record the first @expected_rec_size bytes are copied and the
 * remaining @actual_rec_size - @expected_rec_size bytes are set to zero.
 *
 * Returns a malloc()ed buffer the caller must free(), or NULL when the
 * allocation fails, when the total size does not fit in size_t, or when
 * @expected_rec_size exceeds @actual_rec_size.
 */
static void *
alloc_zero_tailing_info(const void *orecord, __u32 cnt,
			__u32 actual_rec_size, __u32 expected_rec_size)
{
	/* Widen before multiplying so the product cannot wrap in 32 bits. */
	__u64 info_len = (__u64)actual_rec_size * cnt;
	const char *src = orecord;
	char *info, *dst;
	__u32 tail_len;
	__u32 i;

	/* The tail length computed below would underflow otherwise. */
	if (expected_rec_size > actual_rec_size)
		return NULL;

	/* Refuse sizes malloc() cannot represent (32-bit size_t). */
	if ((__u64)(size_t)info_len != info_len)
		return NULL;

	info = malloc((size_t)info_len);
	if (!info)
		return NULL;

	tail_len = actual_rec_size - expected_rec_size;

	/* zero out bytes kernel does not understand */
	dst = info;
	for (i = 0; i < cnt; i++) {
		memcpy(dst, src, expected_rec_size);
		memset(dst + expected_rec_size, 0, tail_len);
		src += actual_rec_size;
		dst += actual_rec_size;
	}

	return info;
}
211
212int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
213 char *log_buf, size_t log_buf_sz)
214{
215 void *finfo = NULL, *linfo = NULL;
216 union bpf_attr attr;
217 __u32 name_len;
218 int fd;
219
220 if (!load_attr)
221 return -EINVAL;
222
223 name_len = load_attr->name ? strlen(load_attr->name) : 0;
224
225 bzero(&attr, sizeof(attr));
226 attr.prog_type = load_attr->prog_type;
227 attr.expected_attach_type = load_attr->expected_attach_type;
228 attr.insn_cnt = (__u32)load_attr->insns_cnt;
229 attr.insns = ptr_to_u64(load_attr->insns);
230 attr.license = ptr_to_u64(load_attr->license);
231 attr.log_buf = ptr_to_u64(NULL);
232 attr.log_size = 0;
233 attr.log_level = 0;
234 attr.kern_version = load_attr->kern_version;
235 attr.prog_ifindex = load_attr->prog_ifindex;
236 attr.prog_btf_fd = load_attr->prog_btf_fd;
237 attr.func_info_rec_size = load_attr->func_info_rec_size;
238 attr.func_info_cnt = load_attr->func_info_cnt;
239 attr.func_info = ptr_to_u64(load_attr->func_info);
240 attr.line_info_rec_size = load_attr->line_info_rec_size;
241 attr.line_info_cnt = load_attr->line_info_cnt;
242 attr.line_info = ptr_to_u64(load_attr->line_info);
243 memcpy(attr.prog_name, load_attr->name,
244 min(name_len, BPF_OBJ_NAME_LEN - 1));
245
246 fd = sys_bpf_prog_load(&attr, sizeof(attr));
247 if (fd >= 0)
248 return fd;
249
250 /* After bpf_prog_load, the kernel may modify certain attributes
251 * to give user space a hint how to deal with loading failure.
252 * Check to see whether we can make some changes and load again.
253 */
254 while (errno == E2BIG && (!finfo || !linfo)) {
255 if (!finfo && attr.func_info_cnt &&
256 attr.func_info_rec_size < load_attr->func_info_rec_size) {
257 /* try with corrected func info records */
258 finfo = alloc_zero_tailing_info(load_attr->func_info,
259 load_attr->func_info_cnt,
260 load_attr->func_info_rec_size,
261 attr.func_info_rec_size);
262 if (!finfo)
263 goto done;
264
265 attr.func_info = ptr_to_u64(finfo);
266 attr.func_info_rec_size = load_attr->func_info_rec_size;
267 } else if (!linfo && attr.line_info_cnt &&
268 attr.line_info_rec_size <
269 load_attr->line_info_rec_size) {
270 linfo = alloc_zero_tailing_info(load_attr->line_info,
271 load_attr->line_info_cnt,
272 load_attr->line_info_rec_size,
273 attr.line_info_rec_size);
274 if (!linfo)
275 goto done;
276
277 attr.line_info = ptr_to_u64(linfo);
278 attr.line_info_rec_size = load_attr->line_info_rec_size;
279 } else {
280 break;
281 }
282
283 fd = sys_bpf_prog_load(&attr, sizeof(attr));
284
285 if (fd >= 0)
286 goto done;
287 }
288
289 if (!log_buf || !log_buf_sz)
290 goto done;
291
292 /* Try again with log */
293 attr.log_buf = ptr_to_u64(log_buf);
294 attr.log_size = log_buf_sz;
295 attr.log_level = 1;
296 log_buf[0] = 0;
297 fd = sys_bpf_prog_load(&attr, sizeof(attr));
298done:
299 free(finfo);
300 free(linfo);
301 return fd;
302}
303
304int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
305 size_t insns_cnt, const char *license,
306 __u32 kern_version, char *log_buf,
307 size_t log_buf_sz)
308{
309 struct bpf_load_program_attr load_attr;
310
311 memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
312 load_attr.prog_type = type;
313 load_attr.expected_attach_type = 0;
314 load_attr.name = NULL;
315 load_attr.insns = insns;
316 load_attr.insns_cnt = insns_cnt;
317 load_attr.license = license;
318 load_attr.kern_version = kern_version;
319
320 return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
321}
322
323int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
324 size_t insns_cnt, __u32 prog_flags, const char *license,
325 __u32 kern_version, char *log_buf, size_t log_buf_sz,
326 int log_level)
327{
328 union bpf_attr attr;
329
330 bzero(&attr, sizeof(attr));
331 attr.prog_type = type;
332 attr.insn_cnt = (__u32)insns_cnt;
333 attr.insns = ptr_to_u64(insns);
334 attr.license = ptr_to_u64(license);
335 attr.log_buf = ptr_to_u64(log_buf);
336 attr.log_size = log_buf_sz;
337 attr.log_level = log_level;
338 log_buf[0] = 0;
339 attr.kern_version = kern_version;
340 attr.prog_flags = prog_flags;
341
342 return sys_bpf_prog_load(&attr, sizeof(attr));
343}
344
345int bpf_map_update_elem(int fd, const void *key, const void *value,
346 __u64 flags)
347{
348 union bpf_attr attr;
349
350 bzero(&attr, sizeof(attr));
351 attr.map_fd = fd;
352 attr.key = ptr_to_u64(key);
353 attr.value = ptr_to_u64(value);
354 attr.flags = flags;
355
356 return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
357}
358
359int bpf_map_lookup_elem(int fd, const void *key, void *value)
360{
361 union bpf_attr attr;
362
363 bzero(&attr, sizeof(attr));
364 attr.map_fd = fd;
365 attr.key = ptr_to_u64(key);
366 attr.value = ptr_to_u64(value);
367
368 return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
369}
370
371int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
372{
373 union bpf_attr attr;
374
375 bzero(&attr, sizeof(attr));
376 attr.map_fd = fd;
377 attr.key = ptr_to_u64(key);
378 attr.value = ptr_to_u64(value);
379
380 return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
381}
382
383int bpf_map_delete_elem(int fd, const void *key)
384{
385 union bpf_attr attr;
386
387 bzero(&attr, sizeof(attr));
388 attr.map_fd = fd;
389 attr.key = ptr_to_u64(key);
390
391 return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
392}
393
394int bpf_map_get_next_key(int fd, const void *key, void *next_key)
395{
396 union bpf_attr attr;
397
398 bzero(&attr, sizeof(attr));
399 attr.map_fd = fd;
400 attr.key = ptr_to_u64(key);
401 attr.next_key = ptr_to_u64(next_key);
402
403 return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
404}
405
406int bpf_obj_pin(int fd, const char *pathname)
407{
408 union bpf_attr attr;
409
410 bzero(&attr, sizeof(attr));
411 attr.pathname = ptr_to_u64((void *)pathname);
412 attr.bpf_fd = fd;
413
414 return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
415}
416
417int bpf_obj_get(const char *pathname)
418{
419 union bpf_attr attr;
420
421 bzero(&attr, sizeof(attr));
422 attr.pathname = ptr_to_u64((void *)pathname);
423
424 return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
425}
426
427int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
428 unsigned int flags)
429{
430 union bpf_attr attr;
431
432 bzero(&attr, sizeof(attr));
433 attr.target_fd = target_fd;
434 attr.attach_bpf_fd = prog_fd;
435 attr.attach_type = type;
436 attr.attach_flags = flags;
437
438 return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
439}
440
441int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
442{
443 union bpf_attr attr;
444
445 bzero(&attr, sizeof(attr));
446 attr.target_fd = target_fd;
447 attr.attach_type = type;
448
449 return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
450}
451
452int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
453{
454 union bpf_attr attr;
455
456 bzero(&attr, sizeof(attr));
457 attr.target_fd = target_fd;
458 attr.attach_bpf_fd = prog_fd;
459 attr.attach_type = type;
460
461 return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
462}
463
464int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
465 __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
466{
467 union bpf_attr attr;
468 int ret;
469
470 bzero(&attr, sizeof(attr));
471 attr.query.target_fd = target_fd;
472 attr.query.attach_type = type;
473 attr.query.query_flags = query_flags;
474 attr.query.prog_cnt = *prog_cnt;
475 attr.query.prog_ids = ptr_to_u64(prog_ids);
476
477 ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
478 if (attach_flags)
479 *attach_flags = attr.query.attach_flags;
480 *prog_cnt = attr.query.prog_cnt;
481 return ret;
482}
483
484int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
485 void *data_out, __u32 *size_out, __u32 *retval,
486 __u32 *duration)
487{
488 union bpf_attr attr;
489 int ret;
490
491 bzero(&attr, sizeof(attr));
492 attr.test.prog_fd = prog_fd;
493 attr.test.data_in = ptr_to_u64(data);
494 attr.test.data_out = ptr_to_u64(data_out);
495 attr.test.data_size_in = size;
496 attr.test.repeat = repeat;
497
498 ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
499 if (size_out)
500 *size_out = attr.test.data_size_out;
501 if (retval)
502 *retval = attr.test.retval;
503 if (duration)
504 *duration = attr.test.duration;
505 return ret;
506}
507
508int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
509{
510 union bpf_attr attr;
511 int ret;
512
513 if (!test_attr->data_out && test_attr->data_size_out > 0)
514 return -EINVAL;
515
516 bzero(&attr, sizeof(attr));
517 attr.test.prog_fd = test_attr->prog_fd;
518 attr.test.data_in = ptr_to_u64(test_attr->data_in);
519 attr.test.data_out = ptr_to_u64(test_attr->data_out);
520 attr.test.data_size_in = test_attr->data_size_in;
521 attr.test.data_size_out = test_attr->data_size_out;
522 attr.test.repeat = test_attr->repeat;
523
524 ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
525 test_attr->data_size_out = attr.test.data_size_out;
526 test_attr->retval = attr.test.retval;
527 test_attr->duration = attr.test.duration;
528 return ret;
529}
530
531int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
532{
533 union bpf_attr attr;
534 int err;
535
536 bzero(&attr, sizeof(attr));
537 attr.start_id = start_id;
538
539 err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr));
540 if (!err)
541 *next_id = attr.next_id;
542
543 return err;
544}
545
546int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
547{
548 union bpf_attr attr;
549 int err;
550
551 bzero(&attr, sizeof(attr));
552 attr.start_id = start_id;
553
554 err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr));
555 if (!err)
556 *next_id = attr.next_id;
557
558 return err;
559}
560
561int bpf_prog_get_fd_by_id(__u32 id)
562{
563 union bpf_attr attr;
564
565 bzero(&attr, sizeof(attr));
566 attr.prog_id = id;
567
568 return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
569}
570
571int bpf_map_get_fd_by_id(__u32 id)
572{
573 union bpf_attr attr;
574
575 bzero(&attr, sizeof(attr));
576 attr.map_id = id;
577
578 return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
579}
580
581int bpf_btf_get_fd_by_id(__u32 id)
582{
583 union bpf_attr attr;
584
585 bzero(&attr, sizeof(attr));
586 attr.btf_id = id;
587
588 return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
589}
590
591int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
592{
593 union bpf_attr attr;
594 int err;
595
596 bzero(&attr, sizeof(attr));
597 attr.info.bpf_fd = prog_fd;
598 attr.info.info_len = *info_len;
599 attr.info.info = ptr_to_u64(info);
600
601 err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
602 if (!err)
603 *info_len = attr.info.info_len;
604
605 return err;
606}
607
608int bpf_raw_tracepoint_open(const char *name, int prog_fd)
609{
610 union bpf_attr attr;
611
612 bzero(&attr, sizeof(attr));
613 attr.raw_tracepoint.name = ptr_to_u64(name);
614 attr.raw_tracepoint.prog_fd = prog_fd;
615
616 return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
617}
618
619int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
620 bool do_log)
621{
622 union bpf_attr attr = {};
623 int fd;
624
625 attr.btf = ptr_to_u64(btf);
626 attr.btf_size = btf_size;
627
628retry:
629 if (do_log && log_buf && log_buf_size) {
630 attr.btf_log_level = 1;
631 attr.btf_log_size = log_buf_size;
632 attr.btf_log_buf = ptr_to_u64(log_buf);
633 }
634
635 fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
636 if (fd == -1 && !do_log && log_buf && log_buf_size) {
637 do_log = true;
638 goto retry;
639 }
640
641 return fd;
642}
643
644int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
645 __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
646 __u64 *probe_addr)
647{
648 union bpf_attr attr = {};
649 int err;
650
651 attr.task_fd_query.pid = pid;
652 attr.task_fd_query.fd = fd;
653 attr.task_fd_query.flags = flags;
654 attr.task_fd_query.buf = ptr_to_u64(buf);
655 attr.task_fd_query.buf_len = *buf_len;
656
657 err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
658 *buf_len = attr.task_fd_query.buf_len;
659 *prog_id = attr.task_fd_query.prog_id;
660 *fd_type = attr.task_fd_query.fd_type;
661 *probe_offset = attr.task_fd_query.probe_offset;
662 *probe_addr = attr.task_fd_query.probe_addr;
663
664 return err;
665}