Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2023-05-26

We've added 54 non-merge commits during the last 10 day(s) which contain
a total of 76 files changed, 2729 insertions(+), 1003 deletions(-).

The main changes are:

1) Add the capability to destroy sockets in BPF through a new kfunc,
from Aditi Ghag.

2) Support O_PATH fds in BPF_OBJ_PIN and BPF_OBJ_GET commands,
from Andrii Nakryiko.

3) Add capability for libbpf to resize datasec maps when backed via mmap,
from JP Kobryn.

4) Move all the test kfuncs for CI out of the kernel and into bpf_testmod,
from Jiri Olsa.

5) Big batch of xsk selftest improvements to prep for multi-buffer testing,
from Magnus Karlsson.

6) Show the target_{obj,btf}_id in tracing link's fdinfo and dump it
via bpftool, from Yafang Shao.

7) Various misc BPF selftest improvements to work with upcoming LLVM 17,
from Yonghong Song.

8) Extend bpftool to specify netdevice for resolving XDP hints,
from Larysa Zaremba.

9) Document masking in shift operations for the insn set document,
from Dave Thaler.

10) Extend BPF selftests to check xdp_feature support for bond driver,
from Lorenzo Bianconi.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (54 commits)
bpf: Fix bad unlock balance on freeze_mutex
libbpf: Ensure FD >= 3 during bpf_map__reuse_fd()
libbpf: Ensure libbpf always opens files with O_CLOEXEC
selftests/bpf: Check whether to run selftest
libbpf: Change var type in datasec resize func
bpf: drop unnecessary bpf_capable() check in BPF_MAP_FREEZE command
libbpf: Selftests for resizing datasec maps
libbpf: Add capability for resizing datasec maps
selftests/bpf: Add path_fd-based BPF_OBJ_PIN and BPF_OBJ_GET tests
libbpf: Add opts-based bpf_obj_pin() API and add support for path_fd
bpf: Support O_PATH FDs in BPF_OBJ_PIN and BPF_OBJ_GET commands
libbpf: Start v1.3 development cycle
bpf: Validate BPF object in BPF_OBJ_PIN before calling LSM
bpftool: Specify XDP Hints ifname when loading program
selftests/bpf: Add xdp_feature selftest for bond device
selftests/bpf: Test bpf_sock_destroy
selftests/bpf: Add helper to get port using getsockname
bpf: Add bpf_sock_destroy kfunc
bpf: Add kfunc filter function to 'struct btf_kfunc_id_set'
bpf: udp: Implement batching for sockets iterator
...
====================

Link: https://lore.kernel.org/r/20230526222747.17775-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+2728 -1002
+6 -3
Documentation/bpf/instruction-set.rst
··· 163 163 BPF_DIV 0x30 dst = (src != 0) ? (dst / src) : 0 164 164 BPF_OR 0x40 dst \|= src 165 165 BPF_AND 0x50 dst &= src 166 - BPF_LSH 0x60 dst <<= src 167 - BPF_RSH 0x70 dst >>= src 166 + BPF_LSH 0x60 dst <<= (src & mask) 167 + BPF_RSH 0x70 dst >>= (src & mask) 168 168 BPF_NEG 0x80 dst = ~src 169 169 BPF_MOD 0x90 dst = (src != 0) ? (dst % src) : dst 170 170 BPF_XOR 0xa0 dst ^= src 171 171 BPF_MOV 0xb0 dst = src 172 - BPF_ARSH 0xc0 sign extending shift right 172 + BPF_ARSH 0xc0 sign extending dst >>= (src & mask) 173 173 BPF_END 0xd0 byte swap operations (see `Byte swap instructions`_ below) 174 174 ======== ===== ========================================================== 175 175 ··· 203 203 for ``BPF_ALU64``, 'imm' is first sign extended to 64 bits and the result 204 204 interpreted as an unsigned 64-bit value. There are no instructions for 205 205 signed division or modulo. 206 + 207 + Shift operations use a mask of 0x3F (63) for 64-bit operations and 0x1F (31) 208 + for 32-bit operations. 206 209 207 210 Byte swap instructions 208 211 ~~~~~~~~~~~~~~~~~~~~~~
+2 -2
include/linux/bpf.h
··· 2077 2077 struct bpf_link *bpf_link_get_from_fd(u32 ufd); 2078 2078 struct bpf_link *bpf_link_get_curr_or_next(u32 *id); 2079 2079 2080 - int bpf_obj_pin_user(u32 ufd, const char __user *pathname); 2081 - int bpf_obj_get_user(const char __user *pathname, int flags); 2080 + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); 2081 + int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); 2082 2082 2083 2083 #define BPF_ITER_FUNC_PREFIX "bpf_iter_" 2084 2084 #define DEFINE_BPF_ITER_FUNC(target, args...) \
+11 -7
include/linux/btf.h
··· 98 98 union bpf_attr; 99 99 struct btf_show; 100 100 struct btf_id_set; 101 + struct bpf_prog; 102 + 103 + typedef int (*btf_kfunc_filter_t)(const struct bpf_prog *prog, u32 kfunc_id); 101 104 102 105 struct btf_kfunc_id_set { 103 106 struct module *owner; 104 107 struct btf_id_set8 *set; 108 + btf_kfunc_filter_t filter; 105 109 }; 106 110 107 111 struct btf_id_dtor_kfunc { ··· 483 479 return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); 484 480 } 485 481 486 - struct bpf_prog; 487 482 struct bpf_verifier_log; 488 483 489 484 #ifdef CONFIG_BPF_SYSCALL ··· 490 487 const char *btf_name_by_offset(const struct btf *btf, u32 offset); 491 488 struct btf *btf_parse_vmlinux(void); 492 489 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); 493 - u32 *btf_kfunc_id_set_contains(const struct btf *btf, 494 - enum bpf_prog_type prog_type, 495 - u32 kfunc_btf_id); 496 - u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id); 490 + u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id, 491 + const struct bpf_prog *prog); 492 + u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id, 493 + const struct bpf_prog *prog); 497 494 int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, 498 495 const struct btf_kfunc_id_set *s); 499 496 int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset); ··· 520 517 return NULL; 521 518 } 522 519 static inline u32 *btf_kfunc_id_set_contains(const struct btf *btf, 523 - enum bpf_prog_type prog_type, 524 - u32 kfunc_btf_id) 520 + u32 kfunc_btf_id, 521 + struct bpf_prog *prog) 522 + 525 523 { 526 524 return NULL; 527 525 }
-1
include/net/udp.h
··· 437 437 struct udp_iter_state { 438 438 struct seq_net_private p; 439 439 int bucket; 440 - struct udp_seq_afinfo *bpf_seq_afinfo; 441 440 }; 442 441 443 442 void *udp_seq_start(struct seq_file *seq, loff_t *pos);
+10
include/uapi/linux/bpf.h
··· 1272 1272 1273 1273 /* Create a map that will be registered/unregistered by the backed bpf_link */ 1274 1274 BPF_F_LINK = (1U << 13), 1275 + 1276 + /* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ 1277 + BPF_F_PATH_FD = (1U << 14), 1275 1278 }; 1276 1279 1277 1280 /* Flags for BPF_PROG_QUERY. */ ··· 1423 1420 __aligned_u64 pathname; 1424 1421 __u32 bpf_fd; 1425 1422 __u32 file_flags; 1423 + /* Same as dirfd in openat() syscall; see openat(2) 1424 + * manpage for details of path FD and pathname semantics; 1425 + * path_fd should be accompanied by BPF_F_PATH_FD flag set in 1426 + * file_flags field, otherwise it should be set to zero; 1427 + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. 1428 + */ 1429 + __s32 path_fd; 1426 1430 }; 1427 1431 1428 1432 struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+54 -11
kernel/bpf/btf.c
··· 222 222 enum { 223 223 BTF_KFUNC_SET_MAX_CNT = 256, 224 224 BTF_DTOR_KFUNC_MAX_CNT = 256, 225 + BTF_KFUNC_FILTER_MAX_CNT = 16, 226 + }; 227 + 228 + struct btf_kfunc_hook_filter { 229 + btf_kfunc_filter_t filters[BTF_KFUNC_FILTER_MAX_CNT]; 230 + u32 nr_filters; 225 231 }; 226 232 227 233 struct btf_kfunc_set_tab { 228 234 struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX]; 235 + struct btf_kfunc_hook_filter hook_filters[BTF_KFUNC_HOOK_MAX]; 229 236 }; 230 237 231 238 struct btf_id_dtor_kfunc_tab { ··· 7676 7669 /* Kernel Function (kfunc) BTF ID set registration API */ 7677 7670 7678 7671 static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, 7679 - struct btf_id_set8 *add_set) 7672 + const struct btf_kfunc_id_set *kset) 7680 7673 { 7674 + struct btf_kfunc_hook_filter *hook_filter; 7675 + struct btf_id_set8 *add_set = kset->set; 7681 7676 bool vmlinux_set = !btf_is_module(btf); 7677 + bool add_filter = !!kset->filter; 7682 7678 struct btf_kfunc_set_tab *tab; 7683 7679 struct btf_id_set8 *set; 7684 7680 u32 set_cnt; ··· 7696 7686 return 0; 7697 7687 7698 7688 tab = btf->kfunc_set_tab; 7689 + 7690 + if (tab && add_filter) { 7691 + u32 i; 7692 + 7693 + hook_filter = &tab->hook_filters[hook]; 7694 + for (i = 0; i < hook_filter->nr_filters; i++) { 7695 + if (hook_filter->filters[i] == kset->filter) { 7696 + add_filter = false; 7697 + break; 7698 + } 7699 + } 7700 + 7701 + if (add_filter && hook_filter->nr_filters == BTF_KFUNC_FILTER_MAX_CNT) { 7702 + ret = -E2BIG; 7703 + goto end; 7704 + } 7705 + } 7706 + 7699 7707 if (!tab) { 7700 7708 tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN); 7701 7709 if (!tab) ··· 7736 7708 */ 7737 7709 if (!vmlinux_set) { 7738 7710 tab->sets[hook] = add_set; 7739 - return 0; 7711 + goto do_add_filter; 7740 7712 } 7741 7713 7742 7714 /* In case of vmlinux sets, there may be more than one set being ··· 7778 7750 7779 7751 sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL); 7780 7752 7753 + 
do_add_filter: 7754 + if (add_filter) { 7755 + hook_filter = &tab->hook_filters[hook]; 7756 + hook_filter->filters[hook_filter->nr_filters++] = kset->filter; 7757 + } 7781 7758 return 0; 7782 7759 end: 7783 7760 btf_free_kfunc_set_tab(btf); ··· 7791 7758 7792 7759 static u32 *__btf_kfunc_id_set_contains(const struct btf *btf, 7793 7760 enum btf_kfunc_hook hook, 7794 - u32 kfunc_btf_id) 7761 + u32 kfunc_btf_id, 7762 + const struct bpf_prog *prog) 7795 7763 { 7764 + struct btf_kfunc_hook_filter *hook_filter; 7796 7765 struct btf_id_set8 *set; 7797 - u32 *id; 7766 + u32 *id, i; 7798 7767 7799 7768 if (hook >= BTF_KFUNC_HOOK_MAX) 7800 7769 return NULL; 7801 7770 if (!btf->kfunc_set_tab) 7802 7771 return NULL; 7772 + hook_filter = &btf->kfunc_set_tab->hook_filters[hook]; 7773 + for (i = 0; i < hook_filter->nr_filters; i++) { 7774 + if (hook_filter->filters[i](prog, kfunc_btf_id)) 7775 + return NULL; 7776 + } 7803 7777 set = btf->kfunc_set_tab->sets[hook]; 7804 7778 if (!set) 7805 7779 return NULL; ··· 7861 7821 * protection for looking up a well-formed btf->kfunc_set_tab. 
7862 7822 */ 7863 7823 u32 *btf_kfunc_id_set_contains(const struct btf *btf, 7864 - enum bpf_prog_type prog_type, 7865 - u32 kfunc_btf_id) 7824 + u32 kfunc_btf_id, 7825 + const struct bpf_prog *prog) 7866 7826 { 7827 + enum bpf_prog_type prog_type = resolve_prog_type(prog); 7867 7828 enum btf_kfunc_hook hook; 7868 7829 u32 *kfunc_flags; 7869 7830 7870 - kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id); 7831 + kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id, prog); 7871 7832 if (kfunc_flags) 7872 7833 return kfunc_flags; 7873 7834 7874 7835 hook = bpf_prog_type_to_kfunc_hook(prog_type); 7875 - return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id); 7836 + return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id, prog); 7876 7837 } 7877 7838 7878 - u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id) 7839 + u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id, 7840 + const struct bpf_prog *prog) 7879 7841 { 7880 - return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id); 7842 + return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id, prog); 7881 7843 } 7882 7844 7883 7845 static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, ··· 7910 7868 goto err_out; 7911 7869 } 7912 7870 7913 - ret = btf_populate_kfunc_set(btf, hook, kset->set); 7871 + ret = btf_populate_kfunc_set(btf, hook, kset); 7872 + 7914 7873 err_out: 7915 7874 btf_put(btf); 7916 7875 return ret;
+13 -14
kernel/bpf/inode.c
··· 435 435 return ret; 436 436 } 437 437 438 - static int bpf_obj_do_pin(const char __user *pathname, void *raw, 438 + static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw, 439 439 enum bpf_type type) 440 440 { 441 441 struct dentry *dentry; ··· 444 444 umode_t mode; 445 445 int ret; 446 446 447 - dentry = user_path_create(AT_FDCWD, pathname, &path, 0); 447 + dentry = user_path_create(path_fd, pathname, &path, 0); 448 448 if (IS_ERR(dentry)) 449 449 return PTR_ERR(dentry); 450 - 451 - mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); 452 - 453 - ret = security_path_mknod(&path, dentry, mode, 0); 454 - if (ret) 455 - goto out; 456 450 457 451 dir = d_inode(path.dentry); 458 452 if (dir->i_op != &bpf_dir_iops) { 459 453 ret = -EPERM; 460 454 goto out; 461 455 } 456 + 457 + mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); 458 + ret = security_path_mknod(&path, dentry, mode, 0); 459 + if (ret) 460 + goto out; 462 461 463 462 switch (type) { 464 463 case BPF_TYPE_PROG: ··· 477 478 return ret; 478 479 } 479 480 480 - int bpf_obj_pin_user(u32 ufd, const char __user *pathname) 481 + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname) 481 482 { 482 483 enum bpf_type type; 483 484 void *raw; ··· 487 488 if (IS_ERR(raw)) 488 489 return PTR_ERR(raw); 489 490 490 - ret = bpf_obj_do_pin(pathname, raw, type); 491 + ret = bpf_obj_do_pin(path_fd, pathname, raw, type); 491 492 if (ret != 0) 492 493 bpf_any_put(raw, type); 493 494 494 495 return ret; 495 496 } 496 497 497 - static void *bpf_obj_do_get(const char __user *pathname, 498 + static void *bpf_obj_do_get(int path_fd, const char __user *pathname, 498 499 enum bpf_type *type, int flags) 499 500 { 500 501 struct inode *inode; ··· 502 503 void *raw; 503 504 int ret; 504 505 505 - ret = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path); 506 + ret = user_path_at(path_fd, pathname, LOOKUP_FOLLOW, &path); 506 507 if (ret) 507 508 return ERR_PTR(ret); 508 509 ··· 
526 527 return ERR_PTR(ret); 527 528 } 528 529 529 - int bpf_obj_get_user(const char __user *pathname, int flags) 530 + int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags) 530 531 { 531 532 enum bpf_type type = BPF_TYPE_UNSPEC; 532 533 int f_flags; ··· 537 538 if (f_flags < 0) 538 539 return f_flags; 539 540 540 - raw = bpf_obj_do_get(pathname, &type, f_flags); 541 + raw = bpf_obj_do_get(path_fd, pathname, &type, f_flags); 541 542 if (IS_ERR(raw)) 542 543 return PTR_ERR(raw); 543 544
-3
kernel/bpf/log.c
··· 62 62 63 63 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args); 64 64 65 - WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, 66 - "verifier log line truncated - local buffer too short\n"); 67 - 68 65 if (log->level == BPF_LOG_KERNEL) { 69 66 bool newline = n > 0 && log->kbuf[n - 1] == '\n'; 70 67
+34 -11
kernel/bpf/syscall.c
··· 1931 1931 return -ENOTSUPP; 1932 1932 } 1933 1933 1934 + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1935 + fdput(f); 1936 + return -EPERM; 1937 + } 1938 + 1934 1939 mutex_lock(&map->freeze_mutex); 1935 1940 if (bpf_map_write_active(map)) { 1936 1941 err = -EBUSY; ··· 1943 1938 } 1944 1939 if (READ_ONCE(map->frozen)) { 1945 1940 err = -EBUSY; 1946 - goto err_put; 1947 - } 1948 - if (!bpf_capable()) { 1949 - err = -EPERM; 1950 1941 goto err_put; 1951 1942 } 1952 1943 ··· 2698 2697 return err; 2699 2698 } 2700 2699 2701 - #define BPF_OBJ_LAST_FIELD file_flags 2700 + #define BPF_OBJ_LAST_FIELD path_fd 2702 2701 2703 2702 static int bpf_obj_pin(const union bpf_attr *attr) 2704 2703 { 2705 - if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) 2704 + int path_fd; 2705 + 2706 + if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD) 2706 2707 return -EINVAL; 2707 2708 2708 - return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); 2709 + /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 2710 + if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 2711 + return -EINVAL; 2712 + 2713 + path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 2714 + return bpf_obj_pin_user(attr->bpf_fd, path_fd, 2715 + u64_to_user_ptr(attr->pathname)); 2709 2716 } 2710 2717 2711 2718 static int bpf_obj_get(const union bpf_attr *attr) 2712 2719 { 2720 + int path_fd; 2721 + 2713 2722 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || 2714 - attr->file_flags & ~BPF_OBJ_FLAG_MASK) 2723 + attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD)) 2715 2724 return -EINVAL; 2716 2725 2717 - return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), 2726 + /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 2727 + if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 2728 + return -EINVAL; 2729 + 2730 + path_fd = attr->file_flags & BPF_F_PATH_FD ? 
attr->path_fd : AT_FDCWD; 2731 + return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname), 2718 2732 attr->file_flags); 2719 2733 } 2720 2734 ··· 2984 2968 { 2985 2969 struct bpf_tracing_link *tr_link = 2986 2970 container_of(link, struct bpf_tracing_link, link.link); 2971 + u32 target_btf_id, target_obj_id; 2987 2972 2973 + bpf_trampoline_unpack_key(tr_link->trampoline->key, 2974 + &target_obj_id, &target_btf_id); 2988 2975 seq_printf(seq, 2989 - "attach_type:\t%d\n", 2990 - tr_link->attach_type); 2976 + "attach_type:\t%d\n" 2977 + "target_obj_id:\t%u\n" 2978 + "target_btf_id:\t%u\n", 2979 + tr_link->attach_type, 2980 + target_obj_id, 2981 + target_btf_id); 2991 2982 } 2992 2983 2993 2984 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
+4 -3
kernel/bpf/verifier.c
··· 10939 10939 *kfunc_name = func_name; 10940 10940 func_proto = btf_type_by_id(desc_btf, func->type); 10941 10941 10942 - kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id); 10942 + kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog); 10943 10943 if (!kfunc_flags) { 10944 10944 return -EACCES; 10945 10945 } ··· 19010 19010 * in the fmodret id set with the KF_SLEEPABLE flag. 19011 19011 */ 19012 19012 else { 19013 - u32 *flags = btf_kfunc_is_modify_return(btf, btf_id); 19013 + u32 *flags = btf_kfunc_is_modify_return(btf, btf_id, 19014 + prog); 19014 19015 19015 19016 if (flags && (*flags & KF_SLEEPABLE)) 19016 19017 ret = 0; ··· 19039 19038 return -EINVAL; 19040 19039 } 19041 19040 ret = -EINVAL; 19042 - if (btf_kfunc_is_modify_return(btf, btf_id) || 19041 + if (btf_kfunc_is_modify_return(btf, btf_id, prog) || 19043 19042 !check_attach_modify_return(addr, tname)) 19044 19043 ret = 0; 19045 19044 if (ret) {
-201
net/bpf/test_run.c
··· 561 561 return a + *b; 562 562 } 563 563 564 - __bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) 565 - { 566 - return a + b + c + d; 567 - } 568 - 569 - __bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) 570 - { 571 - return a + b; 572 - } 573 - 574 - __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk) 575 - { 576 - return sk; 577 - } 578 - 579 - long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) 580 - { 581 - /* Provoke the compiler to assume that the caller has sign-extended a, 582 - * b and c on platforms where this is required (e.g. s390x). 583 - */ 584 - return (long)a + (long)b + (long)c + d; 585 - } 586 - 587 564 int noinline bpf_fentry_shadow_test(int a) 588 565 { 589 566 return a + 1; ··· 583 606 refcount_t cnt; 584 607 }; 585 608 586 - static struct prog_test_ref_kfunc prog_test_struct = { 587 - .a = 42, 588 - .b = 108, 589 - .next = &prog_test_struct, 590 - .cnt = REFCOUNT_INIT(1), 591 - }; 592 - 593 - __bpf_kfunc struct prog_test_ref_kfunc * 594 - bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) 595 - { 596 - refcount_inc(&prog_test_struct.cnt); 597 - return &prog_test_struct; 598 - } 599 - 600 - __bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) 601 - { 602 - WARN_ON_ONCE(1); 603 - } 604 - 605 - __bpf_kfunc struct prog_test_member * 606 - bpf_kfunc_call_memb_acquire(void) 607 - { 608 - WARN_ON_ONCE(1); 609 - return NULL; 610 - } 611 - 612 609 __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) 613 610 { 614 611 refcount_dec(&p->cnt); ··· 590 639 591 640 __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p) 592 641 { 593 - } 594 - 595 - __bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) 596 - { 597 - WARN_ON_ONCE(1); 598 - } 599 - 600 - static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size) 601 - { 602 - if (size > 2 * sizeof(int)) 603 - 
return NULL; 604 - 605 - return (int *)p; 606 - } 607 - 608 - __bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, 609 - const int rdwr_buf_size) 610 - { 611 - return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); 612 - } 613 - 614 - __bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, 615 - const int rdonly_buf_size) 616 - { 617 - return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); 618 - } 619 - 620 - /* the next 2 ones can't be really used for testing expect to ensure 621 - * that the verifier rejects the call. 622 - * Acquire functions must return struct pointers, so these ones are 623 - * failing. 624 - */ 625 - __bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, 626 - const int rdonly_buf_size) 627 - { 628 - return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); 629 - } 630 - 631 - __bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p) 632 - { 633 - } 634 - 635 - struct prog_test_pass1 { 636 - int x0; 637 - struct { 638 - int x1; 639 - struct { 640 - int x2; 641 - struct { 642 - int x3; 643 - }; 644 - }; 645 - }; 646 - }; 647 - 648 - struct prog_test_pass2 { 649 - int len; 650 - short arr1[4]; 651 - struct { 652 - char arr2[4]; 653 - unsigned long arr3[8]; 654 - } x; 655 - }; 656 - 657 - struct prog_test_fail1 { 658 - void *p; 659 - int x; 660 - }; 661 - 662 - struct prog_test_fail2 { 663 - int x8; 664 - struct prog_test_pass1 x; 665 - }; 666 - 667 - struct prog_test_fail3 { 668 - int len; 669 - char arr1[2]; 670 - char arr2[]; 671 - }; 672 - 673 - __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) 674 - { 675 - } 676 - 677 - __bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) 678 - { 679 - } 680 - 681 - __bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) 682 - { 683 - } 684 - 685 - __bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) 686 - { 687 - } 688 - 689 - __bpf_kfunc void 
bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) 690 - { 691 - } 692 - 693 - __bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) 694 - { 695 - } 696 - 697 - __bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) 698 - { 699 - } 700 - 701 - __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) 702 - { 703 - } 704 - 705 - __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) 706 - { 707 - } 708 - 709 - __bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) 710 - { 711 - /* p != NULL, but p->cnt could be 0 */ 712 - } 713 - 714 - __bpf_kfunc void bpf_kfunc_call_test_destructive(void) 715 - { 716 - } 717 - 718 - __bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) 719 - { 720 - return arg; 721 642 } 722 643 723 644 __diag_pop(); ··· 605 782 }; 606 783 607 784 BTF_SET8_START(test_sk_check_kfunc_ids) 608 - BTF_ID_FLAGS(func, bpf_kfunc_call_test1) 609 - BTF_ID_FLAGS(func, bpf_kfunc_call_test2) 610 - BTF_ID_FLAGS(func, bpf_kfunc_call_test3) 611 - BTF_ID_FLAGS(func, bpf_kfunc_call_test4) 612 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) 613 - BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) 614 785 BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) 615 786 BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) 616 - BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) 617 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL) 618 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL) 619 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL) 620 - BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE) 621 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) 622 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) 623 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) 624 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) 625 - 
BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) 626 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) 627 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) 628 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) 629 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) 630 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) 631 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) 632 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) 633 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) 634 787 BTF_SET8_END(test_sk_check_kfunc_ids) 635 788 636 789 static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
+63
net/core/filter.c
··· 11723 11723 return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); 11724 11724 } 11725 11725 late_initcall(bpf_kfunc_init); 11726 + 11727 + /* Disables missing prototype warnings */ 11728 + __diag_push(); 11729 + __diag_ignore_all("-Wmissing-prototypes", 11730 + "Global functions as their definitions will be in vmlinux BTF"); 11731 + 11732 + /* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code. 11733 + * 11734 + * The function expects a non-NULL pointer to a socket, and invokes the 11735 + * protocol specific socket destroy handlers. 11736 + * 11737 + * The helper can only be called from BPF contexts that have acquired the socket 11738 + * locks. 11739 + * 11740 + * Parameters: 11741 + * @sock: Pointer to socket to be destroyed 11742 + * 11743 + * Return: 11744 + * On error, may return EOPNOTSUPP, EINVAL. 11745 + * EOPNOTSUPP if protocol specific destroy handler is not supported. 11746 + * 0 otherwise 11747 + */ 11748 + __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) 11749 + { 11750 + struct sock *sk = (struct sock *)sock; 11751 + 11752 + /* The locking semantics that allow for synchronous execution of the 11753 + * destroy handlers are only supported for TCP and UDP. 11754 + * Supporting protocols will need to acquire sock lock in the BPF context 11755 + * prior to invoking this kfunc. 
11756 + */ 11757 + if (!sk->sk_prot->diag_destroy || (sk->sk_protocol != IPPROTO_TCP && 11758 + sk->sk_protocol != IPPROTO_UDP)) 11759 + return -EOPNOTSUPP; 11760 + 11761 + return sk->sk_prot->diag_destroy(sk, ECONNABORTED); 11762 + } 11763 + 11764 + __diag_pop() 11765 + 11766 + BTF_SET8_START(bpf_sk_iter_kfunc_ids) 11767 + BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) 11768 + BTF_SET8_END(bpf_sk_iter_kfunc_ids) 11769 + 11770 + static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id) 11771 + { 11772 + if (btf_id_set8_contains(&bpf_sk_iter_kfunc_ids, kfunc_id) && 11773 + prog->expected_attach_type != BPF_TRACE_ITER) 11774 + return -EACCES; 11775 + return 0; 11776 + } 11777 + 11778 + static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = { 11779 + .owner = THIS_MODULE, 11780 + .set = &bpf_sk_iter_kfunc_ids, 11781 + .filter = tracing_iter_filter, 11782 + }; 11783 + 11784 + static int init_subsystem(void) 11785 + { 11786 + return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sk_iter_kfunc_set); 11787 + } 11788 + late_initcall(init_subsystem);
+6 -3
net/ipv4/tcp.c
··· 4553 4553 return 0; 4554 4554 } 4555 4555 4556 - /* Don't race with userspace socket closes such as tcp_close. */ 4557 - lock_sock(sk); 4556 + /* BPF context ensures sock locking. */ 4557 + if (!has_current_bpf_ctx()) 4558 + /* Don't race with userspace socket closes such as tcp_close. */ 4559 + lock_sock(sk); 4558 4560 4559 4561 if (sk->sk_state == TCP_LISTEN) { 4560 4562 tcp_set_state(sk, TCP_CLOSE); ··· 4580 4578 bh_unlock_sock(sk); 4581 4579 local_bh_enable(); 4582 4580 tcp_write_queue_purge(sk); 4583 - release_sock(sk); 4581 + if (!has_current_bpf_ctx()) 4582 + release_sock(sk); 4584 4583 return 0; 4585 4584 } 4586 4585 EXPORT_SYMBOL_GPL(tcp_abort);
+3 -4
net/ipv4/tcp_ipv4.c
··· 2967 2967 struct bpf_iter_meta meta; 2968 2968 struct bpf_prog *prog; 2969 2969 struct sock *sk = v; 2970 - bool slow; 2971 2970 uid_t uid; 2972 2971 int ret; 2973 2972 ··· 2974 2975 return 0; 2975 2976 2976 2977 if (sk_fullsock(sk)) 2977 - slow = lock_sock_fast(sk); 2978 + lock_sock(sk); 2978 2979 2979 2980 if (unlikely(sk_unhashed(sk))) { 2980 2981 ret = SEQ_SKIP; ··· 2998 2999 2999 3000 unlock: 3000 3001 if (sk_fullsock(sk)) 3001 - unlock_sock_fast(sk, slow); 3002 + release_sock(sk); 3002 3003 return ret; 3003 3004 3004 3005 } ··· 3360 3361 .ctx_arg_info_size = 1, 3361 3362 .ctx_arg_info = { 3362 3363 { offsetof(struct bpf_iter__tcp, sk_common), 3363 - PTR_TO_BTF_ID_OR_NULL }, 3364 + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, 3364 3365 }, 3365 3366 .get_func_proto = bpf_iter_tcp_get_func_proto, 3366 3367 .seq_info = &tcp_seq_info,
+241 -50
net/ipv4/udp.c
··· 2883 2883 2884 2884 int udp_abort(struct sock *sk, int err) 2885 2885 { 2886 - lock_sock(sk); 2886 + if (!has_current_bpf_ctx()) 2887 + lock_sock(sk); 2887 2888 2888 2889 /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing 2889 2890 * with close() ··· 2897 2896 __udp_disconnect(sk, 0); 2898 2897 2899 2898 out: 2900 - release_sock(sk); 2899 + if (!has_current_bpf_ctx()) 2900 + release_sock(sk); 2901 2901 2902 2902 return 0; 2903 2903 } ··· 2943 2941 /* ------------------------------------------------------------------------ */ 2944 2942 #ifdef CONFIG_PROC_FS 2945 2943 2946 - static struct udp_table *udp_get_table_afinfo(struct udp_seq_afinfo *afinfo, 2947 - struct net *net) 2944 + static unsigned short seq_file_family(const struct seq_file *seq); 2945 + static bool seq_sk_match(struct seq_file *seq, const struct sock *sk) 2948 2946 { 2947 + unsigned short family = seq_file_family(seq); 2948 + 2949 + /* AF_UNSPEC is used as a match all */ 2950 + return ((family == AF_UNSPEC || family == sk->sk_family) && 2951 + net_eq(sock_net(sk), seq_file_net(seq))); 2952 + } 2953 + 2954 + #ifdef CONFIG_BPF_SYSCALL 2955 + static const struct seq_operations bpf_iter_udp_seq_ops; 2956 + #endif 2957 + static struct udp_table *udp_get_table_seq(struct seq_file *seq, 2958 + struct net *net) 2959 + { 2960 + const struct udp_seq_afinfo *afinfo; 2961 + 2962 + #ifdef CONFIG_BPF_SYSCALL 2963 + if (seq->op == &bpf_iter_udp_seq_ops) 2964 + return net->ipv4.udp_table; 2965 + #endif 2966 + 2967 + afinfo = pde_data(file_inode(seq->file)); 2949 2968 return afinfo->udp_table ? 
: net->ipv4.udp_table; 2950 2969 } 2951 2970 ··· 2974 2951 { 2975 2952 struct udp_iter_state *state = seq->private; 2976 2953 struct net *net = seq_file_net(seq); 2977 - struct udp_seq_afinfo *afinfo; 2978 2954 struct udp_table *udptable; 2979 2955 struct sock *sk; 2980 2956 2981 - if (state->bpf_seq_afinfo) 2982 - afinfo = state->bpf_seq_afinfo; 2983 - else 2984 - afinfo = pde_data(file_inode(seq->file)); 2985 - 2986 - udptable = udp_get_table_afinfo(afinfo, net); 2957 + udptable = udp_get_table_seq(seq, net); 2987 2958 2988 2959 for (state->bucket = start; state->bucket <= udptable->mask; 2989 2960 ++state->bucket) { ··· 2988 2971 2989 2972 spin_lock_bh(&hslot->lock); 2990 2973 sk_for_each(sk, &hslot->head) { 2991 - if (!net_eq(sock_net(sk), net)) 2992 - continue; 2993 - if (afinfo->family == AF_UNSPEC || 2994 - sk->sk_family == afinfo->family) 2974 + if (seq_sk_match(seq, sk)) 2995 2975 goto found; 2996 2976 } 2997 2977 spin_unlock_bh(&hslot->lock); ··· 3002 2988 { 3003 2989 struct udp_iter_state *state = seq->private; 3004 2990 struct net *net = seq_file_net(seq); 3005 - struct udp_seq_afinfo *afinfo; 3006 2991 struct udp_table *udptable; 3007 - 3008 - if (state->bpf_seq_afinfo) 3009 - afinfo = state->bpf_seq_afinfo; 3010 - else 3011 - afinfo = pde_data(file_inode(seq->file)); 3012 2992 3013 2993 do { 3014 2994 sk = sk_next(sk); 3015 - } while (sk && (!net_eq(sock_net(sk), net) || 3016 - (afinfo->family != AF_UNSPEC && 3017 - sk->sk_family != afinfo->family))); 2995 + } while (sk && !seq_sk_match(seq, sk)); 3018 2996 3019 2997 if (!sk) { 3020 - udptable = udp_get_table_afinfo(afinfo, net); 2998 + udptable = udp_get_table_seq(seq, net); 3021 2999 3022 3000 if (state->bucket <= udptable->mask) 3023 3001 spin_unlock_bh(&udptable->hash[state->bucket].lock); ··· 3055 3049 void udp_seq_stop(struct seq_file *seq, void *v) 3056 3050 { 3057 3051 struct udp_iter_state *state = seq->private; 3058 - struct udp_seq_afinfo *afinfo; 3059 3052 struct udp_table *udptable; 3060 
3053 3061 - if (state->bpf_seq_afinfo) 3062 - afinfo = state->bpf_seq_afinfo; 3063 - else 3064 - afinfo = pde_data(file_inode(seq->file)); 3065 - 3066 - udptable = udp_get_table_afinfo(afinfo, seq_file_net(seq)); 3054 + udptable = udp_get_table_seq(seq, seq_file_net(seq)); 3067 3055 3068 3056 if (state->bucket <= udptable->mask) 3069 3057 spin_unlock_bh(&udptable->hash[state->bucket].lock); ··· 3110 3110 int bucket __aligned(8); 3111 3111 }; 3112 3112 3113 + struct bpf_udp_iter_state { 3114 + struct udp_iter_state state; 3115 + unsigned int cur_sk; 3116 + unsigned int end_sk; 3117 + unsigned int max_sk; 3118 + int offset; 3119 + struct sock **batch; 3120 + bool st_bucket_done; 3121 + }; 3122 + 3123 + static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter, 3124 + unsigned int new_batch_sz); 3125 + static struct sock *bpf_iter_udp_batch(struct seq_file *seq) 3126 + { 3127 + struct bpf_udp_iter_state *iter = seq->private; 3128 + struct udp_iter_state *state = &iter->state; 3129 + struct net *net = seq_file_net(seq); 3130 + struct udp_table *udptable; 3131 + unsigned int batch_sks = 0; 3132 + bool resized = false; 3133 + struct sock *sk; 3134 + 3135 + /* The current batch is done, so advance the bucket. */ 3136 + if (iter->st_bucket_done) { 3137 + state->bucket++; 3138 + iter->offset = 0; 3139 + } 3140 + 3141 + udptable = udp_get_table_seq(seq, net); 3142 + 3143 + again: 3144 + /* New batch for the next bucket. 3145 + * Iterate over the hash table to find a bucket with sockets matching 3146 + * the iterator attributes, and return the first matching socket from 3147 + * the bucket. The remaining matched sockets from the bucket are batched 3148 + * before releasing the bucket lock. This allows BPF programs that are 3149 + * called in seq_show to acquire the bucket lock if needed. 
3150 + */ 3151 + iter->cur_sk = 0; 3152 + iter->end_sk = 0; 3153 + iter->st_bucket_done = false; 3154 + batch_sks = 0; 3155 + 3156 + for (; state->bucket <= udptable->mask; state->bucket++) { 3157 + struct udp_hslot *hslot2 = &udptable->hash2[state->bucket]; 3158 + 3159 + if (hlist_empty(&hslot2->head)) { 3160 + iter->offset = 0; 3161 + continue; 3162 + } 3163 + 3164 + spin_lock_bh(&hslot2->lock); 3165 + udp_portaddr_for_each_entry(sk, &hslot2->head) { 3166 + if (seq_sk_match(seq, sk)) { 3167 + /* Resume from the last iterated socket at the 3168 + * offset in the bucket before iterator was stopped. 3169 + */ 3170 + if (iter->offset) { 3171 + --iter->offset; 3172 + continue; 3173 + } 3174 + if (iter->end_sk < iter->max_sk) { 3175 + sock_hold(sk); 3176 + iter->batch[iter->end_sk++] = sk; 3177 + } 3178 + batch_sks++; 3179 + } 3180 + } 3181 + spin_unlock_bh(&hslot2->lock); 3182 + 3183 + if (iter->end_sk) 3184 + break; 3185 + 3186 + /* Reset the current bucket's offset before moving to the next bucket. */ 3187 + iter->offset = 0; 3188 + } 3189 + 3190 + /* All done: no batch made. */ 3191 + if (!iter->end_sk) 3192 + return NULL; 3193 + 3194 + if (iter->end_sk == batch_sks) { 3195 + /* Batching is done for the current bucket; return the first 3196 + * socket to be iterated from the batch. 3197 + */ 3198 + iter->st_bucket_done = true; 3199 + goto done; 3200 + } 3201 + if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) { 3202 + resized = true; 3203 + /* After allocating a larger batch, retry one more time to grab 3204 + * the whole bucket. 3205 + */ 3206 + state->bucket--; 3207 + goto again; 3208 + } 3209 + done: 3210 + return iter->batch[0]; 3211 + } 3212 + 3213 + static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3214 + { 3215 + struct bpf_udp_iter_state *iter = seq->private; 3216 + struct sock *sk; 3217 + 3218 + /* Whenever seq_next() is called, the iter->cur_sk is 3219 + * done with seq_show(), so unref the iter->cur_sk. 
3220 + */ 3221 + if (iter->cur_sk < iter->end_sk) { 3222 + sock_put(iter->batch[iter->cur_sk++]); 3223 + ++iter->offset; 3224 + } 3225 + 3226 + /* After updating iter->cur_sk, check if there are more sockets 3227 + * available in the current bucket batch. 3228 + */ 3229 + if (iter->cur_sk < iter->end_sk) 3230 + sk = iter->batch[iter->cur_sk]; 3231 + else 3232 + /* Prepare a new batch. */ 3233 + sk = bpf_iter_udp_batch(seq); 3234 + 3235 + ++*pos; 3236 + return sk; 3237 + } 3238 + 3239 + static void *bpf_iter_udp_seq_start(struct seq_file *seq, loff_t *pos) 3240 + { 3241 + /* bpf iter does not support lseek, so it always 3242 + * continue from where it was stop()-ped. 3243 + */ 3244 + if (*pos) 3245 + return bpf_iter_udp_batch(seq); 3246 + 3247 + return SEQ_START_TOKEN; 3248 + } 3249 + 3113 3250 static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3114 3251 struct udp_sock *udp_sk, uid_t uid, int bucket) 3115 3252 { ··· 3267 3130 struct bpf_prog *prog; 3268 3131 struct sock *sk = v; 3269 3132 uid_t uid; 3133 + int ret; 3270 3134 3271 3135 if (v == SEQ_START_TOKEN) 3272 3136 return 0; 3273 3137 3138 + lock_sock(sk); 3139 + 3140 + if (unlikely(sk_unhashed(sk))) { 3141 + ret = SEQ_SKIP; 3142 + goto unlock; 3143 + } 3144 + 3274 3145 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3275 3146 meta.seq = seq; 3276 3147 prog = bpf_iter_get_info(&meta, false); 3277 - return udp_prog_seq_show(prog, &meta, v, uid, state->bucket); 3148 + ret = udp_prog_seq_show(prog, &meta, v, uid, state->bucket); 3149 + 3150 + unlock: 3151 + release_sock(sk); 3152 + return ret; 3153 + } 3154 + 3155 + static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter) 3156 + { 3157 + while (iter->cur_sk < iter->end_sk) 3158 + sock_put(iter->batch[iter->cur_sk++]); 3278 3159 } 3279 3160 3280 3161 static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v) 3281 3162 { 3163 + struct bpf_udp_iter_state *iter = seq->private; 3282 3164 struct bpf_iter_meta 
meta; 3283 3165 struct bpf_prog *prog; 3284 3166 ··· 3308 3152 (void)udp_prog_seq_show(prog, &meta, v, 0, 0); 3309 3153 } 3310 3154 3311 - udp_seq_stop(seq, v); 3155 + if (iter->cur_sk < iter->end_sk) { 3156 + bpf_iter_udp_put_batch(iter); 3157 + iter->st_bucket_done = false; 3158 + } 3312 3159 } 3313 3160 3314 3161 static const struct seq_operations bpf_iter_udp_seq_ops = { 3315 - .start = udp_seq_start, 3316 - .next = udp_seq_next, 3162 + .start = bpf_iter_udp_seq_start, 3163 + .next = bpf_iter_udp_seq_next, 3317 3164 .stop = bpf_iter_udp_seq_stop, 3318 3165 .show = bpf_iter_udp_seq_show, 3319 3166 }; 3320 3167 #endif 3168 + 3169 + static unsigned short seq_file_family(const struct seq_file *seq) 3170 + { 3171 + const struct udp_seq_afinfo *afinfo; 3172 + 3173 + #ifdef CONFIG_BPF_SYSCALL 3174 + /* BPF iterator: bpf programs to filter sockets. */ 3175 + if (seq->op == &bpf_iter_udp_seq_ops) 3176 + return AF_UNSPEC; 3177 + #endif 3178 + 3179 + /* Proc fs iterator */ 3180 + afinfo = pde_data(file_inode(seq->file)); 3181 + return afinfo->family; 3182 + } 3321 3183 3322 3184 const struct seq_operations udp_seq_ops = { 3323 3185 .start = udp_seq_start, ··· 3545 3371 DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta, 3546 3372 struct udp_sock *udp_sk, uid_t uid, int bucket) 3547 3373 3548 - static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) 3374 + static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter, 3375 + unsigned int new_batch_sz) 3549 3376 { 3550 - struct udp_iter_state *st = priv_data; 3551 - struct udp_seq_afinfo *afinfo; 3552 - int ret; 3377 + struct sock **new_batch; 3553 3378 3554 - afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN); 3555 - if (!afinfo) 3379 + new_batch = kvmalloc_array(new_batch_sz, sizeof(*new_batch), 3380 + GFP_USER | __GFP_NOWARN); 3381 + if (!new_batch) 3556 3382 return -ENOMEM; 3557 3383 3558 - afinfo->family = AF_UNSPEC; 3559 - afinfo->udp_table = NULL; 3560 - st->bpf_seq_afinfo = 
afinfo; 3384 + bpf_iter_udp_put_batch(iter); 3385 + kvfree(iter->batch); 3386 + iter->batch = new_batch; 3387 + iter->max_sk = new_batch_sz; 3388 + 3389 + return 0; 3390 + } 3391 + 3392 + #define INIT_BATCH_SZ 16 3393 + 3394 + static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) 3395 + { 3396 + struct bpf_udp_iter_state *iter = priv_data; 3397 + int ret; 3398 + 3561 3399 ret = bpf_iter_init_seq_net(priv_data, aux); 3562 3400 if (ret) 3563 - kfree(afinfo); 3401 + return ret; 3402 + 3403 + ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ); 3404 + if (ret) 3405 + bpf_iter_fini_seq_net(priv_data); 3406 + 3564 3407 return ret; 3565 3408 } 3566 3409 3567 3410 static void bpf_iter_fini_udp(void *priv_data) 3568 3411 { 3569 - struct udp_iter_state *st = priv_data; 3412 + struct bpf_udp_iter_state *iter = priv_data; 3570 3413 3571 - kfree(st->bpf_seq_afinfo); 3572 3414 bpf_iter_fini_seq_net(priv_data); 3415 + kvfree(iter->batch); 3573 3416 } 3574 3417 3575 3418 static const struct bpf_iter_seq_info udp_seq_info = { 3576 3419 .seq_ops = &bpf_iter_udp_seq_ops, 3577 3420 .init_seq_private = bpf_iter_init_udp, 3578 3421 .fini_seq_private = bpf_iter_fini_udp, 3579 - .seq_priv_size = sizeof(struct udp_iter_state), 3422 + .seq_priv_size = sizeof(struct bpf_udp_iter_state), 3580 3423 }; 3581 3424 3582 3425 static struct bpf_iter_reg udp_reg_info = { ··· 3601 3410 .ctx_arg_info_size = 1, 3602 3411 .ctx_arg_info = { 3603 3412 { offsetof(struct bpf_iter__udp, udp_sk), 3604 - PTR_TO_BTF_ID_OR_NULL }, 3413 + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, 3605 3414 }, 3606 3415 .seq_info = &udp_seq_info, 3607 3416 };
+4 -4
tools/bpf/bpftool/Documentation/bpftool-map.rst
··· 28 28 | **bpftool** **map** { **show** | **list** } [*MAP*] 29 29 | **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \ 30 30 | **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] \ 31 - | [**dev** *NAME*] 31 + | [**offload_dev** *NAME*] 32 32 | **bpftool** **map dump** *MAP* 33 33 | **bpftool** **map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] 34 34 | **bpftool** **map lookup** *MAP* [**key** *DATA*] ··· 73 73 maps. On such kernels bpftool will automatically emit this 74 74 information as well. 75 75 76 - **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**dev** *NAME*] 76 + **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**offload_dev** *NAME*] 77 77 Create a new map with given parameters and pin it to *bpffs* 78 78 as *FILE*. 79 79 ··· 86 86 kernel needs it to collect metadata related to the inner maps 87 87 that the new map will work with. 88 88 89 - Keyword **dev** expects a network interface name, and is used 90 - to request hardware offload for the map. 89 + Keyword **offload_dev** expects a network interface name, 90 + and is used to request hardware offload for the map. 91 91 92 92 **bpftool map dump** *MAP* 93 93 Dump all entries in a given *MAP*. In case of **name**,
+7 -4
tools/bpf/bpftool/Documentation/bpftool-prog.rst
··· 31 31 | **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }] 32 32 | **bpftool** **prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }] 33 33 | **bpftool** **prog pin** *PROG* *FILE* 34 - | **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] 34 + | **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] 35 35 | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] 36 36 | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] 37 37 | **bpftool** **prog tracelog** ··· 129 129 contain a dot character ('.'), which is reserved for future 130 130 extensions of *bpffs*. 131 131 132 - **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] 132 + **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] 133 133 Load bpf program(s) from binary *OBJ* and pin as *PATH*. 134 134 **bpftool prog load** pins only the first program from the 135 135 *OBJ* as *PATH*. **bpftool prog loadall** pins all programs ··· 143 143 to be replaced in the ELF file counting from 0, while *NAME* 144 144 allows to replace a map by name. *MAP* specifies the map to 145 145 use, referring to it by **id** or through a **pinned** file. 146 - If **dev** *NAME* is specified program will be loaded onto 147 - given networking device (offload). 146 + If **offload_dev** *NAME* is specified program will be loaded 147 + onto given networking device (offload). 
148 + If **xdpmeta_dev** *NAME* is specified program will become 149 + device-bound without offloading; this facilitates access 150 + to XDP metadata. 148 151 Optional **pinmaps** argument can be provided to pin all 149 152 maps under *MAP_DIR* directory. 150 153
+4 -3
tools/bpf/bpftool/bash-completion/bpftool
··· 278 278 _bpftool_get_prog_tags 279 279 return 0 280 280 ;; 281 - dev) 281 + dev|offload_dev|xdpmeta_dev) 282 282 _sysfs_get_netdevs 283 283 return 0 284 284 ;; ··· 508 508 ;; 509 509 *) 510 510 COMPREPLY=( $( compgen -W "map" -- "$cur" ) ) 511 - _bpftool_once_attr 'type dev pinmaps autoattach' 511 + _bpftool_once_attr 'type pinmaps autoattach' 512 + _bpftool_one_of_list 'offload_dev xdpmeta_dev' 512 513 return 0 513 514 ;; 514 515 esac ··· 734 733 esac 735 734 ;; 736 735 *) 737 - _bpftool_once_attr 'type key value entries name flags dev' 736 + _bpftool_once_attr 'type key value entries name flags offload_dev' 738 737 if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then 739 738 _bpftool_once_attr 'inner_map' 740 739 fi
+6 -3
tools/bpf/bpftool/common.c
··· 68 68 va_end(ap); 69 69 } 70 70 71 - static bool is_bpffs(char *path) 71 + static bool is_bpffs(const char *path) 72 72 { 73 73 struct statfs st_fs; 74 74 ··· 244 244 return fd; 245 245 } 246 246 247 - int mount_bpffs_for_pin(const char *name) 247 + int mount_bpffs_for_pin(const char *name, bool is_dir) 248 248 { 249 249 char err_str[ERR_MAX_LEN]; 250 250 char *file; 251 251 char *dir; 252 252 int err = 0; 253 + 254 + if (is_dir && is_bpffs(name)) 255 + return err; 253 256 254 257 file = malloc(strlen(name) + 1); 255 258 if (!file) { ··· 289 286 { 290 287 int err; 291 288 292 - err = mount_bpffs_for_pin(name); 289 + err = mount_bpffs_for_pin(name, false); 293 290 if (err) 294 291 return err; 295 292
+1 -1
tools/bpf/bpftool/iter.c
··· 76 76 goto close_obj; 77 77 } 78 78 79 - err = mount_bpffs_for_pin(path); 79 + err = mount_bpffs_for_pin(path, false); 80 80 if (err) 81 81 goto close_link; 82 82
+6
tools/bpf/bpftool/link.c
··· 195 195 196 196 show_link_attach_type_json(info->tracing.attach_type, 197 197 json_wtr); 198 + jsonw_uint_field(json_wtr, "target_obj_id", info->tracing.target_obj_id); 199 + jsonw_uint_field(json_wtr, "target_btf_id", info->tracing.target_btf_id); 198 200 break; 199 201 case BPF_LINK_TYPE_CGROUP: 200 202 jsonw_lluint_field(json_wtr, "cgroup_id", ··· 377 375 printf("\n\tprog_type %u ", prog_info.type); 378 376 379 377 show_link_attach_type_plain(info->tracing.attach_type); 378 + if (info->tracing.target_obj_id || info->tracing.target_btf_id) 379 + printf("\n\ttarget_obj_id %u target_btf_id %u ", 380 + info->tracing.target_obj_id, 381 + info->tracing.target_btf_id); 380 382 break; 381 383 case BPF_LINK_TYPE_CGROUP: 382 384 printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id);
+1 -1
tools/bpf/bpftool/main.h
··· 142 142 char *get_fdinfo(int fd, const char *key); 143 143 int open_obj_pinned(const char *path, bool quiet); 144 144 int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type); 145 - int mount_bpffs_for_pin(const char *name); 145 + int mount_bpffs_for_pin(const char *name, bool is_dir); 146 146 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); 147 147 int do_pin_fd(int fd, const char *name); 148 148
+6 -1
tools/bpf/bpftool/map.c
··· 1287 1287 "flags")) 1288 1288 goto exit; 1289 1289 } else if (is_prefix(*argv, "dev")) { 1290 + p_info("Warning: 'bpftool map create [...] dev <ifname>' syntax is deprecated.\n" 1291 + "Going further, please use 'offload_dev <ifname>' to request hardware offload for the map."); 1292 + goto offload_dev; 1293 + } else if (is_prefix(*argv, "offload_dev")) { 1294 + offload_dev: 1290 1295 NEXT_ARG(); 1291 1296 1292 1297 if (attr.map_ifindex) { ··· 1436 1431 "Usage: %1$s %2$s { show | list } [MAP]\n" 1437 1432 " %1$s %2$s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n" 1438 1433 " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n" 1439 - " [inner_map MAP] [dev NAME]\n" 1434 + " [inner_map MAP] [offload_dev NAME]\n" 1440 1435 " %1$s %2$s dump MAP\n" 1441 1436 " %1$s %2$s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n" 1442 1437 " %1$s %2$s lookup MAP [key DATA]\n"
+44 -9
tools/bpf/bpftool/prog.c
··· 1517 1517 struct bpf_program *prog = NULL, *pos; 1518 1518 unsigned int old_map_fds = 0; 1519 1519 const char *pinmaps = NULL; 1520 + __u32 xdpmeta_ifindex = 0; 1521 + __u32 offload_ifindex = 0; 1520 1522 bool auto_attach = false; 1521 1523 struct bpf_object *obj; 1522 1524 struct bpf_map *map; 1523 1525 const char *pinfile; 1524 1526 unsigned int i, j; 1525 - __u32 ifindex = 0; 1526 1527 const char *file; 1527 1528 int idx, err; 1528 1529 ··· 1615 1614 map_replace[old_map_fds].fd = fd; 1616 1615 old_map_fds++; 1617 1616 } else if (is_prefix(*argv, "dev")) { 1617 + p_info("Warning: 'bpftool prog load [...] dev <ifname>' syntax is deprecated.\n" 1618 + "Going further, please use 'offload_dev <ifname>' to offload program to device.\n" 1619 + "For applications using XDP hints only, use 'xdpmeta_dev <ifname>'."); 1620 + goto offload_dev; 1621 + } else if (is_prefix(*argv, "offload_dev")) { 1622 + offload_dev: 1618 1623 NEXT_ARG(); 1619 1624 1620 - if (ifindex) { 1621 - p_err("offload device already specified"); 1625 + if (offload_ifindex) { 1626 + p_err("offload_dev already specified"); 1627 + goto err_free_reuse_maps; 1628 + } else if (xdpmeta_ifindex) { 1629 + p_err("xdpmeta_dev and offload_dev are mutually exclusive"); 1622 1630 goto err_free_reuse_maps; 1623 1631 } 1624 1632 if (!REQ_ARGS(1)) 1625 1633 goto err_free_reuse_maps; 1626 1634 1627 - ifindex = if_nametoindex(*argv); 1628 - if (!ifindex) { 1635 + offload_ifindex = if_nametoindex(*argv); 1636 + if (!offload_ifindex) { 1637 + p_err("unrecognized netdevice '%s': %s", 1638 + *argv, strerror(errno)); 1639 + goto err_free_reuse_maps; 1640 + } 1641 + NEXT_ARG(); 1642 + } else if (is_prefix(*argv, "xdpmeta_dev")) { 1643 + NEXT_ARG(); 1644 + 1645 + if (xdpmeta_ifindex) { 1646 + p_err("xdpmeta_dev already specified"); 1647 + goto err_free_reuse_maps; 1648 + } else if (offload_ifindex) { 1649 + p_err("xdpmeta_dev and offload_dev are mutually exclusive"); 1650 + goto err_free_reuse_maps; 1651 + } 1652 + if 
(!REQ_ARGS(1)) 1653 + goto err_free_reuse_maps; 1654 + 1655 + xdpmeta_ifindex = if_nametoindex(*argv); 1656 + if (!xdpmeta_ifindex) { 1629 1657 p_err("unrecognized netdevice '%s': %s", 1630 1658 *argv, strerror(errno)); 1631 1659 goto err_free_reuse_maps; ··· 1701 1671 goto err_close_obj; 1702 1672 } 1703 1673 1704 - bpf_program__set_ifindex(pos, ifindex); 1674 + if (prog_type == BPF_PROG_TYPE_XDP && xdpmeta_ifindex) { 1675 + bpf_program__set_flags(pos, BPF_F_XDP_DEV_BOUND_ONLY); 1676 + bpf_program__set_ifindex(pos, xdpmeta_ifindex); 1677 + } else { 1678 + bpf_program__set_ifindex(pos, offload_ifindex); 1679 + } 1705 1680 if (bpf_program__type(pos) != prog_type) 1706 1681 bpf_program__set_type(pos, prog_type); 1707 1682 bpf_program__set_expected_attach_type(pos, expected_attach_type); ··· 1744 1709 idx = 0; 1745 1710 bpf_object__for_each_map(map, obj) { 1746 1711 if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) 1747 - bpf_map__set_ifindex(map, ifindex); 1712 + bpf_map__set_ifindex(map, offload_ifindex); 1748 1713 1749 1714 if (j < old_map_fds && idx == map_replace[j].idx) { 1750 1715 err = bpf_map__reuse_fd(map, map_replace[j++].fd); ··· 1774 1739 goto err_close_obj; 1775 1740 } 1776 1741 1777 - err = mount_bpffs_for_pin(pinfile); 1742 + err = mount_bpffs_for_pin(pinfile, !first_prog_only); 1778 1743 if (err) 1779 1744 goto err_close_obj; 1780 1745 ··· 2451 2416 " %1$s %2$s dump jited PROG [{ file FILE | [opcodes] [linum] }]\n" 2452 2417 " %1$s %2$s pin PROG FILE\n" 2453 2418 " %1$s %2$s { load | loadall } OBJ PATH \\\n" 2454 - " [type TYPE] [dev NAME] \\\n" 2419 + " [type TYPE] [{ offload_dev | xdpmeta_dev } NAME] \\\n" 2455 2420 " [map { idx IDX | name NAME } MAP]\\\n" 2456 2421 " [pinmaps MAP_DIR]\n" 2457 2422 " [autoattach]\n"
+1 -1
tools/bpf/bpftool/struct_ops.c
··· 509 509 if (argc == 1) 510 510 linkdir = GET_ARG(); 511 511 512 - if (linkdir && mount_bpffs_for_pin(linkdir)) { 512 + if (linkdir && mount_bpffs_for_pin(linkdir, true)) { 513 513 p_err("can't mount bpffs for pinning"); 514 514 return -1; 515 515 }
+10
tools/include/uapi/linux/bpf.h
··· 1272 1272 1273 1273 /* Create a map that will be registered/unregistered by the backed bpf_link */ 1274 1274 BPF_F_LINK = (1U << 13), 1275 + 1276 + /* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ 1277 + BPF_F_PATH_FD = (1U << 14), 1275 1278 }; 1276 1279 1277 1280 /* Flags for BPF_PROG_QUERY. */ ··· 1423 1420 __aligned_u64 pathname; 1424 1421 __u32 bpf_fd; 1425 1422 __u32 file_flags; 1423 + /* Same as dirfd in openat() syscall; see openat(2) 1424 + * manpage for details of path FD and pathname semantics; 1425 + * path_fd should be accompanied by BPF_F_PATH_FD flag set in 1426 + * file_flags field, otherwise it should be set to zero; 1427 + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. 1428 + */ 1429 + __s32 path_fd; 1426 1430 }; 1427 1431 1428 1432 struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+14 -3
tools/lib/bpf/bpf.c
··· 572 572 (void *)keys, (void *)values, count, opts); 573 573 } 574 574 575 - int bpf_obj_pin(int fd, const char *pathname) 575 + int bpf_obj_pin_opts(int fd, const char *pathname, const struct bpf_obj_pin_opts *opts) 576 576 { 577 - const size_t attr_sz = offsetofend(union bpf_attr, file_flags); 577 + const size_t attr_sz = offsetofend(union bpf_attr, path_fd); 578 578 union bpf_attr attr; 579 579 int ret; 580 580 581 + if (!OPTS_VALID(opts, bpf_obj_pin_opts)) 582 + return libbpf_err(-EINVAL); 583 + 581 584 memset(&attr, 0, attr_sz); 585 + attr.path_fd = OPTS_GET(opts, path_fd, 0); 582 586 attr.pathname = ptr_to_u64((void *)pathname); 587 + attr.file_flags = OPTS_GET(opts, file_flags, 0); 583 588 attr.bpf_fd = fd; 584 589 585 590 ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz); 586 591 return libbpf_err_errno(ret); 592 + } 593 + 594 + int bpf_obj_pin(int fd, const char *pathname) 595 + { 596 + return bpf_obj_pin_opts(fd, pathname, NULL); 587 597 } 588 598 589 599 int bpf_obj_get(const char *pathname) ··· 603 593 604 594 int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) 605 595 { 606 - const size_t attr_sz = offsetofend(union bpf_attr, file_flags); 596 + const size_t attr_sz = offsetofend(union bpf_attr, path_fd); 607 597 union bpf_attr attr; 608 598 int fd; 609 599 ··· 611 601 return libbpf_err(-EINVAL); 612 602 613 603 memset(&attr, 0, attr_sz); 604 + attr.path_fd = OPTS_GET(opts, path_fd, 0); 614 605 attr.pathname = ptr_to_u64((void *)pathname); 615 606 attr.file_flags = OPTS_GET(opts, file_flags, 0); 616 607
+16 -2
tools/lib/bpf/bpf.h
··· 284 284 __u32 *count, 285 285 const struct bpf_map_batch_opts *opts); 286 286 287 + struct bpf_obj_pin_opts { 288 + size_t sz; /* size of this struct for forward/backward compatibility */ 289 + 290 + __u32 file_flags; 291 + int path_fd; 292 + 293 + size_t :0; 294 + }; 295 + #define bpf_obj_pin_opts__last_field path_fd 296 + 297 + LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); 298 + LIBBPF_API int bpf_obj_pin_opts(int fd, const char *pathname, 299 + const struct bpf_obj_pin_opts *opts); 300 + 287 301 struct bpf_obj_get_opts { 288 302 size_t sz; /* size of this struct for forward/backward compatibility */ 289 303 290 304 __u32 file_flags; 305 + int path_fd; 291 306 292 307 size_t :0; 293 308 }; 294 - #define bpf_obj_get_opts__last_field file_flags 309 + #define bpf_obj_get_opts__last_field path_fd 295 310 296 - LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); 297 311 LIBBPF_API int bpf_obj_get(const char *pathname); 298 312 LIBBPF_API int bpf_obj_get_opts(const char *pathname, 299 313 const struct bpf_obj_get_opts *opts);
+1 -1
tools/lib/bpf/btf.c
··· 1064 1064 int err = 0; 1065 1065 long sz; 1066 1066 1067 - f = fopen(path, "rb"); 1067 + f = fopen(path, "rbe"); 1068 1068 if (!f) { 1069 1069 err = -errno; 1070 1070 goto err_out;
+7 -7
tools/lib/bpf/gen_loader.c
··· 703 703 /* obtain fd in BPF_REG_9 */ 704 704 emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); 705 705 emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); 706 - /* jump to fd_array store if fd denotes module BTF */ 706 + /* load fd_array slot pointer */ 707 + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 708 + 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); 709 + /* store BTF fd in slot, 0 for vmlinux */ 710 + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); 711 + /* jump to insn[insn_idx].off store if fd denotes module BTF */ 707 712 emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); 708 713 /* set the default value for off */ 709 714 emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); 710 715 /* skip BTF fd store for vmlinux BTF */ 711 - emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); 712 - /* load fd_array slot pointer */ 713 - emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 714 - 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); 715 - /* store BTF fd in slot */ 716 - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); 716 + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1)); 717 717 /* store index into insn[insn_idx].off */ 718 718 emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); 719 719 log:
+135 -19
tools/lib/bpf/libbpf.c
··· 1500 1500 return map; 1501 1501 } 1502 1502 1503 - static size_t bpf_map_mmap_sz(const struct bpf_map *map) 1503 + static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) 1504 1504 { 1505 - long page_sz = sysconf(_SC_PAGE_SIZE); 1505 + const long page_sz = sysconf(_SC_PAGE_SIZE); 1506 1506 size_t map_sz; 1507 1507 1508 - map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries; 1508 + map_sz = (size_t)roundup(value_sz, 8) * max_entries; 1509 1509 map_sz = roundup(map_sz, page_sz); 1510 1510 return map_sz; 1511 + } 1512 + 1513 + static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz) 1514 + { 1515 + void *mmaped; 1516 + 1517 + if (!map->mmaped) 1518 + return -EINVAL; 1519 + 1520 + if (old_sz == new_sz) 1521 + return 0; 1522 + 1523 + mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); 1524 + if (mmaped == MAP_FAILED) 1525 + return -errno; 1526 + 1527 + memcpy(mmaped, map->mmaped, min(old_sz, new_sz)); 1528 + munmap(map->mmaped, old_sz); 1529 + map->mmaped = mmaped; 1530 + return 0; 1511 1531 } 1512 1532 1513 1533 static char *internal_map_name(struct bpf_object *obj, const char *real_name) ··· 1628 1608 { 1629 1609 struct bpf_map_def *def; 1630 1610 struct bpf_map *map; 1611 + size_t mmap_sz; 1631 1612 int err; 1632 1613 1633 1614 map = bpf_object__add_map(obj); ··· 1663 1642 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", 1664 1643 map->name, map->sec_idx, map->sec_offset, def->map_flags); 1665 1644 1666 - map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, 1645 + mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); 1646 + map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, 1667 1647 MAP_SHARED | MAP_ANONYMOUS, -1, 0); 1668 1648 if (map->mmaped == MAP_FAILED) { 1669 1649 err = -errno; ··· 4351 4329 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); 4352 4330 memset(info, 0, 
sizeof(*info)); 4353 4331 4354 - fp = fopen(file, "r"); 4332 + fp = fopen(file, "re"); 4355 4333 if (!fp) { 4356 4334 err = -errno; 4357 4335 pr_warn("failed to open %s: %d. No procfs support?\n", file, ··· 4414 4392 if (!new_name) 4415 4393 return libbpf_err(-errno); 4416 4394 4417 - new_fd = open("/", O_RDONLY | O_CLOEXEC); 4395 + /* 4396 + * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. 4397 + * This is similar to what we do in ensure_good_fd(), but without 4398 + * closing original FD. 4399 + */ 4400 + new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); 4418 4401 if (new_fd < 0) { 4419 4402 err = -errno; 4420 4403 goto err_free_new_name; 4421 - } 4422 - 4423 - new_fd = dup3(fd, new_fd, O_CLOEXEC); 4424 - if (new_fd < 0) { 4425 - err = -errno; 4426 - goto err_close_new_fd; 4427 4404 } 4428 4405 4429 4406 err = zclose(map->fd); ··· 7454 7433 int ret, err = 0; 7455 7434 FILE *f; 7456 7435 7457 - f = fopen("/proc/kallsyms", "r"); 7436 + f = fopen("/proc/kallsyms", "re"); 7458 7437 if (!f) { 7459 7438 err = -errno; 7460 7439 pr_warn("failed to open /proc/kallsyms: %d\n", err); ··· 8315 8294 map->init_slots_sz = 0; 8316 8295 8317 8296 if (map->mmaped) { 8318 - munmap(map->mmaped, bpf_map_mmap_sz(map)); 8297 + size_t mmap_sz; 8298 + 8299 + mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); 8300 + munmap(map->mmaped, mmap_sz); 8319 8301 map->mmaped = NULL; 8320 8302 } 8321 8303 ··· 9436 9412 return map->def.value_size; 9437 9413 } 9438 9414 9415 + static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) 9416 + { 9417 + struct btf *btf; 9418 + struct btf_type *datasec_type, *var_type; 9419 + struct btf_var_secinfo *var; 9420 + const struct btf_type *array_type; 9421 + const struct btf_array *array; 9422 + int vlen, element_sz, new_array_id; 9423 + __u32 nr_elements; 9424 + 9425 + /* check btf existence */ 9426 + btf = bpf_object__btf(map->obj); 9427 + if (!btf) 9428 + return -ENOENT; 9429 + 9430 + /* verify map is datasec */ 9431 + 
datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); 9432 + if (!btf_is_datasec(datasec_type)) { 9433 + pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", 9434 + bpf_map__name(map)); 9435 + return -EINVAL; 9436 + } 9437 + 9438 + /* verify datasec has at least one var */ 9439 + vlen = btf_vlen(datasec_type); 9440 + if (vlen == 0) { 9441 + pr_warn("map '%s': cannot be resized, map value datasec is empty\n", 9442 + bpf_map__name(map)); 9443 + return -EINVAL; 9444 + } 9445 + 9446 + /* verify last var in the datasec is an array */ 9447 + var = &btf_var_secinfos(datasec_type)[vlen - 1]; 9448 + var_type = btf_type_by_id(btf, var->type); 9449 + array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); 9450 + if (!btf_is_array(array_type)) { 9451 + pr_warn("map '%s': cannot be resized, last var must be an array\n", 9452 + bpf_map__name(map)); 9453 + return -EINVAL; 9454 + } 9455 + 9456 + /* verify request size aligns with array */ 9457 + array = btf_array(array_type); 9458 + element_sz = btf__resolve_size(btf, array->type); 9459 + if (element_sz <= 0 || (size - var->offset) % element_sz != 0) { 9460 + pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", 9461 + bpf_map__name(map), element_sz, size); 9462 + return -EINVAL; 9463 + } 9464 + 9465 + /* create a new array based on the existing array, but with new length */ 9466 + nr_elements = (size - var->offset) / element_sz; 9467 + new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); 9468 + if (new_array_id < 0) 9469 + return new_array_id; 9470 + 9471 + /* adding a new btf type invalidates existing pointers to btf objects, 9472 + * so refresh pointers before proceeding 9473 + */ 9474 + datasec_type = btf_type_by_id(btf, map->btf_value_type_id); 9475 + var = &btf_var_secinfos(datasec_type)[vlen - 1]; 9476 + var_type = btf_type_by_id(btf, var->type); 9477 + 9478 + /* finally update btf info */ 9479 + 
datasec_type->size = size; 9480 + var->size = size - var->offset; 9481 + var_type->type = new_array_id; 9482 + 9483 + return 0; 9484 + } 9485 + 9439 9486 int bpf_map__set_value_size(struct bpf_map *map, __u32 size) 9440 9487 { 9441 9488 if (map->fd >= 0) 9442 9489 return libbpf_err(-EBUSY); 9490 + 9491 + if (map->mmaped) { 9492 + int err; 9493 + size_t mmap_old_sz, mmap_new_sz; 9494 + 9495 + mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); 9496 + mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries); 9497 + err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); 9498 + if (err) { 9499 + pr_warn("map '%s': failed to resize memory-mapped region: %d\n", 9500 + bpf_map__name(map), err); 9501 + return err; 9502 + } 9503 + err = map_btf_datasec_resize(map, size); 9504 + if (err && err != -ENOENT) { 9505 + pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n", 9506 + bpf_map__name(map), err); 9507 + map->btf_value_type_id = 0; 9508 + map->btf_key_type_id = 0; 9509 + } 9510 + } 9511 + 9443 9512 map->def.value_size = size; 9444 9513 return 0; 9445 9514 } ··· 9558 9441 return 0; 9559 9442 } 9560 9443 9561 - const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) 9444 + void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) 9562 9445 { 9563 9446 if (!map->mmaped) 9564 9447 return NULL; ··· 10074 9957 int err, ret; 10075 9958 FILE *f; 10076 9959 10077 - f = fopen(file, "r"); 9960 + f = fopen(file, "re"); 10078 9961 if (!f) { 10079 9962 err = -errno; 10080 9963 pr_debug("failed to open '%s': %s\n", file, ··· 12810 12693 12811 12694 for (i = 0; i < s->map_cnt; i++) { 12812 12695 struct bpf_map *map = *s->maps[i].map; 12813 - size_t mmap_sz = bpf_map_mmap_sz(map); 12696 + size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); 12814 12697 int prot, map_fd = bpf_map__fd(map); 12815 12698 void **mmaped = s->maps[i].mmaped; 12816 12699 ··· 12837 12720 * as per normal clean up 
procedure, so we don't need to worry 12838 12721 * about it from skeleton's clean up perspective. 12839 12722 */ 12840 - *mmaped = mmap(map->mmaped, mmap_sz, prot, 12841 - MAP_SHARED | MAP_FIXED, map_fd, 0); 12723 + *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0); 12842 12724 if (*mmaped == MAP_FAILED) { 12843 12725 err = -errno; 12844 12726 *mmaped = NULL;
+16 -2
tools/lib/bpf/libbpf.h
··· 869 869 /* get/set map key size */ 870 870 LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map); 871 871 LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size); 872 - /* get/set map value size */ 872 + /* get map value size */ 873 873 LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map); 874 + /** 875 + * @brief **bpf_map__set_value_size()** sets map value size. 876 + * @param map the BPF map instance 877 + * @return 0, on success; negative error, otherwise 878 + * 879 + * There is a special case for maps with associated memory-mapped regions, like 880 + * the global data section maps (bss, data, rodata). When this function is used 881 + * on such a map, the mapped region is resized. Afterward, an attempt is made to 882 + * adjust the corresponding BTF info. This attempt is best-effort and can only 883 + * succeed if the last variable of the data section map is an array. The array 884 + * BTF type is replaced by a new BTF array type with a different length. 885 + * Any previously existing pointers returned from bpf_map__initial_value() or 886 + * corresponding data section skeleton pointer must be reinitialized. 887 + */ 874 888 LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size); 875 889 /* get map key/value BTF type IDs */ 876 890 LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); ··· 898 884 899 885 LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, 900 886 const void *data, size_t size); 901 - LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); 887 + LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); 902 888 903 889 /** 904 890 * @brief **bpf_map__is_internal()** tells the caller whether or not the
+5
tools/lib/bpf/libbpf.map
··· 391 391 bpf_map_get_info_by_fd; 392 392 bpf_prog_get_info_by_fd; 393 393 } LIBBPF_1.1.0; 394 + 395 + LIBBPF_1.3.0 { 396 + global: 397 + bpf_obj_pin_opts; 398 + } LIBBPF_1.2.0;
+1 -1
tools/lib/bpf/libbpf_probes.c
··· 38 38 if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) != 0) 39 39 return 0; 40 40 41 - f = fopen(ubuntu_kver_file, "r"); 41 + f = fopen(ubuntu_kver_file, "re"); 42 42 if (!f) 43 43 return 0; 44 44
+1 -1
tools/lib/bpf/libbpf_version.h
··· 4 4 #define __LIBBPF_VERSION_H 5 5 6 6 #define LIBBPF_MAJOR_VERSION 1 7 - #define LIBBPF_MINOR_VERSION 2 7 + #define LIBBPF_MINOR_VERSION 3 8 8 9 9 #endif /* __LIBBPF_VERSION_H */
+2 -3
tools/lib/bpf/usdt.c
··· 466 466 467 467 proceed: 468 468 sprintf(line, "/proc/%d/maps", pid); 469 - f = fopen(line, "r"); 469 + f = fopen(line, "re"); 470 470 if (!f) { 471 471 err = -errno; 472 472 pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", ··· 954 954 spec_map_fd = bpf_map__fd(man->specs_map); 955 955 ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); 956 956 957 - /* TODO: perform path resolution similar to uprobe's */ 958 - fd = open(path, O_RDONLY); 957 + fd = open(path, O_RDONLY | O_CLOEXEC); 959 958 if (fd < 0) { 960 959 err = -errno; 961 960 pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err);
+1 -2
tools/testing/selftests/bpf/Makefile
··· 88 88 xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ 89 89 xdp_features 90 90 91 - TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file 92 - TEST_GEN_FILES += liburandom_read.so 91 + TEST_GEN_FILES += liburandom_read.so urandom_read sign-file 93 92 94 93 # Emit succinct information message describing current building step 95 94 # $1 - generic step name (e.g., CC, LINK, etc);
+2 -2
tools/testing/selftests/bpf/bpf_kfuncs.h
··· 36 36 void *buffer, __u32 buffer__szk) __ksym; 37 37 38 38 extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym; 39 - extern int bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym; 40 - extern int bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; 39 + extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym; 40 + extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; 41 41 extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym; 42 42 extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym; 43 43
+166
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
··· 9 9 #include <linux/sysfs.h> 10 10 #include <linux/tracepoint.h> 11 11 #include "bpf_testmod.h" 12 + #include "bpf_testmod_kfunc.h" 12 13 13 14 #define CREATE_TRACE_POINTS 14 15 #include "bpf_testmod-events.h" ··· 290 289 .set = &bpf_testmod_common_kfunc_ids, 291 290 }; 292 291 292 + __bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) 293 + { 294 + return a + b + c + d; 295 + } 296 + 297 + __bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) 298 + { 299 + return a + b; 300 + } 301 + 302 + __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk) 303 + { 304 + return sk; 305 + } 306 + 307 + __bpf_kfunc long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) 308 + { 309 + /* Provoke the compiler to assume that the caller has sign-extended a, 310 + * b and c on platforms where this is required (e.g. s390x). 311 + */ 312 + return (long)a + (long)b + (long)c + d; 313 + } 314 + 315 + static struct prog_test_ref_kfunc prog_test_struct = { 316 + .a = 42, 317 + .b = 108, 318 + .next = &prog_test_struct, 319 + .cnt = REFCOUNT_INIT(1), 320 + }; 321 + 322 + __bpf_kfunc struct prog_test_ref_kfunc * 323 + bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) 324 + { 325 + refcount_inc(&prog_test_struct.cnt); 326 + return &prog_test_struct; 327 + } 328 + 329 + __bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) 330 + { 331 + WARN_ON_ONCE(1); 332 + } 333 + 334 + __bpf_kfunc struct prog_test_member * 335 + bpf_kfunc_call_memb_acquire(void) 336 + { 337 + WARN_ON_ONCE(1); 338 + return NULL; 339 + } 340 + 341 + __bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) 342 + { 343 + WARN_ON_ONCE(1); 344 + } 345 + 346 + static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size) 347 + { 348 + if (size > 2 * sizeof(int)) 349 + return NULL; 350 + 351 + return (int *)p; 352 + } 353 + 354 + __bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct 
prog_test_ref_kfunc *p, 355 + const int rdwr_buf_size) 356 + { 357 + return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); 358 + } 359 + 360 + __bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, 361 + const int rdonly_buf_size) 362 + { 363 + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); 364 + } 365 + 366 + /* the next 2 ones can't really be used for testing except to ensure 367 + * that the verifier rejects the call. 368 + * Acquire functions must return struct pointers, so these ones are 369 + * failing. 370 + */ 371 + __bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, 372 + const int rdonly_buf_size) 373 + { 374 + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); 375 + } 376 + 377 + __bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p) 378 + { 379 + } 380 + 381 + __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) 382 + { 383 + } 384 + 385 + __bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) 386 + { 387 + } 388 + 389 + __bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) 390 + { 391 + } 392 + 393 + __bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) 394 + { 395 + } 396 + 397 + __bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) 398 + { 399 + } 400 + 401 + __bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) 402 + { 403 + } 404 + 405 + __bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) 406 + { 407 + } 408 + 409 + __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) 410 + { 411 + } 412 + 413 + __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) 414 + { 415 + } 416 + 417 + __bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) 418 + { 419 + /* p != NULL, but p->cnt could be 0 */ 420 + } 421 + 422 + __bpf_kfunc void bpf_kfunc_call_test_destructive(void) 423 + { 424 + } 425 + 426 + __bpf_kfunc static 
u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) 427 + { 428 + return arg; 429 + } 430 + 293 431 BTF_SET8_START(bpf_testmod_check_kfunc_ids) 294 432 BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) 433 + BTF_ID_FLAGS(func, bpf_kfunc_call_test1) 434 + BTF_ID_FLAGS(func, bpf_kfunc_call_test2) 435 + BTF_ID_FLAGS(func, bpf_kfunc_call_test3) 436 + BTF_ID_FLAGS(func, bpf_kfunc_call_test4) 437 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) 438 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) 439 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) 440 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) 441 + BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) 442 + BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) 443 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL) 444 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL) 445 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL) 446 + BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE) 447 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) 448 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) 449 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) 450 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) 451 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) 452 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) 453 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) 454 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) 455 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) 456 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) 295 457 BTF_SET8_END(bpf_testmod_check_kfunc_ids) 296 458 297 459 static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { ··· 476 312 477 313 ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set); 478 314 ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, 
&bpf_testmod_kfunc_set); 315 + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set); 316 + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set); 479 317 if (ret < 0) 480 318 return ret; 481 319 if (bpf_fentry_test1(0) < 0)
+100
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #ifndef _BPF_TESTMOD_KFUNC_H 4 + #define _BPF_TESTMOD_KFUNC_H 5 + 6 + #ifndef __KERNEL__ 7 + #include <vmlinux.h> 8 + #include <bpf/bpf_helpers.h> 9 + #else 10 + #define __ksym 11 + struct prog_test_member1 { 12 + int a; 13 + }; 14 + 15 + struct prog_test_member { 16 + struct prog_test_member1 m; 17 + int c; 18 + }; 19 + 20 + struct prog_test_ref_kfunc { 21 + int a; 22 + int b; 23 + struct prog_test_member memb; 24 + struct prog_test_ref_kfunc *next; 25 + refcount_t cnt; 26 + }; 27 + #endif 28 + 29 + struct prog_test_pass1 { 30 + int x0; 31 + struct { 32 + int x1; 33 + struct { 34 + int x2; 35 + struct { 36 + int x3; 37 + }; 38 + }; 39 + }; 40 + }; 41 + 42 + struct prog_test_pass2 { 43 + int len; 44 + short arr1[4]; 45 + struct { 46 + char arr2[4]; 47 + unsigned long arr3[8]; 48 + } x; 49 + }; 50 + 51 + struct prog_test_fail1 { 52 + void *p; 53 + int x; 54 + }; 55 + 56 + struct prog_test_fail2 { 57 + int x8; 58 + struct prog_test_pass1 x; 59 + }; 60 + 61 + struct prog_test_fail3 { 62 + int len; 63 + char arr1[2]; 64 + char arr2[]; 65 + }; 66 + 67 + struct prog_test_ref_kfunc * 68 + bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym; 69 + void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 70 + void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; 71 + 72 + void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; 73 + int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; 74 + int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; 75 + int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; 76 + void bpf_kfunc_call_int_mem_release(int *p) __ksym; 77 + 78 + /* The bpf_kfunc_call_test_static_unused_arg is defined as static, 79 + * but bpf program compilation needs to see it as global symbol. 
80 + */ 81 + #ifndef __KERNEL__ 82 + u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; 83 + #endif 84 + 85 + void bpf_testmod_test_mod_kfunc(int i) __ksym; 86 + 87 + __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, 88 + __u32 c, __u64 d) __ksym; 89 + int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; 90 + struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; 91 + long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; 92 + 93 + void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; 94 + void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; 95 + void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; 96 + void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; 97 + 98 + void bpf_kfunc_call_test_destructive(void) __ksym; 99 + 100 + #endif /* _BPF_TESTMOD_KFUNC_H */
+23
tools/testing/selftests/bpf/network_helpers.c
··· 427 427 close(token->orig_netns_fd); 428 428 free(token); 429 429 } 430 + 431 + int get_socket_local_port(int sock_fd) 432 + { 433 + struct sockaddr_storage addr; 434 + socklen_t addrlen = sizeof(addr); 435 + int err; 436 + 437 + err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen); 438 + if (err < 0) 439 + return err; 440 + 441 + if (addr.ss_family == AF_INET) { 442 + struct sockaddr_in *sin = (struct sockaddr_in *)&addr; 443 + 444 + return sin->sin_port; 445 + } else if (addr.ss_family == AF_INET6) { 446 + struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr; 447 + 448 + return sin->sin6_port; 449 + } 450 + 451 + return -1; 452 + }
+1
tools/testing/selftests/bpf/network_helpers.h
··· 56 56 int make_sockaddr(int family, const char *addr_str, __u16 port, 57 57 struct sockaddr_storage *addr, socklen_t *len); 58 58 char *ping_command(int family); 59 + int get_socket_local_port(int sock_fd); 59 60 60 61 struct nstoken; 61 62 /**
+5 -29
tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
··· 11 11 #include "ksym_race.skel.h" 12 12 #include "bpf_mod_race.skel.h" 13 13 #include "kfunc_call_race.skel.h" 14 + #include "testing_helpers.h" 14 15 15 16 /* This test crafts a race between btf_try_get_module and do_init_module, and 16 17 * checks whether btf_try_get_module handles the invocation for a well-formed ··· 45 44 46 45 static _Atomic enum bpf_test_state state = _TS_INVALID; 47 46 48 - static int sys_finit_module(int fd, const char *param_values, int flags) 49 - { 50 - return syscall(__NR_finit_module, fd, param_values, flags); 51 - } 52 - 53 - static int sys_delete_module(const char *name, unsigned int flags) 54 - { 55 - return syscall(__NR_delete_module, name, flags); 56 - } 57 - 58 - static int load_module(const char *mod) 59 - { 60 - int ret, fd; 61 - 62 - fd = open("bpf_testmod.ko", O_RDONLY); 63 - if (fd < 0) 64 - return fd; 65 - 66 - ret = sys_finit_module(fd, "", 0); 67 - close(fd); 68 - if (ret < 0) 69 - return ret; 70 - return 0; 71 - } 72 - 73 47 static void *load_module_thread(void *p) 74 48 { 75 49 76 - if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail")) 50 + if (!ASSERT_NEQ(load_bpf_testmod(false), 0, "load_module_thread must fail")) 77 51 atomic_store(&state, TS_MODULE_LOAD); 78 52 else 79 53 atomic_store(&state, TS_MODULE_LOAD_FAIL); ··· 100 124 if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration")) 101 125 return; 102 126 103 - if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod")) 127 + if (!ASSERT_OK(unload_bpf_testmod(false), "unload bpf_testmod")) 104 128 goto end_mmap; 105 129 106 130 skel = bpf_mod_race__open(); ··· 178 202 bpf_mod_race__destroy(skel); 179 203 ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); 180 204 end_module: 181 - sys_delete_module("bpf_testmod", 0); 182 - ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod"); 205 + unload_bpf_testmod(false); 206 + ASSERT_OK(load_bpf_testmod(false), "restore bpf_testmod"); 183 207 end_mmap: 184 208 
munmap(fault_addr, 4096); 185 209 atomic_store(&state, _TS_INVALID);
+268
tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ 3 + #define _GNU_SOURCE 4 + #include <test_progs.h> 5 + #include <bpf/btf.h> 6 + #include <fcntl.h> 7 + #include <unistd.h> 8 + #include <linux/unistd.h> 9 + #include <linux/mount.h> 10 + #include <sys/syscall.h> 11 + 12 + static inline int sys_fsopen(const char *fsname, unsigned flags) 13 + { 14 + return syscall(__NR_fsopen, fsname, flags); 15 + } 16 + 17 + static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) 18 + { 19 + return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); 20 + } 21 + 22 + static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) 23 + { 24 + return syscall(__NR_fsmount, fs_fd, flags, ms_flags); 25 + } 26 + 27 + __attribute__((unused)) 28 + static inline int sys_move_mount(int from_dfd, const char *from_path, 29 + int to_dfd, const char *to_path, 30 + unsigned int ms_flags) 31 + { 32 + return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, ms_flags); 33 + } 34 + 35 + static void bpf_obj_pinning_detached(void) 36 + { 37 + LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts); 38 + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); 39 + int fs_fd = -1, mnt_fd = -1; 40 + int map_fd = -1, map_fd2 = -1; 41 + int zero = 0, src_value, dst_value, err; 42 + const char *map_name = "fsmount_map"; 43 + 44 + /* A bunch of below UAPI calls are constructed based on reading: 45 + * https://brauner.io/2023/02/28/mounting-into-mount-namespaces.html 46 + */ 47 + 48 + /* create VFS context */ 49 + fs_fd = sys_fsopen("bpf", 0); 50 + if (!ASSERT_GE(fs_fd, 0, "fs_fd")) 51 + goto cleanup; 52 + 53 + /* instantiate FS object */ 54 + err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); 55 + if (!ASSERT_OK(err, "fs_create")) 56 + goto cleanup; 57 + 58 + /* create O_PATH fd for detached mount */ 59 + mnt_fd = sys_fsmount(fs_fd, 0, 0); 60 + if (!ASSERT_GE(mnt_fd, 0, "mnt_fd")) 61 + goto cleanup; 62 + 
63 + /* If we wanted to expose detached mount in the file system, we'd do 64 + * something like below. But the whole point is that we actually don't 65 + * even have to expose BPF FS in the file system to be able to work 66 + * (pin/get objects) with it. 67 + * 68 + * err = sys_move_mount(mnt_fd, "", -EBADF, mnt_path, MOVE_MOUNT_F_EMPTY_PATH); 69 + * if (!ASSERT_OK(err, "move_mount")) 70 + * goto cleanup; 71 + */ 72 + 73 + /* create BPF map to pin */ 74 + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL); 75 + if (!ASSERT_GE(map_fd, 0, "map_fd")) 76 + goto cleanup; 77 + 78 + /* pin BPF map into detached BPF FS through mnt_fd */ 79 + pin_opts.file_flags = BPF_F_PATH_FD; 80 + pin_opts.path_fd = mnt_fd; 81 + err = bpf_obj_pin_opts(map_fd, map_name, &pin_opts); 82 + if (!ASSERT_OK(err, "map_pin")) 83 + goto cleanup; 84 + 85 + /* get BPF map from detached BPF FS through mnt_fd */ 86 + get_opts.file_flags = BPF_F_PATH_FD; 87 + get_opts.path_fd = mnt_fd; 88 + map_fd2 = bpf_obj_get_opts(map_name, &get_opts); 89 + if (!ASSERT_GE(map_fd2, 0, "map_get")) 90 + goto cleanup; 91 + 92 + /* update map through one FD */ 93 + src_value = 0xcafebeef; 94 + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); 95 + ASSERT_OK(err, "map_update"); 96 + 97 + /* check values written/read through different FDs do match */ 98 + dst_value = 0; 99 + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); 100 + ASSERT_OK(err, "map_lookup"); 101 + ASSERT_EQ(dst_value, src_value, "map_value_eq1"); 102 + ASSERT_EQ(dst_value, 0xcafebeef, "map_value_eq2"); 103 + 104 + cleanup: 105 + if (map_fd >= 0) 106 + ASSERT_OK(close(map_fd), "close_map_fd"); 107 + if (map_fd2 >= 0) 108 + ASSERT_OK(close(map_fd2), "close_map_fd2"); 109 + if (fs_fd >= 0) 110 + ASSERT_OK(close(fs_fd), "close_fs_fd"); 111 + if (mnt_fd >= 0) 112 + ASSERT_OK(close(mnt_fd), "close_mnt_fd"); 113 + } 114 + 115 + enum path_kind 116 + { 117 + PATH_STR_ABS, 118 + PATH_STR_REL, 119 + PATH_FD_REL, 120 + }; 121 + 122 + 
static void validate_pin(int map_fd, const char *map_name, int src_value, 123 + enum path_kind path_kind) 124 + { 125 + LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts); 126 + char abs_path[PATH_MAX], old_cwd[PATH_MAX]; 127 + const char *pin_path = NULL; 128 + int zero = 0, dst_value, map_fd2, err; 129 + 130 + snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name); 131 + old_cwd[0] = '\0'; 132 + 133 + switch (path_kind) { 134 + case PATH_STR_ABS: 135 + /* absolute path */ 136 + pin_path = abs_path; 137 + break; 138 + case PATH_STR_REL: 139 + /* cwd + relative path */ 140 + ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd"); 141 + ASSERT_OK(chdir("/sys/fs/bpf"), "chdir"); 142 + pin_path = map_name; 143 + break; 144 + case PATH_FD_REL: 145 + /* dir fd + relative path */ 146 + pin_opts.file_flags = BPF_F_PATH_FD; 147 + pin_opts.path_fd = open("/sys/fs/bpf", O_PATH); 148 + ASSERT_GE(pin_opts.path_fd, 0, "path_fd"); 149 + pin_path = map_name; 150 + break; 151 + } 152 + 153 + /* pin BPF map using specified path definition */ 154 + err = bpf_obj_pin_opts(map_fd, pin_path, &pin_opts); 155 + ASSERT_OK(err, "obj_pin"); 156 + 157 + /* cleanup */ 158 + if (pin_opts.path_fd >= 0) 159 + close(pin_opts.path_fd); 160 + if (old_cwd[0]) 161 + ASSERT_OK(chdir(old_cwd), "restore_cwd"); 162 + 163 + map_fd2 = bpf_obj_get(abs_path); 164 + if (!ASSERT_GE(map_fd2, 0, "map_get")) 165 + goto cleanup; 166 + 167 + /* update map through one FD */ 168 + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); 169 + ASSERT_OK(err, "map_update"); 170 + 171 + /* check values written/read through different FDs do match */ 172 + dst_value = 0; 173 + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); 174 + ASSERT_OK(err, "map_lookup"); 175 + ASSERT_EQ(dst_value, src_value, "map_value_eq"); 176 + cleanup: 177 + if (map_fd2 >= 0) 178 + ASSERT_OK(close(map_fd2), "close_map_fd2"); 179 + unlink(abs_path); 180 + } 181 + 182 + static void validate_get(int map_fd, const char *map_name, int 
src_value, 183 + enum path_kind path_kind) 184 + { 185 + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); 186 + char abs_path[PATH_MAX], old_cwd[PATH_MAX]; 187 + const char *pin_path = NULL; 188 + int zero = 0, dst_value, map_fd2, err; 189 + 190 + snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name); 191 + /* pin BPF map using specified path definition */ 192 + err = bpf_obj_pin(map_fd, abs_path); 193 + if (!ASSERT_OK(err, "pin_map")) 194 + return; 195 + 196 + old_cwd[0] = '\0'; 197 + 198 + switch (path_kind) { 199 + case PATH_STR_ABS: 200 + /* absolute path */ 201 + pin_path = abs_path; 202 + break; 203 + case PATH_STR_REL: 204 + /* cwd + relative path */ 205 + ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd"); 206 + ASSERT_OK(chdir("/sys/fs/bpf"), "chdir"); 207 + pin_path = map_name; 208 + break; 209 + case PATH_FD_REL: 210 + /* dir fd + relative path */ 211 + get_opts.file_flags = BPF_F_PATH_FD; 212 + get_opts.path_fd = open("/sys/fs/bpf", O_PATH); 213 + ASSERT_GE(get_opts.path_fd, 0, "path_fd"); 214 + pin_path = map_name; 215 + break; 216 + } 217 + 218 + map_fd2 = bpf_obj_get_opts(pin_path, &get_opts); 219 + if (!ASSERT_GE(map_fd2, 0, "map_get")) 220 + goto cleanup; 221 + 222 + /* cleanup */ 223 + if (get_opts.path_fd >= 0) 224 + close(get_opts.path_fd); 225 + if (old_cwd[0]) 226 + ASSERT_OK(chdir(old_cwd), "restore_cwd"); 227 + 228 + /* update map through one FD */ 229 + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); 230 + ASSERT_OK(err, "map_update"); 231 + 232 + /* check values written/read through different FDs do match */ 233 + dst_value = 0; 234 + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); 235 + ASSERT_OK(err, "map_lookup"); 236 + ASSERT_EQ(dst_value, src_value, "map_value_eq"); 237 + cleanup: 238 + if (map_fd2 >= 0) 239 + ASSERT_OK(close(map_fd2), "close_map_fd2"); 240 + unlink(abs_path); 241 + } 242 + 243 + static void bpf_obj_pinning_mounted(enum path_kind path_kind) 244 + { 245 + const char *map_name = "mounted_map"; 
246 + int map_fd; 247 + 248 + /* create BPF map to pin */ 249 + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL); 250 + if (!ASSERT_GE(map_fd, 0, "map_fd")) 251 + return; 252 + 253 + validate_pin(map_fd, map_name, 100 + (int)path_kind, path_kind); 254 + validate_get(map_fd, map_name, 200 + (int)path_kind, path_kind); 255 + ASSERT_OK(close(map_fd), "close_map_fd"); 256 + } 257 + 258 + void test_bpf_obj_pinning() 259 + { 260 + if (test__start_subtest("detached")) 261 + bpf_obj_pinning_detached(); 262 + if (test__start_subtest("mounted-str-abs")) 263 + bpf_obj_pinning_mounted(PATH_STR_ABS); 264 + if (test__start_subtest("mounted-str-rel")) 265 + bpf_obj_pinning_mounted(PATH_STR_REL); 266 + if (test__start_subtest("mounted-fd-rel")) 267 + bpf_obj_pinning_mounted(PATH_FD_REL); 268 + }
+227
tools/testing/selftests/bpf/prog_tests/global_map_resize.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ 3 + #include <errno.h> 4 + #include <sys/syscall.h> 5 + #include <unistd.h> 6 + #include "test_global_map_resize.skel.h" 7 + #include "test_progs.h" 8 + 9 + static void run_prog_bss_array_sum(void) 10 + { 11 + (void)syscall(__NR_getpid); 12 + } 13 + 14 + static void run_prog_data_array_sum(void) 15 + { 16 + (void)syscall(__NR_getuid); 17 + } 18 + 19 + static void global_map_resize_bss_subtest(void) 20 + { 21 + int err; 22 + struct test_global_map_resize *skel; 23 + struct bpf_map *map; 24 + const __u32 desired_sz = sizeof(skel->bss->sum) + sysconf(_SC_PAGE_SIZE) * 2; 25 + size_t array_len, actual_sz; 26 + 27 + skel = test_global_map_resize__open(); 28 + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) 29 + goto teardown; 30 + 31 + /* set some initial value before resizing. 32 + * it is expected this non-zero value will be preserved 33 + * while resizing. 34 + */ 35 + skel->bss->array[0] = 1; 36 + 37 + /* resize map value and verify the new size */ 38 + map = skel->maps.bss; 39 + err = bpf_map__set_value_size(map, desired_sz); 40 + if (!ASSERT_OK(err, "bpf_map__set_value_size")) 41 + goto teardown; 42 + if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize")) 43 + goto teardown; 44 + 45 + /* set the expected number of elements based on the resized array */ 46 + array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->bss->array[0]); 47 + if (!ASSERT_GT(array_len, 1, "array_len")) 48 + goto teardown; 49 + 50 + skel->bss = bpf_map__initial_value(skel->maps.bss, &actual_sz); 51 + if (!ASSERT_OK_PTR(skel->bss, "bpf_map__initial_value (ptr)")) 52 + goto teardown; 53 + if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)")) 54 + goto teardown; 55 + 56 + /* fill the newly resized array with ones, 57 + * skipping the first element which was previously set 58 + */ 59 + for (int i = 1; i < array_len; i++) 60 + 
skel->bss->array[i] = 1; 61 + 62 + /* set global const values before loading */ 63 + skel->rodata->pid = getpid(); 64 + skel->rodata->bss_array_len = array_len; 65 + skel->rodata->data_array_len = 1; 66 + 67 + err = test_global_map_resize__load(skel); 68 + if (!ASSERT_OK(err, "test_global_map_resize__load")) 69 + goto teardown; 70 + err = test_global_map_resize__attach(skel); 71 + if (!ASSERT_OK(err, "test_global_map_resize__attach")) 72 + goto teardown; 73 + 74 + /* run the bpf program which will sum the contents of the array. 75 + * since the array was filled with ones,verify the sum equals array_len 76 + */ 77 + run_prog_bss_array_sum(); 78 + if (!ASSERT_EQ(skel->bss->sum, array_len, "sum")) 79 + goto teardown; 80 + 81 + teardown: 82 + test_global_map_resize__destroy(skel); 83 + } 84 + 85 + static void global_map_resize_data_subtest(void) 86 + { 87 + int err; 88 + struct test_global_map_resize *skel; 89 + struct bpf_map *map; 90 + const __u32 desired_sz = sysconf(_SC_PAGE_SIZE) * 2; 91 + size_t array_len, actual_sz; 92 + 93 + skel = test_global_map_resize__open(); 94 + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) 95 + goto teardown; 96 + 97 + /* set some initial value before resizing. 98 + * it is expected this non-zero value will be preserved 99 + * while resizing. 
100 + */ 101 + skel->data_custom->my_array[0] = 1; 102 + 103 + /* resize map value and verify the new size */ 104 + map = skel->maps.data_custom; 105 + err = bpf_map__set_value_size(map, desired_sz); 106 + if (!ASSERT_OK(err, "bpf_map__set_value_size")) 107 + goto teardown; 108 + if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize")) 109 + goto teardown; 110 + 111 + /* set the expected number of elements based on the resized array */ 112 + array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->data_custom->my_array[0]); 113 + if (!ASSERT_GT(array_len, 1, "array_len")) 114 + goto teardown; 115 + 116 + skel->data_custom = bpf_map__initial_value(skel->maps.data_custom, &actual_sz); 117 + if (!ASSERT_OK_PTR(skel->data_custom, "bpf_map__initial_value (ptr)")) 118 + goto teardown; 119 + if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)")) 120 + goto teardown; 121 + 122 + /* fill the newly resized array with ones, 123 + * skipping the first element which was previously set 124 + */ 125 + for (int i = 1; i < array_len; i++) 126 + skel->data_custom->my_array[i] = 1; 127 + 128 + /* set global const values before loading */ 129 + skel->rodata->pid = getpid(); 130 + skel->rodata->bss_array_len = 1; 131 + skel->rodata->data_array_len = array_len; 132 + 133 + err = test_global_map_resize__load(skel); 134 + if (!ASSERT_OK(err, "test_global_map_resize__load")) 135 + goto teardown; 136 + err = test_global_map_resize__attach(skel); 137 + if (!ASSERT_OK(err, "test_global_map_resize__attach")) 138 + goto teardown; 139 + 140 + /* run the bpf program which will sum the contents of the array. 
141 + * since the array was filled with ones,verify the sum equals array_len 142 + */ 143 + run_prog_data_array_sum(); 144 + if (!ASSERT_EQ(skel->bss->sum, array_len, "sum")) 145 + goto teardown; 146 + 147 + teardown: 148 + test_global_map_resize__destroy(skel); 149 + } 150 + 151 + static void global_map_resize_invalid_subtest(void) 152 + { 153 + int err; 154 + struct test_global_map_resize *skel; 155 + struct bpf_map *map; 156 + __u32 element_sz, desired_sz; 157 + 158 + skel = test_global_map_resize__open(); 159 + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) 160 + return; 161 + 162 + /* attempt to resize a global datasec map to size 163 + * which does NOT align with array 164 + */ 165 + map = skel->maps.data_custom; 166 + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.custom initial btf")) 167 + goto teardown; 168 + /* set desired size a fraction of element size beyond an aligned size */ 169 + element_sz = sizeof(skel->data_custom->my_array[0]); 170 + desired_sz = element_sz + element_sz / 2; 171 + /* confirm desired size does NOT align with array */ 172 + if (!ASSERT_NEQ(desired_sz % element_sz, 0, "my_array alignment")) 173 + goto teardown; 174 + err = bpf_map__set_value_size(map, desired_sz); 175 + /* confirm resize is OK but BTF info is cleared */ 176 + if (!ASSERT_OK(err, ".data.custom bpf_map__set_value_size") || 177 + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.custom clear btf key") || 178 + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.custom clear btf val")) 179 + goto teardown; 180 + 181 + /* attempt to resize a global datasec map whose only var is NOT an array */ 182 + map = skel->maps.data_non_array; 183 + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array initial btf")) 184 + goto teardown; 185 + /* set desired size to arbitrary value */ 186 + desired_sz = 1024; 187 + err = bpf_map__set_value_size(map, desired_sz); 188 + /* confirm resize is OK but BTF info is cleared */ 189 + if 
(!ASSERT_OK(err, ".data.non_array bpf_map__set_value_size") || 190 + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.non_array clear btf key") || 191 + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array clear btf val")) 192 + goto teardown; 193 + 194 + /* attempt to resize a global datasec map 195 + * whose last var is NOT an array 196 + */ 197 + map = skel->maps.data_array_not_last; 198 + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last initial btf")) 199 + goto teardown; 200 + /* set desired size to a multiple of element size */ 201 + element_sz = sizeof(skel->data_array_not_last->my_array_first[0]); 202 + desired_sz = element_sz * 8; 203 + /* confirm desired size aligns with array */ 204 + if (!ASSERT_EQ(desired_sz % element_sz, 0, "my_array_first alignment")) 205 + goto teardown; 206 + err = bpf_map__set_value_size(map, desired_sz); 207 + /* confirm resize is OK but BTF info is cleared */ 208 + if (!ASSERT_OK(err, ".data.array_not_last bpf_map__set_value_size") || 209 + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.array_not_last clear btf key") || 210 + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last clear btf val")) 211 + goto teardown; 212 + 213 + teardown: 214 + test_global_map_resize__destroy(skel); 215 + } 216 + 217 + void test_global_map_resize(void) 218 + { 219 + if (test__start_subtest("global_map_resize_bss")) 220 + global_map_resize_bss_subtest(); 221 + 222 + if (test__start_subtest("global_map_resize_data")) 223 + global_map_resize_data_subtest(); 224 + 225 + if (test__start_subtest("global_map_resize_invalid")) 226 + global_map_resize_invalid_subtest(); 227 + }
+4 -8
tools/testing/selftests/bpf/prog_tests/module_attach.c
··· 4 4 #include <test_progs.h> 5 5 #include <stdbool.h> 6 6 #include "test_module_attach.skel.h" 7 + #include "testing_helpers.h" 7 8 8 9 static int duration; 9 10 ··· 31 30 close(fd); 32 31 33 32 return 0; 34 - } 35 - 36 - static int delete_module(const char *name, int flags) 37 - { 38 - return syscall(__NR_delete_module, name, flags); 39 33 } 40 34 41 35 void test_module_attach(void) ··· 89 93 if (!ASSERT_OK_PTR(link, "attach_fentry")) 90 94 goto cleanup; 91 95 92 - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); 96 + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); 93 97 bpf_link__destroy(link); 94 98 95 99 link = bpf_program__attach(skel->progs.handle_fexit); 96 100 if (!ASSERT_OK_PTR(link, "attach_fexit")) 97 101 goto cleanup; 98 102 99 - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); 103 + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); 100 104 bpf_link__destroy(link); 101 105 102 106 link = bpf_program__attach(skel->progs.kprobe_multi); 103 107 if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) 104 108 goto cleanup; 105 109 106 - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); 110 + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); 107 111 bpf_link__destroy(link); 108 112 109 113 cleanup:
+2 -2
tools/testing/selftests/bpf/prog_tests/netcnt.c
··· 67 67 } 68 68 69 69 /* No packets should be lost */ 70 - ASSERT_EQ(packets, 10000, "packets"); 70 + ASSERT_GE(packets, 10000, "packets"); 71 71 72 72 /* Let's check that bytes counter matches the number of packets 73 73 * multiplied by the size of ipv6 ICMP packet. 74 74 */ 75 - ASSERT_EQ(bytes, packets * 104, "bytes"); 75 + ASSERT_GE(bytes, packets * 104, "bytes"); 76 76 77 77 err: 78 78 if (cg_fd != -1)
+221
tools/testing/selftests/bpf/prog_tests/sock_destroy.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <test_progs.h> 3 + #include <bpf/bpf_endian.h> 4 + 5 + #include "sock_destroy_prog.skel.h" 6 + #include "sock_destroy_prog_fail.skel.h" 7 + #include "network_helpers.h" 8 + 9 + #define TEST_NS "sock_destroy_netns" 10 + 11 + static void start_iter_sockets(struct bpf_program *prog) 12 + { 13 + struct bpf_link *link; 14 + char buf[50] = {}; 15 + int iter_fd, len; 16 + 17 + link = bpf_program__attach_iter(prog, NULL); 18 + if (!ASSERT_OK_PTR(link, "attach_iter")) 19 + return; 20 + 21 + iter_fd = bpf_iter_create(bpf_link__fd(link)); 22 + if (!ASSERT_GE(iter_fd, 0, "create_iter")) 23 + goto free_link; 24 + 25 + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) 26 + ; 27 + ASSERT_GE(len, 0, "read"); 28 + 29 + close(iter_fd); 30 + 31 + free_link: 32 + bpf_link__destroy(link); 33 + } 34 + 35 + static void test_tcp_client(struct sock_destroy_prog *skel) 36 + { 37 + int serv = -1, clien = -1, accept_serv = -1, n; 38 + 39 + serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); 40 + if (!ASSERT_GE(serv, 0, "start_server")) 41 + goto cleanup; 42 + 43 + clien = connect_to_fd(serv, 0); 44 + if (!ASSERT_GE(clien, 0, "connect_to_fd")) 45 + goto cleanup; 46 + 47 + accept_serv = accept(serv, NULL, NULL); 48 + if (!ASSERT_GE(accept_serv, 0, "serv accept")) 49 + goto cleanup; 50 + 51 + n = send(clien, "t", 1, 0); 52 + if (!ASSERT_EQ(n, 1, "client send")) 53 + goto cleanup; 54 + 55 + /* Run iterator program that destroys connected client sockets. 
*/ 56 + start_iter_sockets(skel->progs.iter_tcp6_client); 57 + 58 + n = send(clien, "t", 1, 0); 59 + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) 60 + goto cleanup; 61 + ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket"); 62 + 63 + cleanup: 64 + if (clien != -1) 65 + close(clien); 66 + if (accept_serv != -1) 67 + close(accept_serv); 68 + if (serv != -1) 69 + close(serv); 70 + } 71 + 72 + static void test_tcp_server(struct sock_destroy_prog *skel) 73 + { 74 + int serv = -1, clien = -1, accept_serv = -1, n, serv_port; 75 + 76 + serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); 77 + if (!ASSERT_GE(serv, 0, "start_server")) 78 + goto cleanup; 79 + serv_port = get_socket_local_port(serv); 80 + if (!ASSERT_GE(serv_port, 0, "get_sock_local_port")) 81 + goto cleanup; 82 + skel->bss->serv_port = (__be16) serv_port; 83 + 84 + clien = connect_to_fd(serv, 0); 85 + if (!ASSERT_GE(clien, 0, "connect_to_fd")) 86 + goto cleanup; 87 + 88 + accept_serv = accept(serv, NULL, NULL); 89 + if (!ASSERT_GE(accept_serv, 0, "serv accept")) 90 + goto cleanup; 91 + 92 + n = send(clien, "t", 1, 0); 93 + if (!ASSERT_EQ(n, 1, "client send")) 94 + goto cleanup; 95 + 96 + /* Run iterator program that destroys server sockets. 
*/ 97 + start_iter_sockets(skel->progs.iter_tcp6_server); 98 + 99 + n = send(clien, "t", 1, 0); 100 + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) 101 + goto cleanup; 102 + ASSERT_EQ(errno, ECONNRESET, "error code on destroyed socket"); 103 + 104 + cleanup: 105 + if (clien != -1) 106 + close(clien); 107 + if (accept_serv != -1) 108 + close(accept_serv); 109 + if (serv != -1) 110 + close(serv); 111 + } 112 + 113 + static void test_udp_client(struct sock_destroy_prog *skel) 114 + { 115 + int serv = -1, clien = -1, n = 0; 116 + 117 + serv = start_server(AF_INET6, SOCK_DGRAM, NULL, 0, 0); 118 + if (!ASSERT_GE(serv, 0, "start_server")) 119 + goto cleanup; 120 + 121 + clien = connect_to_fd(serv, 0); 122 + if (!ASSERT_GE(clien, 0, "connect_to_fd")) 123 + goto cleanup; 124 + 125 + n = send(clien, "t", 1, 0); 126 + if (!ASSERT_EQ(n, 1, "client send")) 127 + goto cleanup; 128 + 129 + /* Run iterator program that destroys sockets. */ 130 + start_iter_sockets(skel->progs.iter_udp6_client); 131 + 132 + n = send(clien, "t", 1, 0); 133 + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) 134 + goto cleanup; 135 + /* UDP sockets have an overriding error code after they are disconnected, 136 + * so we don't check for ECONNABORTED error code. 137 + */ 138 + 139 + cleanup: 140 + if (clien != -1) 141 + close(clien); 142 + if (serv != -1) 143 + close(serv); 144 + } 145 + 146 + static void test_udp_server(struct sock_destroy_prog *skel) 147 + { 148 + int *listen_fds = NULL, n, i, serv_port; 149 + unsigned int num_listens = 5; 150 + char buf[1]; 151 + 152 + /* Start reuseport servers. 
*/ 153 + listen_fds = start_reuseport_server(AF_INET6, SOCK_DGRAM, 154 + "::1", 0, 0, num_listens); 155 + if (!ASSERT_OK_PTR(listen_fds, "start_reuseport_server")) 156 + goto cleanup; 157 + serv_port = get_socket_local_port(listen_fds[0]); 158 + if (!ASSERT_GE(serv_port, 0, "get_sock_local_port")) 159 + goto cleanup; 160 + skel->bss->serv_port = (__be16) serv_port; 161 + 162 + /* Run iterator program that destroys server sockets. */ 163 + start_iter_sockets(skel->progs.iter_udp6_server); 164 + 165 + for (i = 0; i < num_listens; ++i) { 166 + n = read(listen_fds[i], buf, sizeof(buf)); 167 + if (!ASSERT_EQ(n, -1, "read") || 168 + !ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket")) 169 + break; 170 + } 171 + ASSERT_EQ(i, num_listens, "server socket"); 172 + 173 + cleanup: 174 + free_fds(listen_fds, num_listens); 175 + } 176 + 177 + void test_sock_destroy(void) 178 + { 179 + struct sock_destroy_prog *skel; 180 + struct nstoken *nstoken = NULL; 181 + int cgroup_fd; 182 + 183 + skel = sock_destroy_prog__open_and_load(); 184 + if (!ASSERT_OK_PTR(skel, "skel_open")) 185 + return; 186 + 187 + cgroup_fd = test__join_cgroup("/sock_destroy"); 188 + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) 189 + goto cleanup; 190 + 191 + skel->links.sock_connect = bpf_program__attach_cgroup( 192 + skel->progs.sock_connect, cgroup_fd); 193 + if (!ASSERT_OK_PTR(skel->links.sock_connect, "prog_attach")) 194 + goto cleanup; 195 + 196 + SYS(cleanup, "ip netns add %s", TEST_NS); 197 + SYS(cleanup, "ip -net %s link set dev lo up", TEST_NS); 198 + 199 + nstoken = open_netns(TEST_NS); 200 + if (!ASSERT_OK_PTR(nstoken, "open_netns")) 201 + goto cleanup; 202 + 203 + if (test__start_subtest("tcp_client")) 204 + test_tcp_client(skel); 205 + if (test__start_subtest("tcp_server")) 206 + test_tcp_server(skel); 207 + if (test__start_subtest("udp_client")) 208 + test_udp_client(skel); 209 + if (test__start_subtest("udp_server")) 210 + test_udp_server(skel); 211 + 212 + 
RUN_TESTS(sock_destroy_prog_fail); 213 + 214 + cleanup: 215 + if (nstoken) 216 + close_netns(nstoken); 217 + SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); 218 + if (cgroup_fd >= 0) 219 + close(cgroup_fd); 220 + sock_destroy_prog__destroy(skel); 221 + }
+3 -1
tools/testing/selftests/bpf/prog_tests/sockopt.c
··· 1060 1060 return; 1061 1061 1062 1062 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1063 - test__start_subtest(tests[i].descr); 1063 + if (!test__start_subtest(tests[i].descr)) 1064 + continue; 1065 + 1064 1066 ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr); 1065 1067 } 1066 1068
+121
tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
··· 18 18 #include <linux/if_bonding.h> 19 19 #include <linux/limits.h> 20 20 #include <linux/udp.h> 21 + #include <uapi/linux/netdev.h> 21 22 22 23 #include "xdp_dummy.skel.h" 23 24 #include "xdp_redirect_multi_kern.skel.h" ··· 493 492 system("ip link del bond_nest2"); 494 493 } 495 494 495 + static void test_xdp_bonding_features(struct skeletons *skeletons) 496 + { 497 + LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); 498 + int bond_idx, veth1_idx, err; 499 + struct bpf_link *link = NULL; 500 + 501 + if (!ASSERT_OK(system("ip link add bond type bond"), "add bond")) 502 + goto out; 503 + 504 + bond_idx = if_nametoindex("bond"); 505 + if (!ASSERT_GE(bond_idx, 0, "if_nametoindex bond")) 506 + goto out; 507 + 508 + /* query default xdp-feature for bond device */ 509 + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); 510 + if (!ASSERT_OK(err, "bond bpf_xdp_query")) 511 + goto out; 512 + 513 + if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, 514 + "bond query_opts.feature_flags")) 515 + goto out; 516 + 517 + if (!ASSERT_OK(system("ip link add veth0 type veth peer name veth1"), 518 + "add veth{0,1} pair")) 519 + goto out; 520 + 521 + if (!ASSERT_OK(system("ip link add veth2 type veth peer name veth3"), 522 + "add veth{2,3} pair")) 523 + goto out; 524 + 525 + if (!ASSERT_OK(system("ip link set veth0 master bond"), 526 + "add veth0 to master bond")) 527 + goto out; 528 + 529 + /* xdp-feature for bond device should be obtained from the single slave 530 + * device (veth0) 531 + */ 532 + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); 533 + if (!ASSERT_OK(err, "bond bpf_xdp_query")) 534 + goto out; 535 + 536 + if (!ASSERT_EQ(query_opts.feature_flags, 537 + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 538 + NETDEV_XDP_ACT_RX_SG, 539 + "bond query_opts.feature_flags")) 540 + goto out; 541 + 542 + veth1_idx = if_nametoindex("veth1"); 543 + if (!ASSERT_GE(veth1_idx, 0, "if_nametoindex veth1")) 544 + goto out; 545 + 546 + link = 
bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, 547 + veth1_idx); 548 + if (!ASSERT_OK_PTR(link, "attach program to veth1")) 549 + goto out; 550 + 551 + /* xdp-feature for veth0 are changed */ 552 + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); 553 + if (!ASSERT_OK(err, "bond bpf_xdp_query")) 554 + goto out; 555 + 556 + if (!ASSERT_EQ(query_opts.feature_flags, 557 + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 558 + NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT | 559 + NETDEV_XDP_ACT_NDO_XMIT_SG, 560 + "bond query_opts.feature_flags")) 561 + goto out; 562 + 563 + if (!ASSERT_OK(system("ip link set veth2 master bond"), 564 + "add veth2 to master bond")) 565 + goto out; 566 + 567 + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); 568 + if (!ASSERT_OK(err, "bond bpf_xdp_query")) 569 + goto out; 570 + 571 + /* xdp-feature for bond device should be set to the most restrict 572 + * value obtained from attached slave devices (veth0 and veth2) 573 + */ 574 + if (!ASSERT_EQ(query_opts.feature_flags, 575 + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 576 + NETDEV_XDP_ACT_RX_SG, 577 + "bond query_opts.feature_flags")) 578 + goto out; 579 + 580 + if (!ASSERT_OK(system("ip link set veth2 nomaster"), 581 + "del veth2 to master bond")) 582 + goto out; 583 + 584 + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); 585 + if (!ASSERT_OK(err, "bond bpf_xdp_query")) 586 + goto out; 587 + 588 + if (!ASSERT_EQ(query_opts.feature_flags, 589 + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 590 + NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT | 591 + NETDEV_XDP_ACT_NDO_XMIT_SG, 592 + "bond query_opts.feature_flags")) 593 + goto out; 594 + 595 + if (!ASSERT_OK(system("ip link set veth0 nomaster"), 596 + "del veth0 to master bond")) 597 + goto out; 598 + 599 + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); 600 + if (!ASSERT_OK(err, "bond bpf_xdp_query")) 601 + goto out; 602 + 603 + 
ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, 604 + "bond query_opts.feature_flags"); 605 + out: 606 + bpf_link__destroy(link); 607 + system("ip link del veth0"); 608 + system("ip link del veth2"); 609 + system("ip link del bond"); 610 + } 611 + 496 612 static int libbpf_debug_print(enum libbpf_print_level level, 497 613 const char *format, va_list args) 498 614 { ··· 663 545 664 546 if (test__start_subtest("xdp_bonding_nested")) 665 547 test_xdp_bonding_nested(&skeletons); 548 + 549 + if (test__start_subtest("xdp_bonding_features")) 550 + test_xdp_bonding_features(&skeletons); 666 551 667 552 for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) { 668 553 struct bond_test_case *test_case = &bond_test_cases[i];
+1 -3
tools/testing/selftests/bpf/progs/cb_refs.c
··· 2 2 #include <vmlinux.h> 3 3 #include <bpf/bpf_tracing.h> 4 4 #include <bpf/bpf_helpers.h> 5 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 5 6 6 7 struct map_value { 7 8 struct prog_test_ref_kfunc __kptr *ptr; ··· 14 13 __type(value, struct map_value); 15 14 __uint(max_entries, 16); 16 15 } array_map SEC(".maps"); 17 - 18 - extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 19 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 20 16 21 17 static __noinline int cb1(void *map, void *key, void *value, void *ctx) 22 18 {
+1
tools/testing/selftests/bpf/progs/dynptr_fail.c
··· 3 3 4 4 #include <errno.h> 5 5 #include <string.h> 6 + #include <stdbool.h> 6 7 #include <linux/bpf.h> 7 8 #include <bpf/bpf_helpers.h> 8 9 #include <linux/if_ether.h>
+1
tools/testing/selftests/bpf/progs/dynptr_success.c
··· 2 2 /* Copyright (c) 2022 Facebook */ 3 3 4 4 #include <string.h> 5 + #include <stdbool.h> 5 6 #include <linux/bpf.h> 6 7 #include <bpf/bpf_helpers.h> 7 8 #include "bpf_misc.h"
+1 -3
tools/testing/selftests/bpf/progs/jit_probe_mem.c
··· 3 3 #include <vmlinux.h> 4 4 #include <bpf/bpf_tracing.h> 5 5 #include <bpf/bpf_helpers.h> 6 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 6 7 7 8 static struct prog_test_ref_kfunc __kptr *v; 8 9 long total_sum = -1; 9 - 10 - extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 11 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 12 10 13 11 SEC("tc") 14 12 int test_jit_probe_mem(struct __sk_buff *ctx)
+1 -2
tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <vmlinux.h> 3 3 #include <bpf/bpf_helpers.h> 4 - 5 - extern void bpf_kfunc_call_test_destructive(void) __ksym; 4 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 6 5 7 6 SEC("tc") 8 7 int kfunc_destructive_test(void)
+1 -8
tools/testing/selftests/bpf/progs/kfunc_call_fail.c
··· 2 2 /* Copyright (c) 2021 Facebook */ 3 3 #include <vmlinux.h> 4 4 #include <bpf/bpf_helpers.h> 5 - 6 - extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 7 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 8 - extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; 9 - extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; 10 - extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; 11 - extern int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; 12 - extern void bpf_kfunc_call_int_mem_release(int *p) __ksym; 5 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 13 6 14 7 struct syscall_test_args { 15 8 __u8 data[16];
+1 -2
tools/testing/selftests/bpf/progs/kfunc_call_race.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <vmlinux.h> 3 3 #include <bpf/bpf_helpers.h> 4 - 5 - extern void bpf_testmod_test_mod_kfunc(int i) __ksym; 4 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 6 5 7 6 SEC("tc") 8 7 int kfunc_call_fail(struct __sk_buff *ctx)
+1 -16
tools/testing/selftests/bpf/progs/kfunc_call_test.c
··· 2 2 /* Copyright (c) 2021 Facebook */ 3 3 #include <vmlinux.h> 4 4 #include <bpf/bpf_helpers.h> 5 - 6 - extern long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; 7 - extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; 8 - extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, 9 - __u32 c, __u64 d) __ksym; 10 - 11 - extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 12 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 13 - extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; 14 - extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; 15 - extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; 16 - extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; 17 - extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; 18 - extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; 19 - extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; 20 - extern u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; 5 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 21 6 22 7 SEC("tc") 23 8 int kfunc_call_test4(struct __sk_buff *skb)
+2 -7
tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2021 Facebook */ 3 - #include <linux/bpf.h> 4 - #include <bpf/bpf_helpers.h> 5 - #include "bpf_tcp_helpers.h" 3 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 6 4 7 5 extern const int bpf_prog_active __ksym; 8 - extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, 9 - __u32 c, __u64 d) __ksym; 10 - extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; 11 6 int active_res = -1; 12 7 int sk_state_res = -1; 13 8 ··· 23 28 if (active) 24 29 active_res = *active; 25 30 26 - sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->sk_state; 31 + sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state; 27 32 28 33 return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4); 29 34 }
+2 -3
tools/testing/selftests/bpf/progs/local_kptr_stash.c
··· 5 5 #include <bpf/bpf_tracing.h> 6 6 #include <bpf/bpf_helpers.h> 7 7 #include <bpf/bpf_core_read.h> 8 - #include "bpf_experimental.h" 8 + #include "../bpf_experimental.h" 9 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 9 10 10 11 struct node_data { 11 12 long key; ··· 32 31 * Had to do the same w/ bpf_kfunc_call_test_release below 33 32 */ 34 33 struct node_data *just_here_because_btf_bug; 35 - 36 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 37 34 38 35 struct { 39 36 __uint(type, BPF_MAP_TYPE_ARRAY);
+1 -4
tools/testing/selftests/bpf/progs/map_kptr.c
··· 2 2 #include <vmlinux.h> 3 3 #include <bpf/bpf_tracing.h> 4 4 #include <bpf/bpf_helpers.h> 5 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 5 6 6 7 struct map_value { 7 8 struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; ··· 114 113 DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps); 115 114 DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps); 116 115 DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps); 117 - 118 - extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 119 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 120 - void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; 121 116 122 117 #define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) 123 118
+1 -3
tools/testing/selftests/bpf/progs/map_kptr_fail.c
··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include <bpf/bpf_core_read.h> 6 6 #include "bpf_misc.h" 7 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 7 8 8 9 struct map_value { 9 10 char buf[8]; ··· 19 18 __type(value, struct map_value); 20 19 __uint(max_entries, 1); 21 20 } array_map SEC(".maps"); 22 - 23 - extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 24 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 25 21 26 22 SEC("?tc") 27 23 __failure __msg("kptr access size must be BPF_DW")
+145
tools/testing/selftests/bpf/progs/sock_destroy_prog.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include "vmlinux.h" 4 + #include <bpf/bpf_helpers.h> 5 + #include <bpf/bpf_endian.h> 6 + 7 + #include "bpf_tracing_net.h" 8 + 9 + __be16 serv_port = 0; 10 + 11 + int bpf_sock_destroy(struct sock_common *sk) __ksym; 12 + 13 + struct { 14 + __uint(type, BPF_MAP_TYPE_ARRAY); 15 + __uint(max_entries, 1); 16 + __type(key, __u32); 17 + __type(value, __u64); 18 + } tcp_conn_sockets SEC(".maps"); 19 + 20 + struct { 21 + __uint(type, BPF_MAP_TYPE_ARRAY); 22 + __uint(max_entries, 1); 23 + __type(key, __u32); 24 + __type(value, __u64); 25 + } udp_conn_sockets SEC(".maps"); 26 + 27 + SEC("cgroup/connect6") 28 + int sock_connect(struct bpf_sock_addr *ctx) 29 + { 30 + __u64 sock_cookie = 0; 31 + int key = 0; 32 + __u32 keyc = 0; 33 + 34 + if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) 35 + return 1; 36 + 37 + sock_cookie = bpf_get_socket_cookie(ctx); 38 + if (ctx->protocol == IPPROTO_TCP) 39 + bpf_map_update_elem(&tcp_conn_sockets, &key, &sock_cookie, 0); 40 + else if (ctx->protocol == IPPROTO_UDP) 41 + bpf_map_update_elem(&udp_conn_sockets, &keyc, &sock_cookie, 0); 42 + else 43 + return 1; 44 + 45 + return 1; 46 + } 47 + 48 + SEC("iter/tcp") 49 + int iter_tcp6_client(struct bpf_iter__tcp *ctx) 50 + { 51 + struct sock_common *sk_common = ctx->sk_common; 52 + __u64 sock_cookie = 0; 53 + __u64 *val; 54 + int key = 0; 55 + 56 + if (!sk_common) 57 + return 0; 58 + 59 + if (sk_common->skc_family != AF_INET6) 60 + return 0; 61 + 62 + sock_cookie = bpf_get_socket_cookie(sk_common); 63 + val = bpf_map_lookup_elem(&tcp_conn_sockets, &key); 64 + if (!val) 65 + return 0; 66 + /* Destroy connected client sockets. 
*/ 67 + if (sock_cookie == *val) 68 + bpf_sock_destroy(sk_common); 69 + 70 + return 0; 71 + } 72 + 73 + SEC("iter/tcp") 74 + int iter_tcp6_server(struct bpf_iter__tcp *ctx) 75 + { 76 + struct sock_common *sk_common = ctx->sk_common; 77 + const struct inet_connection_sock *icsk; 78 + const struct inet_sock *inet; 79 + struct tcp6_sock *tcp_sk; 80 + __be16 srcp; 81 + 82 + if (!sk_common) 83 + return 0; 84 + 85 + if (sk_common->skc_family != AF_INET6) 86 + return 0; 87 + 88 + tcp_sk = bpf_skc_to_tcp6_sock(sk_common); 89 + if (!tcp_sk) 90 + return 0; 91 + 92 + icsk = &tcp_sk->tcp.inet_conn; 93 + inet = &icsk->icsk_inet; 94 + srcp = inet->inet_sport; 95 + 96 + /* Destroy server sockets. */ 97 + if (srcp == serv_port) 98 + bpf_sock_destroy(sk_common); 99 + 100 + return 0; 101 + } 102 + 103 + 104 + SEC("iter/udp") 105 + int iter_udp6_client(struct bpf_iter__udp *ctx) 106 + { 107 + struct udp_sock *udp_sk = ctx->udp_sk; 108 + struct sock *sk = (struct sock *) udp_sk; 109 + __u64 sock_cookie = 0, *val; 110 + int key = 0; 111 + 112 + if (!sk) 113 + return 0; 114 + 115 + sock_cookie = bpf_get_socket_cookie(sk); 116 + val = bpf_map_lookup_elem(&udp_conn_sockets, &key); 117 + if (!val) 118 + return 0; 119 + /* Destroy connected client sockets. */ 120 + if (sock_cookie == *val) 121 + bpf_sock_destroy((struct sock_common *)sk); 122 + 123 + return 0; 124 + } 125 + 126 + SEC("iter/udp") 127 + int iter_udp6_server(struct bpf_iter__udp *ctx) 128 + { 129 + struct udp_sock *udp_sk = ctx->udp_sk; 130 + struct sock *sk = (struct sock *) udp_sk; 131 + struct inet_sock *inet; 132 + __be16 srcp; 133 + 134 + if (!sk) 135 + return 0; 136 + 137 + inet = &udp_sk->inet; 138 + srcp = inet->inet_sport; 139 + if (srcp == serv_port) 140 + bpf_sock_destroy((struct sock_common *)sk); 141 + 142 + return 0; 143 + } 144 + 145 + char _license[] SEC("license") = "GPL";
+22
tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include "vmlinux.h" 4 + #include <bpf/bpf_tracing.h> 5 + #include <bpf/bpf_helpers.h> 6 + 7 + #include "bpf_misc.h" 8 + 9 + char _license[] SEC("license") = "GPL"; 10 + 11 + int bpf_sock_destroy(struct sock_common *sk) __ksym; 12 + 13 + SEC("tp_btf/tcp_destroy_sock") 14 + __failure __msg("calling kernel function bpf_sock_destroy is not allowed") 15 + int BPF_PROG(trace_tcp_destroy_sock, struct sock *sk) 16 + { 17 + /* should not load */ 18 + bpf_sock_destroy((struct sock_common *)sk); 19 + 20 + return 0; 21 + } 22 +
+58
tools/testing/selftests/bpf/progs/test_global_map_resize.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ 3 + 4 + #include "vmlinux.h" 5 + #include <bpf/bpf_helpers.h> 6 + 7 + char _license[] SEC("license") = "GPL"; 8 + 9 + /* rodata section */ 10 + const volatile pid_t pid; 11 + const volatile size_t bss_array_len; 12 + const volatile size_t data_array_len; 13 + 14 + /* bss section */ 15 + int sum = 0; 16 + int array[1]; 17 + 18 + /* custom data section */ 19 + int my_array[1] SEC(".data.custom"); 20 + 21 + /* custom data section which should NOT be resizable, 22 + * since it contains a single var which is not an array 23 + */ 24 + int my_int SEC(".data.non_array"); 25 + 26 + /* custom data section which should NOT be resizable, 27 + * since its last var is not an array 28 + */ 29 + int my_array_first[1] SEC(".data.array_not_last"); 30 + int my_int_last SEC(".data.array_not_last"); 31 + 32 + SEC("tp/syscalls/sys_enter_getpid") 33 + int bss_array_sum(void *ctx) 34 + { 35 + if (pid != (bpf_get_current_pid_tgid() >> 32)) 36 + return 0; 37 + 38 + sum = 0; 39 + 40 + for (size_t i = 0; i < bss_array_len; ++i) 41 + sum += array[i]; 42 + 43 + return 0; 44 + } 45 + 46 + SEC("tp/syscalls/sys_enter_getuid") 47 + int data_array_sum(void *ctx) 48 + { 49 + if (pid != (bpf_get_current_pid_tgid() >> 32)) 50 + return 0; 51 + 52 + sum = 0; 53 + 54 + for (size_t i = 0; i < data_array_len; ++i) 55 + sum += my_array[i]; 56 + 57 + return 0; 58 + }
+4 -1
tools/testing/selftests/bpf/progs/test_sock_fields.c
··· 265 265 266 266 static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk) 267 267 { 268 - __u16 *half = (__u16 *)&sk->dst_port; 268 + __u16 *half; 269 + 270 + asm volatile (""); 271 + half = (__u16 *)&sk->dst_port; 269 272 return half[0] == bpf_htons(0xcafe); 270 273 } 271 274
+1
tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
··· 2 2 /* Copyright (c) 2022 Meta */ 3 3 #include <stddef.h> 4 4 #include <string.h> 5 + #include <stdbool.h> 5 6 #include <linux/bpf.h> 6 7 #include <linux/if_ether.h> 7 8 #include <linux/if_packet.h>
+9 -67
tools/testing/selftests/bpf/test_progs.c
··· 11 11 #include <signal.h> 12 12 #include <string.h> 13 13 #include <execinfo.h> /* backtrace */ 14 - #include <linux/membarrier.h> 15 14 #include <sys/sysinfo.h> /* get_nprocs */ 16 15 #include <netinet/in.h> 17 16 #include <sys/select.h> ··· 626 627 free(val_buf1); 627 628 free(val_buf2); 628 629 return err; 629 - } 630 - 631 - static int finit_module(int fd, const char *param_values, int flags) 632 - { 633 - return syscall(__NR_finit_module, fd, param_values, flags); 634 - } 635 - 636 - static int delete_module(const char *name, int flags) 637 - { 638 - return syscall(__NR_delete_module, name, flags); 639 - } 640 - 641 - /* 642 - * Trigger synchronize_rcu() in kernel. 643 - */ 644 - int kern_sync_rcu(void) 645 - { 646 - return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); 647 - } 648 - 649 - static void unload_bpf_testmod(void) 650 - { 651 - if (kern_sync_rcu()) 652 - fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n"); 653 - if (delete_module("bpf_testmod", 0)) { 654 - if (errno == ENOENT) { 655 - if (verbose()) 656 - fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); 657 - return; 658 - } 659 - fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); 660 - return; 661 - } 662 - if (verbose()) 663 - fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); 664 - } 665 - 666 - static int load_bpf_testmod(void) 667 - { 668 - int fd; 669 - 670 - /* ensure previous instance of the module is unloaded */ 671 - unload_bpf_testmod(); 672 - 673 - if (verbose()) 674 - fprintf(stdout, "Loading bpf_testmod.ko...\n"); 675 - 676 - fd = open("bpf_testmod.ko", O_RDONLY); 677 - if (fd < 0) { 678 - fprintf(env.stderr, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); 679 - return -ENOENT; 680 - } 681 - if (finit_module(fd, "", 0)) { 682 - fprintf(env.stderr, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); 683 - close(fd); 684 - return -EINVAL; 685 - } 686 - close(fd); 687 - 688 - if (verbose()) 
689 - fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); 690 - return 0; 691 630 } 692 631 693 632 /* extern declarations for test funcs */ ··· 1657 1720 env.stderr = stderr; 1658 1721 1659 1722 env.has_testmod = true; 1660 - if (!env.list_test_names && load_bpf_testmod()) { 1661 - fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); 1662 - env.has_testmod = false; 1723 + if (!env.list_test_names) { 1724 + /* ensure previous instance of the module is unloaded */ 1725 + unload_bpf_testmod(verbose()); 1726 + 1727 + if (load_bpf_testmod(verbose())) { 1728 + fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); 1729 + env.has_testmod = false; 1730 + } 1663 1731 } 1664 1732 1665 1733 /* initializing tests */ ··· 1761 1819 close(env.saved_netns_fd); 1762 1820 out: 1763 1821 if (!env.list_test_names && env.has_testmod) 1764 - unload_bpf_testmod(); 1822 + unload_bpf_testmod(verbose()); 1765 1823 1766 1824 free_test_selector(&env.test_selector); 1767 1825 free_test_selector(&env.subtest_selector);
-1
tools/testing/selftests/bpf/test_progs.h
··· 405 405 int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); 406 406 int compare_map_keys(int map1_fd, int map2_fd); 407 407 int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); 408 - int kern_sync_rcu(void); 409 408 int trigger_module_test_read(int read_sz); 410 409 int trigger_module_test_write(int write_sz); 411 410 int write_sysctl(const char *sysctl, const char *value);
+148 -22
tools/testing/selftests/bpf/test_verifier.c
··· 40 40 #include "bpf_util.h" 41 41 #include "test_btf.h" 42 42 #include "../../../include/linux/filter.h" 43 + #include "testing_helpers.h" 43 44 44 45 #ifndef ENOTSUPP 45 46 #define ENOTSUPP 524 ··· 874 873 return fd; 875 874 } 876 875 876 + static void set_root(bool set) 877 + { 878 + __u64 caps; 879 + 880 + if (set) { 881 + if (cap_enable_effective(1ULL << CAP_SYS_ADMIN, &caps)) 882 + perror("cap_disable_effective(CAP_SYS_ADMIN)"); 883 + } else { 884 + if (cap_disable_effective(1ULL << CAP_SYS_ADMIN, &caps)) 885 + perror("cap_disable_effective(CAP_SYS_ADMIN)"); 886 + } 887 + } 888 + 889 + static __u64 ptr_to_u64(const void *ptr) 890 + { 891 + return (uintptr_t) ptr; 892 + } 893 + 894 + static struct btf *btf__load_testmod_btf(struct btf *vmlinux) 895 + { 896 + struct bpf_btf_info info; 897 + __u32 len = sizeof(info); 898 + struct btf *btf = NULL; 899 + char name[64]; 900 + __u32 id = 0; 901 + int err, fd; 902 + 903 + /* Iterate all loaded BTF objects and find bpf_testmod, 904 + * we need SYS_ADMIN cap for that. 
905 + */ 906 + set_root(true); 907 + 908 + while (true) { 909 + err = bpf_btf_get_next_id(id, &id); 910 + if (err) { 911 + if (errno == ENOENT) 912 + break; 913 + perror("bpf_btf_get_next_id failed"); 914 + break; 915 + } 916 + 917 + fd = bpf_btf_get_fd_by_id(id); 918 + if (fd < 0) { 919 + if (errno == ENOENT) 920 + continue; 921 + perror("bpf_btf_get_fd_by_id failed"); 922 + break; 923 + } 924 + 925 + memset(&info, 0, sizeof(info)); 926 + info.name_len = sizeof(name); 927 + info.name = ptr_to_u64(name); 928 + len = sizeof(info); 929 + 930 + err = bpf_obj_get_info_by_fd(fd, &info, &len); 931 + if (err) { 932 + close(fd); 933 + perror("bpf_obj_get_info_by_fd failed"); 934 + break; 935 + } 936 + 937 + if (strcmp("bpf_testmod", name)) { 938 + close(fd); 939 + continue; 940 + } 941 + 942 + btf = btf__load_from_kernel_by_id_split(id, vmlinux); 943 + if (!btf) { 944 + close(fd); 945 + break; 946 + } 947 + 948 + /* We need the fd to stay open so it can be used in fd_array. 949 + * The final cleanup call to btf__free will free btf object 950 + * and close the file descriptor. 951 + */ 952 + btf__set_fd(btf, fd); 953 + break; 954 + } 955 + 956 + set_root(false); 957 + return btf; 958 + } 959 + 960 + static struct btf *testmod_btf; 961 + static struct btf *vmlinux_btf; 962 + 963 + static void kfuncs_cleanup(void) 964 + { 965 + btf__free(testmod_btf); 966 + btf__free(vmlinux_btf); 967 + } 968 + 969 + static void fixup_prog_kfuncs(struct bpf_insn *prog, int *fd_array, 970 + struct kfunc_btf_id_pair *fixup_kfunc_btf_id) 971 + { 972 + /* Patch in kfunc BTF IDs */ 973 + while (fixup_kfunc_btf_id->kfunc) { 974 + int btf_id = 0; 975 + 976 + /* try to find kfunc in kernel BTF */ 977 + vmlinux_btf = vmlinux_btf ?: btf__load_vmlinux_btf(); 978 + if (vmlinux_btf) { 979 + btf_id = btf__find_by_name_kind(vmlinux_btf, 980 + fixup_kfunc_btf_id->kfunc, 981 + BTF_KIND_FUNC); 982 + btf_id = btf_id < 0 ? 
0 : btf_id; 983 + } 984 + 985 + /* kfunc not found in kernel BTF, try bpf_testmod BTF */ 986 + if (!btf_id) { 987 + testmod_btf = testmod_btf ?: btf__load_testmod_btf(vmlinux_btf); 988 + if (testmod_btf) { 989 + btf_id = btf__find_by_name_kind(testmod_btf, 990 + fixup_kfunc_btf_id->kfunc, 991 + BTF_KIND_FUNC); 992 + btf_id = btf_id < 0 ? 0 : btf_id; 993 + if (btf_id) { 994 + /* We put bpf_testmod module fd into fd_array 995 + * and its index 1 into instruction 'off'. 996 + */ 997 + *fd_array = btf__fd(testmod_btf); 998 + prog[fixup_kfunc_btf_id->insn_idx].off = 1; 999 + } 1000 + } 1001 + } 1002 + 1003 + prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; 1004 + fixup_kfunc_btf_id++; 1005 + } 1006 + } 1007 + 877 1008 static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, 878 - struct bpf_insn *prog, int *map_fds) 1009 + struct bpf_insn *prog, int *map_fds, int *fd_array) 879 1010 { 880 1011 int *fixup_map_hash_8b = test->fixup_map_hash_8b; 881 1012 int *fixup_map_hash_48b = test->fixup_map_hash_48b; ··· 1032 899 int *fixup_map_ringbuf = test->fixup_map_ringbuf; 1033 900 int *fixup_map_timer = test->fixup_map_timer; 1034 901 int *fixup_map_kptr = test->fixup_map_kptr; 1035 - struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id; 1036 902 1037 903 if (test->fill_helper) { 1038 904 test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); ··· 1232 1100 } while (*fixup_map_kptr); 1233 1101 } 1234 1102 1235 - /* Patch in kfunc BTF IDs */ 1236 - if (fixup_kfunc_btf_id->kfunc) { 1237 - struct btf *btf; 1238 - int btf_id; 1239 - 1240 - do { 1241 - btf_id = 0; 1242 - btf = btf__load_vmlinux_btf(); 1243 - if (btf) { 1244 - btf_id = btf__find_by_name_kind(btf, 1245 - fixup_kfunc_btf_id->kfunc, 1246 - BTF_KIND_FUNC); 1247 - btf_id = btf_id < 0 ? 
0 : btf_id; 1248 - } 1249 - btf__free(btf); 1250 - prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; 1251 - fixup_kfunc_btf_id++; 1252 - } while (fixup_kfunc_btf_id->kfunc); 1253 - } 1103 + fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id); 1254 1104 } 1255 1105 1256 1106 struct libcap { ··· 1559 1445 int run_errs, run_successes; 1560 1446 int map_fds[MAX_NR_MAPS]; 1561 1447 const char *expected_err; 1448 + int fd_array[2] = { -1, -1 }; 1562 1449 int saved_errno; 1563 1450 int fixup_skips; 1564 1451 __u32 pflags; ··· 1573 1458 if (!prog_type) 1574 1459 prog_type = BPF_PROG_TYPE_SOCKET_FILTER; 1575 1460 fixup_skips = skips; 1576 - do_test_fixup(test, prog_type, prog, map_fds); 1461 + do_test_fixup(test, prog_type, prog, map_fds, &fd_array[1]); 1577 1462 if (test->fill_insns) { 1578 1463 prog = test->fill_insns; 1579 1464 prog_len = test->prog_len; ··· 1607 1492 else 1608 1493 opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL; 1609 1494 opts.prog_flags = pflags; 1495 + if (fd_array[1] != -1) 1496 + opts.fd_array = &fd_array[0]; 1610 1497 1611 1498 if ((prog_type == BPF_PROG_TYPE_TRACING || 1612 1499 prog_type == BPF_PROG_TYPE_LSM) && test->kfunc) { ··· 1801 1684 { 1802 1685 int i, passes = 0, errors = 0; 1803 1686 1687 + /* ensure previous instance of the module is unloaded */ 1688 + unload_bpf_testmod(verbose); 1689 + 1690 + if (load_bpf_testmod(verbose)) 1691 + return EXIT_FAILURE; 1692 + 1804 1693 for (i = from; i < to; i++) { 1805 1694 struct bpf_test *test = &tests[i]; 1806 1695 ··· 1833 1710 do_test_single(test, false, &passes, &errors); 1834 1711 } 1835 1712 } 1713 + 1714 + unload_bpf_testmod(verbose); 1715 + kfuncs_cleanup(); 1836 1716 1837 1717 printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes, 1838 1718 skips, errors);
+1 -9
tools/testing/selftests/bpf/test_xsk.sh
··· 68 68 # Run with verbose output: 69 69 # sudo ./test_xsk.sh -v 70 70 # 71 - # Run and dump packet contents: 72 - # sudo ./test_xsk.sh -D 73 - # 74 71 # Set up veth interfaces and leave them up so xskxceiver can be launched in a debugger: 75 72 # sudo ./test_xsk.sh -d 76 73 # ··· 78 81 79 82 ETH="" 80 83 81 - while getopts "vDi:d" flag 84 + while getopts "vi:d" flag 82 85 do 83 86 case "${flag}" in 84 87 v) verbose=1;; 85 - D) dump_pkts=1;; 86 88 d) debug=1;; 87 89 i) ETH=${OPTARG};; 88 90 esac ··· 151 155 152 156 if [[ $verbose -eq 1 ]]; then 153 157 ARGS+="-v " 154 - fi 155 - 156 - if [[ $dump_pkts -eq 1 ]]; then 157 - ARGS="-D " 158 158 fi 159 159 160 160 retval=$?
+61
tools/testing/selftests/bpf/testing_helpers.c
··· 9 9 #include <bpf/libbpf.h> 10 10 #include "test_progs.h" 11 11 #include "testing_helpers.h" 12 + #include <linux/membarrier.h> 12 13 13 14 int parse_num_list(const char *s, bool **num_set, int *num_set_len) 14 15 { ··· 326 325 327 326 fclose(f); 328 327 return sample_freq; 328 + } 329 + 330 + static int finit_module(int fd, const char *param_values, int flags) 331 + { 332 + return syscall(__NR_finit_module, fd, param_values, flags); 333 + } 334 + 335 + static int delete_module(const char *name, int flags) 336 + { 337 + return syscall(__NR_delete_module, name, flags); 338 + } 339 + 340 + int unload_bpf_testmod(bool verbose) 341 + { 342 + if (kern_sync_rcu()) 343 + fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n"); 344 + if (delete_module("bpf_testmod", 0)) { 345 + if (errno == ENOENT) { 346 + if (verbose) 347 + fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); 348 + return -1; 349 + } 350 + fprintf(stdout, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); 351 + return -1; 352 + } 353 + if (verbose) 354 + fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); 355 + return 0; 356 + } 357 + 358 + int load_bpf_testmod(bool verbose) 359 + { 360 + int fd; 361 + 362 + if (verbose) 363 + fprintf(stdout, "Loading bpf_testmod.ko...\n"); 364 + 365 + fd = open("bpf_testmod.ko", O_RDONLY); 366 + if (fd < 0) { 367 + fprintf(stdout, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); 368 + return -ENOENT; 369 + } 370 + if (finit_module(fd, "", 0)) { 371 + fprintf(stdout, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); 372 + close(fd); 373 + return -EINVAL; 374 + } 375 + close(fd); 376 + 377 + if (verbose) 378 + fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); 379 + return 0; 380 + } 381 + 382 + /* 383 + * Trigger synchronize_rcu() in kernel. 384 + */ 385 + int kern_sync_rcu(void) 386 + { 387 + return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); 329 388 }
+9
tools/testing/selftests/bpf/testing_helpers.h
··· 1 1 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 2 /* Copyright (C) 2020 Facebook, Inc. */ 3 + 4 + #ifndef __TESTING_HELPERS_H 5 + #define __TESTING_HELPERS_H 6 + 3 7 #include <stdbool.h> 4 8 #include <bpf/bpf.h> 5 9 #include <bpf/libbpf.h> ··· 29 25 bool is_glob_pattern); 30 26 31 27 __u64 read_perf_max_sample_freq(void); 28 + int load_bpf_testmod(bool verbose); 29 + int unload_bpf_testmod(bool verbose); 30 + int kern_sync_rcu(void); 31 + 32 + #endif /* __TESTING_HELPERS_H */
+5
tools/testing/selftests/bpf/xsk.h
··· 134 134 __atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE); 135 135 } 136 136 137 + static inline void xsk_ring_prod__cancel(struct xsk_ring_prod *prod, __u32 nb) 138 + { 139 + prod->cached_prod -= nb; 140 + } 141 + 137 142 static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx) 138 143 { 139 144 __u32 entries = xsk_cons_nb_avail(cons, nb);
+362 -407
tools/testing/selftests/bpf/xskxceiver.c
··· 76 76 #include <asm/barrier.h> 77 77 #include <linux/if_link.h> 78 78 #include <linux/if_ether.h> 79 - #include <linux/ip.h> 80 79 #include <linux/mman.h> 81 - #include <linux/udp.h> 82 80 #include <arpa/inet.h> 83 81 #include <net/if.h> 84 82 #include <locale.h> 85 83 #include <poll.h> 86 84 #include <pthread.h> 87 85 #include <signal.h> 88 - #include <stdbool.h> 89 86 #include <stdio.h> 90 87 #include <stdlib.h> 91 88 #include <string.h> ··· 91 94 #include <sys/socket.h> 92 95 #include <sys/time.h> 93 96 #include <sys/types.h> 94 - #include <sys/queue.h> 95 97 #include <time.h> 96 98 #include <unistd.h> 97 - #include <stdatomic.h> 98 99 99 100 #include "xsk_xdp_progs.skel.h" 100 101 #include "xsk.h" ··· 104 109 105 110 static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; 106 111 static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; 107 - static const char *IP1 = "192.168.100.162"; 108 - static const char *IP2 = "192.168.100.161"; 109 - static const u16 UDP_PORT1 = 2020; 110 - static const u16 UDP_PORT2 = 2121; 111 112 112 113 static void __exit_with_error(int error, const char *file, const char *func, int line) 113 114 { ··· 138 147 test->fail = true; 139 148 } 140 149 141 - static void memset32_htonl(void *dest, u32 val, u32 size) 142 - { 143 - u32 *ptr = (u32 *)dest; 144 - int i; 145 - 146 - val = htonl(val); 147 - 148 - for (i = 0; i < (size & (~0x3)); i += 4) 149 - ptr[i >> 2] = val; 150 - } 151 - 152 - /* 153 - * Fold a partial checksum 154 - * This function code has been taken from 155 - * Linux kernel include/asm-generic/checksum.h 150 + /* The payload is a word consisting of a packet sequence number in the upper 151 + * 16 bits and an intra-packet data sequence number in the lower 16 bits. So the 3rd packet's 152 + * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0.
156 153 */ 157 - static __u16 csum_fold(__u32 csum) 154 + static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size) 158 155 { 159 - u32 sum = (__force u32)csum; 156 + u32 *ptr = (u32 *)dest, i; 160 157 161 - sum = (sum & 0xffff) + (sum >> 16); 162 - sum = (sum & 0xffff) + (sum >> 16); 163 - return (__force __u16)~sum; 164 - } 165 - 166 - /* 167 - * This function code has been taken from 168 - * Linux kernel lib/checksum.c 169 - */ 170 - static u32 from64to32(u64 x) 171 - { 172 - /* add up 32-bit and 32-bit for 32+c bit */ 173 - x = (x & 0xffffffff) + (x >> 32); 174 - /* add up carry.. */ 175 - x = (x & 0xffffffff) + (x >> 32); 176 - return (u32)x; 177 - } 178 - 179 - /* 180 - * This function code has been taken from 181 - * Linux kernel lib/checksum.c 182 - */ 183 - static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) 184 - { 185 - unsigned long long s = (__force u32)sum; 186 - 187 - s += (__force u32)saddr; 188 - s += (__force u32)daddr; 189 - #ifdef __BIG_ENDIAN__ 190 - s += proto + len; 191 - #else 192 - s += (proto + len) << 8; 193 - #endif 194 - return (__force __u32)from64to32(s); 195 - } 196 - 197 - /* 198 - * This function has been taken from 199 - * Linux kernel include/asm-generic/checksum.h 200 - */ 201 - static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) 202 - { 203 - return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); 204 - } 205 - 206 - static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) 207 - { 208 - u32 csum = 0; 209 - u32 cnt = 0; 210 - 211 - /* udp hdr and data */ 212 - for (; cnt < len; cnt += 2) 213 - csum += udp_pkt[cnt >> 1]; 214 - 215 - return csum_tcpudp_magic(saddr, daddr, len, proto, csum); 158 + start /= sizeof(*ptr); 159 + size /= sizeof(*ptr); 160 + for (i = 0; i < size; i++) 161 + ptr[i] = htonl(pkt_nb << 16 | (i + start)); 216 162 } 217 163 218 164 static void gen_eth_hdr(struct ifobject *ifobject, 
struct ethhdr *eth_hdr) 219 165 { 220 166 memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); 221 167 memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); 222 - eth_hdr->h_proto = htons(ETH_P_IP); 223 - } 224 - 225 - static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr) 226 - { 227 - ip_hdr->version = IP_PKT_VER; 228 - ip_hdr->ihl = 0x5; 229 - ip_hdr->tos = IP_PKT_TOS; 230 - ip_hdr->tot_len = htons(IP_PKT_SIZE); 231 - ip_hdr->id = 0; 232 - ip_hdr->frag_off = 0; 233 - ip_hdr->ttl = IPDEFTTL; 234 - ip_hdr->protocol = IPPROTO_UDP; 235 - ip_hdr->saddr = ifobject->src_ip; 236 - ip_hdr->daddr = ifobject->dst_ip; 237 - ip_hdr->check = 0; 238 - } 239 - 240 - static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject, 241 - struct udphdr *udp_hdr) 242 - { 243 - udp_hdr->source = htons(ifobject->src_port); 244 - udp_hdr->dest = htons(ifobject->dst_port); 245 - udp_hdr->len = htons(UDP_PKT_SIZE); 246 - memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE); 168 + eth_hdr->h_proto = htons(ETH_P_LOOPBACK); 247 169 } 248 170 249 171 static bool is_umem_valid(struct ifobject *ifobj) ··· 164 260 return !!ifobj->umem->umem; 165 261 } 166 262 167 - static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) 168 - { 169 - udp_hdr->check = 0; 170 - udp_hdr->check = 171 - udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); 172 - } 173 - 174 263 static u32 mode_to_xdp_flags(enum test_mode mode) 175 264 { 176 265 return (mode == TEST_MODE_SKB) ? 
XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; 177 266 } 178 267 179 - static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size) 268 + static u64 umem_size(struct xsk_umem_info *umem) 269 + { 270 + return umem->num_frames * umem->frame_size; 271 + } 272 + 273 + static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, 274 + u64 size) 180 275 { 181 276 struct xsk_umem_config cfg = { 182 277 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, ··· 195 292 return ret; 196 293 197 294 umem->buffer = buffer; 295 + if (ifobj->shared_umem && ifobj->rx_on) { 296 + umem->base_addr = umem_size(umem); 297 + umem->next_buffer = umem_size(umem); 298 + } 299 + 198 300 return 0; 301 + } 302 + 303 + static u64 umem_alloc_buffer(struct xsk_umem_info *umem) 304 + { 305 + u64 addr; 306 + 307 + addr = umem->next_buffer; 308 + umem->next_buffer += umem->frame_size; 309 + if (umem->next_buffer >= umem->base_addr + umem_size(umem)) 310 + umem->next_buffer = umem->base_addr; 311 + 312 + return addr; 313 + } 314 + 315 + static void umem_reset_alloc(struct xsk_umem_info *umem) 316 + { 317 + umem->next_buffer = 0; 199 318 } 200 319 201 320 static void enable_busy_poll(struct xsk_socket_info *xsk) ··· 279 354 exit_with_error(ENOMEM); 280 355 } 281 356 umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; 282 - ret = xsk_configure_umem(umem, bufs, umem_sz); 357 + ret = xsk_configure_umem(ifobject, umem, bufs, umem_sz); 283 358 if (ret) 284 359 exit_with_error(-ret); 285 360 ··· 305 380 static struct option long_options[] = { 306 381 {"interface", required_argument, 0, 'i'}, 307 382 {"busy-poll", no_argument, 0, 'b'}, 308 - {"dump-pkts", no_argument, 0, 'D'}, 309 383 {"verbose", no_argument, 0, 'v'}, 310 384 {0, 0, 0, 0} 311 385 }; ··· 315 391 " Usage: %s [OPTIONS]\n" 316 392 " Options:\n" 317 393 " -i, --interface Use interface\n" 318 - " -D, --dump-pkts Dump packets L2 - L5\n" 319 394 " -v, --verbose Verbose output\n" 320 395 " -b, --busy-poll 
Enable busy poll\n"; 321 396 ··· 338 415 opterr = 0; 339 416 340 417 for (;;) { 341 - c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index); 418 + c = getopt_long(argc, argv, "i:vb", long_options, &option_index); 342 419 if (c == -1) 343 420 break; 344 421 ··· 359 436 exit_with_error(errno); 360 437 361 438 interface_nb++; 362 - break; 363 - case 'D': 364 - opt_pkt_dump = true; 365 439 break; 366 440 case 'v': 367 441 opt_verbose = true; ··· 402 482 memset(ifobj->umem, 0, sizeof(*ifobj->umem)); 403 483 ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; 404 484 ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; 405 - if (ifobj->shared_umem && ifobj->rx_on) 406 - ifobj->umem->base_addr = DEFAULT_UMEM_BUFFERS * 407 - XSK_UMEM__DEFAULT_FRAME_SIZE; 408 485 409 486 for (j = 0; j < MAX_SOCKETS; j++) { 410 487 memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); ··· 471 554 static void pkt_stream_reset(struct pkt_stream *pkt_stream) 472 555 { 473 556 if (pkt_stream) 474 - pkt_stream->rx_pkt_nb = 0; 557 + pkt_stream->current_pkt_nb = 0; 475 558 } 476 559 477 - static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) 560 + static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream) 478 561 { 479 - if (pkt_nb >= pkt_stream->nb_pkts) 562 + if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) 480 563 return NULL; 481 564 482 - return &pkt_stream->pkts[pkt_nb]; 565 + return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; 483 566 } 484 567 485 568 static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent) 486 569 { 487 - while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) { 570 + while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { 488 571 (*pkts_sent)++; 489 - if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid) 490 - return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++]; 491 - pkt_stream->rx_pkt_nb++; 572 + if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid) 573 + return 
&pkt_stream->pkts[pkt_stream->current_pkt_nb++]; 574 + pkt_stream->current_pkt_nb++; 492 575 } 493 576 return NULL; 494 577 } ··· 533 616 return pkt_stream; 534 617 } 535 618 536 - static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len) 619 + static u32 ceil_u32(u32 a, u32 b) 537 620 { 538 - pkt->addr = addr + umem->base_addr; 621 + return (a + b - 1) / b; 622 + } 623 + 624 + static u32 pkt_nb_frags(u32 frame_size, struct pkt *pkt) 625 + { 626 + if (!pkt || !pkt->valid) 627 + return 1; 628 + return ceil_u32(pkt->len, frame_size); 629 + } 630 + 631 + static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len) 632 + { 633 + pkt->offset = offset; 539 634 pkt->len = len; 540 635 if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom) 541 636 pkt->valid = false; 542 637 else 543 638 pkt->valid = true; 639 + } 640 + 641 + static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) 642 + { 643 + return ceil_u32(len, umem->frame_size) * umem->frame_size; 544 644 } 545 645 546 646 static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) ··· 569 635 if (!pkt_stream) 570 636 exit_with_error(ENOMEM); 571 637 638 + pkt_stream->nb_pkts = nb_pkts; 639 + pkt_stream->max_pkt_len = pkt_len; 572 640 for (i = 0; i < nb_pkts; i++) { 573 - pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size, 574 - pkt_len); 575 - pkt_stream->pkts[i].payload = i; 641 + struct pkt *pkt = &pkt_stream->pkts[i]; 642 + 643 + pkt_set(umem, pkt, 0, pkt_len); 644 + pkt->pkt_nb = i; 576 645 } 577 646 578 647 return pkt_stream; ··· 606 669 607 670 pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream); 608 671 for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2) 609 - pkt_set(umem, &pkt_stream->pkts[i], 610 - (i % umem->num_frames) * umem->frame_size + offset, pkt_len); 672 + pkt_set(umem, &pkt_stream->pkts[i], offset, pkt_len); 611 673 612 674 
ifobj->pkt_stream = pkt_stream; 613 675 } ··· 630 694 pkt_stream->pkts[i].valid = false; 631 695 } 632 696 633 - static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) 697 + static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem) 634 698 { 635 - struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb); 636 - struct udphdr *udp_hdr; 637 - struct ethhdr *eth_hdr; 638 - struct iphdr *ip_hdr; 639 - void *data; 699 + if (!pkt->valid) 700 + return pkt->offset; 701 + return pkt->offset + umem_alloc_buffer(umem); 702 + } 640 703 641 - if (!pkt) 642 - return NULL; 643 - if (!pkt->valid || pkt->len < MIN_PKT_SIZE) 644 - return pkt; 704 + static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb, 705 + u32 bytes_written) 706 + { 707 + void *data = xsk_umem__get_data(ifobject->umem->buffer, addr); 645 708 646 - data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr); 647 - udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr)); 648 - ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr)); 649 - eth_hdr = (struct ethhdr *)data; 709 + if (len < MIN_PKT_SIZE) 710 + return; 650 711 651 - gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr); 652 - gen_ip_hdr(ifobject, ip_hdr); 653 - gen_udp_csum(udp_hdr, ip_hdr); 654 - gen_eth_hdr(ifobject, eth_hdr); 712 + if (!bytes_written) { 713 + gen_eth_hdr(ifobject, data); 655 714 656 - return pkt; 715 + len -= PKT_HDR_SIZE; 716 + data += PKT_HDR_SIZE; 717 + } else { 718 + bytes_written -= PKT_HDR_SIZE; 719 + } 720 + 721 + write_payload(data, pkt_nb, bytes_written, len); 657 722 } 658 723 659 724 static void __pkt_stream_generate_custom(struct ifobject *ifobj, ··· 668 731 exit_with_error(ENOMEM); 669 732 670 733 for (i = 0; i < nb_pkts; i++) { 671 - pkt_stream->pkts[i].addr = pkts[i].addr + ifobj->umem->base_addr; 672 - pkt_stream->pkts[i].len = pkts[i].len; 673 - pkt_stream->pkts[i].payload = i; 674 - pkt_stream->pkts[i].valid = pkts[i].valid; 734 + struct 
pkt *pkt = &pkt_stream->pkts[i]; 735 + 736 + pkt->offset = pkts[i].offset; 737 + pkt->len = pkts[i].len; 738 + pkt->pkt_nb = i; 739 + pkt->valid = pkts[i].valid; 740 + if (pkt->len > pkt_stream->max_pkt_len) 741 + pkt_stream->max_pkt_len = pkt->len; 675 742 } 676 743 677 744 ifobj->pkt_stream = pkt_stream; ··· 687 746 __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts); 688 747 } 689 748 690 - static void pkt_dump(void *pkt, u32 len) 749 + static void pkt_print_data(u32 *data, u32 cnt) 691 750 { 692 - char s[INET_ADDRSTRLEN]; 693 - struct ethhdr *ethhdr; 694 - struct udphdr *udphdr; 695 - struct iphdr *iphdr; 696 - u32 payload, i; 751 + u32 i; 697 752 698 - ethhdr = pkt; 699 - iphdr = pkt + sizeof(*ethhdr); 700 - udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr); 753 + for (i = 0; i < cnt; i++) { 754 + u32 seqnum, pkt_nb; 701 755 702 - /*extract L2 frame */ 703 - fprintf(stdout, "DEBUG>> L2: dst mac: "); 704 - for (i = 0; i < ETH_ALEN; i++) 705 - fprintf(stdout, "%02X", ethhdr->h_dest[i]); 706 - 707 - fprintf(stdout, "\nDEBUG>> L2: src mac: "); 708 - for (i = 0; i < ETH_ALEN; i++) 709 - fprintf(stdout, "%02X", ethhdr->h_source[i]); 710 - 711 - /*extract L3 frame */ 712 - fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl); 713 - fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", 714 - inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s))); 715 - fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", 716 - inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s))); 717 - /*extract L4 frame */ 718 - fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source)); 719 - fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest)); 720 - /*extract L5 frame */ 721 - payload = ntohl(*((u32 *)(pkt + PKT_HDR_SIZE))); 722 - 723 - fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload); 724 - fprintf(stdout, "---------------------------------------\n"); 756 + seqnum = ntohl(*data) & 0xffff; 757 + pkt_nb = ntohl(*data) >> 16; 758 + fprintf(stdout, "%u:%u ", 
pkt_nb, seqnum); 759 + data++; 760 + } 725 761 } 726 762 727 - static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr, 728 - u64 pkt_stream_addr) 763 + static void pkt_dump(void *pkt, u32 len, bool eth_header) 764 + { 765 + struct ethhdr *ethhdr = pkt; 766 + u32 i, *data; 767 + 768 + if (eth_header) { 769 + /*extract L2 frame */ 770 + fprintf(stdout, "DEBUG>> L2: dst mac: "); 771 + for (i = 0; i < ETH_ALEN; i++) 772 + fprintf(stdout, "%02X", ethhdr->h_dest[i]); 773 + 774 + fprintf(stdout, "\nDEBUG>> L2: src mac: "); 775 + for (i = 0; i < ETH_ALEN; i++) 776 + fprintf(stdout, "%02X", ethhdr->h_source[i]); 777 + 778 + data = pkt + PKT_HDR_SIZE; 779 + } else { 780 + data = pkt; 781 + } 782 + 783 + /*extract L5 frame */ 784 + fprintf(stdout, "\nDEBUG>> L5: seqnum: "); 785 + pkt_print_data(data, PKT_DUMP_NB_TO_PRINT); 786 + fprintf(stdout, "...."); 787 + if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) { 788 + fprintf(stdout, "\n.... "); 789 + pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT, 790 + PKT_DUMP_NB_TO_PRINT); 791 + } 792 + fprintf(stdout, "\n---------------------------------------\n"); 793 + } 794 + 795 + static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr) 729 796 { 730 797 u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom; 731 - u32 offset = addr % umem->frame_size, expected_offset = 0; 798 + u32 offset = addr % umem->frame_size, expected_offset; 799 + int pkt_offset = pkt->valid ? 
pkt->offset : 0; 732 800 733 - if (!pkt_stream->use_addr_for_fill) 734 - pkt_stream_addr = 0; 801 + if (!umem->unaligned_mode) 802 + pkt_offset = 0; 735 803 736 - expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; 804 + expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; 737 805 738 806 if (offset == expected_offset) 739 807 return true; ··· 756 806 void *data = xsk_umem__get_data(buffer, addr); 757 807 struct xdp_info *meta = data - sizeof(struct xdp_info); 758 808 759 - if (meta->count != pkt->payload) { 809 + if (meta->count != pkt->pkt_nb) { 760 810 ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", 761 - __func__, pkt->payload, meta->count); 811 + __func__, pkt->pkt_nb, meta->count); 762 812 return false; 763 813 } 764 814 ··· 768 818 static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) 769 819 { 770 820 void *data = xsk_umem__get_data(buffer, addr); 771 - struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr)); 821 + u32 seqnum, pkt_data; 772 822 773 823 if (!pkt) { 774 824 ksft_print_msg("[%s] too many packets received\n", __func__); 775 - return false; 825 + goto error; 776 826 } 777 827 778 828 if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) { ··· 783 833 if (pkt->len != len) { 784 834 ksft_print_msg("[%s] expected length [%d], got length [%d]\n", 785 835 __func__, pkt->len, len); 786 - return false; 836 + goto error; 787 837 } 788 838 789 - if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { 790 - u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); 839 + pkt_data = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); 840 + seqnum = pkt_data >> 16; 791 841 792 - if (opt_pkt_dump) 793 - pkt_dump(data, PKT_SIZE); 794 - 795 - if (pkt->payload != seqnum) { 796 - ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", 797 - __func__, pkt->payload, seqnum); 798 - return false; 799 - } 800 - } else { 801 - ksft_print_msg("Invalid 
frame received: "); 802 - ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version, 803 - iphdr->tos); 804 - return false; 842 + if (pkt->pkt_nb != seqnum) { 843 + ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", 844 + __func__, pkt->pkt_nb, seqnum); 845 + goto error; 805 846 } 806 847 807 848 return true; 849 + 850 + error: 851 + pkt_dump(data, len, true); 852 + return false; 808 853 } 809 854 810 855 static void kick_tx(struct xsk_socket_info *xsk) ··· 921 976 addr = xsk_umem__add_offset_to_addr(addr); 922 977 923 978 if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) || 924 - !is_offset_correct(umem, pkt_stream, addr, pkt->addr) || 979 + !is_offset_correct(umem, pkt, addr) || 925 980 (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr))) 926 981 return TEST_FAILURE; 927 982 ··· 937 992 938 993 pthread_mutex_lock(&pacing_mutex); 939 994 pkts_in_flight -= pkts_sent; 940 - if (pkts_in_flight < umem->num_frames) 941 - pthread_cond_signal(&pacing_cond); 942 995 pthread_mutex_unlock(&pacing_mutex); 943 996 pkts_sent = 0; 944 997 } ··· 944 1001 return TEST_PASS; 945 1002 } 946 1003 947 - static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fds, 948 - bool timeout) 1004 + static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout) 949 1005 { 950 1006 struct xsk_socket_info *xsk = ifobject->xsk; 1007 + struct xsk_umem_info *umem = ifobject->umem; 1008 + u32 i, idx = 0, valid_pkts = 0, buffer_len; 951 1009 bool use_poll = ifobject->use_poll; 952 - u32 i, idx = 0, valid_pkts = 0; 953 1010 int ret; 1011 + 1012 + buffer_len = pkt_get_buffer_len(umem, ifobject->pkt_stream->max_pkt_len); 1013 + /* pkts_in_flight might be negative if many invalid packets are sent */ 1014 + if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) { 1015 + kick_tx(xsk); 1016 + return TEST_CONTINUE; 1017 + } 954 1018 955 1019 while (xsk_ring_prod__reserve(&xsk->tx, 
BATCH_SIZE, &idx) < BATCH_SIZE) { 956 1020 if (use_poll) { ··· 984 1034 985 1035 for (i = 0; i < BATCH_SIZE; i++) { 986 1036 struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); 987 - struct pkt *pkt = pkt_generate(ifobject, *pkt_nb); 1037 + struct pkt *pkt = pkt_stream_get_next_tx_pkt(ifobject->pkt_stream); 988 1038 989 1039 if (!pkt) 990 1040 break; 991 1041 992 - tx_desc->addr = pkt->addr; 1042 + tx_desc->addr = pkt_get_addr(pkt, umem); 993 1043 tx_desc->len = pkt->len; 994 - (*pkt_nb)++; 995 - if (pkt->valid) 1044 + if (pkt->valid) { 996 1045 valid_pkts++; 1046 + pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, 0); 1047 + } 997 1048 } 998 1049 999 1050 pthread_mutex_lock(&pacing_mutex); 1000 1051 pkts_in_flight += valid_pkts; 1001 - /* pkts_in_flight might be negative if many invalid packets are sent */ 1002 - if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) { 1003 - kick_tx(xsk); 1004 - pthread_cond_wait(&pacing_cond, &pacing_mutex); 1005 - } 1006 1052 pthread_mutex_unlock(&pacing_mutex); 1007 1053 1008 1054 xsk_ring_prod__submit(&xsk->tx, i); ··· 1034 1088 1035 1089 static int send_pkts(struct test_spec *test, struct ifobject *ifobject) 1036 1090 { 1091 + struct pkt_stream *pkt_stream = ifobject->pkt_stream; 1037 1092 bool timeout = !is_umem_valid(test->ifobj_rx); 1038 1093 struct pollfd fds = { }; 1039 - u32 pkt_cnt = 0, ret; 1094 + u32 ret; 1040 1095 1041 1096 fds.fd = xsk_socket__fd(ifobject->xsk->xsk); 1042 1097 fds.events = POLLOUT; 1043 1098 1044 - while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { 1045 - ret = __send_pkts(ifobject, &pkt_cnt, &fds, timeout); 1099 + while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { 1100 + ret = __send_pkts(ifobject, &fds, timeout); 1101 + if (ret == TEST_CONTINUE && !test->fail) 1102 + continue; 1046 1103 if ((ret || test->fail) && !timeout) 1047 1104 return TEST_FAILURE; 1048 - else if (ret == TEST_PASS && timeout) 1105 + if (ret == TEST_PASS && timeout) 1049 
1106 return ret; 1050 1107 } 1051 1108 ··· 1198 1249 ifobject->xsk = &ifobject->xsk_arr[0]; 1199 1250 ifobject->xskmap = test->ifobj_rx->xskmap; 1200 1251 memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info)); 1252 + ifobject->umem->base_addr = 0; 1201 1253 } 1202 1254 1203 - static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream) 1255 + static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, 1256 + bool fill_up) 1204 1257 { 1205 - u32 idx = 0, i, buffers_to_fill; 1258 + u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM; 1259 + u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts; 1206 1260 int ret; 1207 1261 1208 1262 if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) ··· 1216 1264 ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); 1217 1265 if (ret != buffers_to_fill) 1218 1266 exit_with_error(ENOSPC); 1219 - for (i = 0; i < buffers_to_fill; i++) { 1267 + 1268 + while (filled < buffers_to_fill) { 1269 + struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts); 1220 1270 u64 addr; 1271 + u32 i; 1221 1272 1222 - if (pkt_stream->use_addr_for_fill) { 1223 - struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i); 1273 + for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt); i++) { 1274 + if (!pkt) { 1275 + if (!fill_up) 1276 + break; 1277 + addr = filled * umem->frame_size + umem->base_addr; 1278 + } else if (pkt->offset >= 0) { 1279 + addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem); 1280 + } else { 1281 + addr = pkt->offset + umem_alloc_buffer(umem); 1282 + } 1224 1283 1225 - if (!pkt) 1284 + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; 1285 + if (++filled >= buffers_to_fill) 1226 1286 break; 1227 - addr = pkt->addr; 1228 - } else { 1229 - addr = i * umem->frame_size; 1230 1287 } 1231 - 1232 - *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; 1233 1288 } 1234 - xsk_ring_prod__submit(&umem->fq, i); 1289 + 
xsk_ring_prod__submit(&umem->fq, filled); 1290 + xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled); 1291 + 1292 + pkt_stream_reset(pkt_stream); 1293 + umem_reset_alloc(umem); 1235 1294 } 1236 1295 1237 1296 static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) ··· 1263 1300 if (bufs == MAP_FAILED) 1264 1301 exit_with_error(errno); 1265 1302 1266 - ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz); 1303 + ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz); 1267 1304 if (ret) 1268 1305 exit_with_error(-ret); 1269 - 1270 - xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream); 1271 1306 1272 1307 xsk_configure_socket(test, ifobject, ifobject->umem, false); 1273 1308 ··· 1273 1312 1274 1313 if (!ifobject->rx_on) 1275 1314 return; 1315 + 1316 + xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream, ifobject->use_fill_ring); 1276 1317 1277 1318 ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); 1278 1319 if (ret) ··· 1333 1370 1334 1371 if (!err && ifobject->validation_func) 1335 1372 err = ifobject->validation_func(ifobject); 1336 - if (err) { 1373 + if (err) 1337 1374 report_failure(test); 1338 - pthread_mutex_lock(&pacing_mutex); 1339 - pthread_cond_signal(&pacing_cond); 1340 - pthread_mutex_unlock(&pacing_mutex); 1341 - } 1342 1375 1343 1376 pthread_exit(NULL); 1344 1377 } ··· 1361 1402 pthread_exit(NULL); 1362 1403 } 1363 1404 1364 - static bool xdp_prog_changed(struct test_spec *test, struct ifobject *ifobj) 1405 + static bool xdp_prog_changed_rx(struct test_spec *test) 1365 1406 { 1407 + struct ifobject *ifobj = test->ifobj_rx; 1408 + 1366 1409 return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode; 1410 + } 1411 + 1412 + static bool xdp_prog_changed_tx(struct test_spec *test) 1413 + { 1414 + struct ifobject *ifobj = test->ifobj_tx; 1415 + 1416 + return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode; 1367 1417 } 1368 1418 1369 1419 static void 
xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog, ··· 1401 1433 static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx, 1402 1434 struct ifobject *ifobj_tx) 1403 1435 { 1404 - if (xdp_prog_changed(test, ifobj_rx)) 1436 + if (xdp_prog_changed_rx(test)) 1405 1437 xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode); 1406 1438 1407 1439 if (!ifobj_tx || ifobj_tx->shared_umem) 1408 1440 return; 1409 1441 1410 - if (xdp_prog_changed(test, ifobj_tx)) 1442 + if (xdp_prog_changed_tx(test)) 1411 1443 xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode); 1412 1444 } 1413 1445 ··· 1416 1448 { 1417 1449 pthread_t t0, t1; 1418 1450 1419 - if (ifobj2) 1451 + if (ifobj2) { 1420 1452 if (pthread_barrier_init(&barr, NULL, 2)) 1421 1453 exit_with_error(errno); 1454 + pkt_stream_reset(ifobj2->pkt_stream); 1455 + } 1422 1456 1423 1457 test->current_step++; 1424 1458 pkt_stream_reset(ifobj1->pkt_stream); ··· 1463 1493 struct ifobject *ifobj_rx = test->ifobj_rx; 1464 1494 struct ifobject *ifobj_tx = test->ifobj_tx; 1465 1495 1496 + if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) || 1497 + (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) { 1498 + ksft_test_result_skip("No huge pages present.\n"); 1499 + return TEST_SKIP; 1500 + } 1501 + 1466 1502 xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx); 1467 1503 return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx); 1468 1504 } ··· 1478 1502 return __testapp_validate_traffic(test, ifobj, NULL); 1479 1503 } 1480 1504 1481 - static void testapp_teardown(struct test_spec *test) 1505 + static int testapp_teardown(struct test_spec *test) 1482 1506 { 1483 1507 int i; 1484 1508 1485 1509 test_spec_set_name(test, "TEARDOWN"); 1486 1510 for (i = 0; i < MAX_TEARDOWN_ITER; i++) { 1487 1511 if (testapp_validate_traffic(test)) 1488 - return; 1512 + return TEST_FAILURE; 1489 1513 test_spec_reset(test); 1490 1514 } 1515 + 1516 + 
return TEST_PASS; 1491 1517 } 1492 1518 1493 1519 static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) ··· 1504 1526 *ifobj2 = tmp_ifobj; 1505 1527 } 1506 1528 1507 - static void testapp_bidi(struct test_spec *test) 1529 + static int testapp_bidi(struct test_spec *test) 1508 1530 { 1531 + int res; 1532 + 1509 1533 test_spec_set_name(test, "BIDIRECTIONAL"); 1510 1534 test->ifobj_tx->rx_on = true; 1511 1535 test->ifobj_rx->tx_on = true; 1512 1536 test->total_steps = 2; 1513 1537 if (testapp_validate_traffic(test)) 1514 - return; 1538 + return TEST_FAILURE; 1515 1539 1516 1540 print_verbose("Switching Tx/Rx vectors\n"); 1517 1541 swap_directions(&test->ifobj_rx, &test->ifobj_tx); 1518 - __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); 1542 + res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); 1519 1543 1520 1544 swap_directions(&test->ifobj_rx, &test->ifobj_tx); 1545 + return res; 1521 1546 } 1522 1547 1523 1548 static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) ··· 1537 1556 exit_with_error(errno); 1538 1557 } 1539 1558 1540 - static void testapp_bpf_res(struct test_spec *test) 1559 + static int testapp_bpf_res(struct test_spec *test) 1541 1560 { 1542 1561 test_spec_set_name(test, "BPF_RES"); 1543 1562 test->total_steps = 2; 1544 1563 test->nb_sockets = 2; 1545 1564 if (testapp_validate_traffic(test)) 1546 - return; 1565 + return TEST_FAILURE; 1547 1566 1548 1567 swap_xsk_resources(test->ifobj_tx, test->ifobj_rx); 1549 - testapp_validate_traffic(test); 1568 + return testapp_validate_traffic(test); 1550 1569 } 1551 1570 1552 - static void testapp_headroom(struct test_spec *test) 1571 + static int testapp_headroom(struct test_spec *test) 1553 1572 { 1554 1573 test_spec_set_name(test, "UMEM_HEADROOM"); 1555 1574 test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; 1556 - testapp_validate_traffic(test); 1575 + return testapp_validate_traffic(test); 1557 1576 } 
1558 1577 1559 - static void testapp_stats_rx_dropped(struct test_spec *test) 1578 + static int testapp_stats_rx_dropped(struct test_spec *test) 1560 1579 { 1561 1580 test_spec_set_name(test, "STAT_RX_DROPPED"); 1581 + if (test->mode == TEST_MODE_ZC) { 1582 + ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n"); 1583 + return TEST_SKIP; 1584 + } 1585 + 1562 1586 pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0); 1563 1587 test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - 1564 1588 XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; 1565 1589 pkt_stream_receive_half(test); 1566 1590 test->ifobj_rx->validation_func = validate_rx_dropped; 1567 - testapp_validate_traffic(test); 1591 + return testapp_validate_traffic(test); 1568 1592 } 1569 1593 1570 - static void testapp_stats_tx_invalid_descs(struct test_spec *test) 1594 + static int testapp_stats_tx_invalid_descs(struct test_spec *test) 1571 1595 { 1572 1596 test_spec_set_name(test, "STAT_TX_INVALID"); 1573 1597 pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0); 1574 1598 test->ifobj_tx->validation_func = validate_tx_invalid_descs; 1575 - testapp_validate_traffic(test); 1599 + return testapp_validate_traffic(test); 1576 1600 } 1577 1601 1578 - static void testapp_stats_rx_full(struct test_spec *test) 1602 + static int testapp_stats_rx_full(struct test_spec *test) 1579 1603 { 1580 1604 test_spec_set_name(test, "STAT_RX_FULL"); 1581 - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); 1605 + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); 1582 1606 test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 1583 - DEFAULT_UMEM_BUFFERS, PKT_SIZE); 1584 - if (!test->ifobj_rx->pkt_stream) 1585 - exit_with_error(ENOMEM); 1607 + DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); 1586 1608 1587 1609 test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; 1588 1610 test->ifobj_rx->release_rx = false; 1589 1611 
test->ifobj_rx->validation_func = validate_rx_full; 1590 - testapp_validate_traffic(test); 1612 + return testapp_validate_traffic(test); 1591 1613 } 1592 1614 1593 - static void testapp_stats_fill_empty(struct test_spec *test) 1615 + static int testapp_stats_fill_empty(struct test_spec *test) 1594 1616 { 1595 1617 test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); 1596 - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); 1618 + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); 1597 1619 test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 1598 - DEFAULT_UMEM_BUFFERS, PKT_SIZE); 1599 - if (!test->ifobj_rx->pkt_stream) 1600 - exit_with_error(ENOMEM); 1620 + DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); 1601 1621 1602 1622 test->ifobj_rx->use_fill_ring = false; 1603 1623 test->ifobj_rx->validation_func = validate_fill_empty; 1604 - testapp_validate_traffic(test); 1624 + return testapp_validate_traffic(test); 1605 1625 } 1606 1626 1607 - /* Simple test */ 1608 - static bool hugepages_present(struct ifobject *ifobject) 1627 + static int testapp_unaligned(struct test_spec *test) 1609 1628 { 1610 - size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size; 1611 - void *bufs; 1612 - 1613 - bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, 1614 - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0); 1615 - if (bufs == MAP_FAILED) 1616 - return false; 1617 - 1618 - mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; 1619 - munmap(bufs, mmap_sz); 1620 - return true; 1621 - } 1622 - 1623 - static bool testapp_unaligned(struct test_spec *test) 1624 - { 1625 - if (!hugepages_present(test->ifobj_tx)) { 1626 - ksft_test_result_skip("No 2M huge pages present.\n"); 1627 - return false; 1628 - } 1629 - 1630 1629 test_spec_set_name(test, "UNALIGNED_MODE"); 1631 1630 test->ifobj_tx->umem->unaligned_mode = true; 1632 1631 test->ifobj_rx->umem->unaligned_mode = true; 
1633 - /* Let half of the packets straddle a buffer boundrary */ 1634 - pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2); 1635 - test->ifobj_rx->pkt_stream->use_addr_for_fill = true; 1636 - testapp_validate_traffic(test); 1632 + /* Let half of the packets straddle a 4K buffer boundary */ 1633 + pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2); 1637 1634 1638 - return true; 1635 + return testapp_validate_traffic(test); 1639 1636 } 1640 1637 1641 - static void testapp_single_pkt(struct test_spec *test) 1638 + static int testapp_single_pkt(struct test_spec *test) 1642 1639 { 1643 - struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}}; 1640 + struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}}; 1644 1641 1645 1642 pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); 1646 - testapp_validate_traffic(test); 1643 + return testapp_validate_traffic(test); 1647 1644 } 1648 1645 1649 - static void testapp_invalid_desc(struct test_spec *test) 1646 + static int testapp_invalid_desc(struct test_spec *test) 1650 1647 { 1651 - u64 umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; 1648 + struct xsk_umem_info *umem = test->ifobj_tx->umem; 1649 + u64 umem_size = umem->num_frames * umem->frame_size; 1652 1650 struct pkt pkts[] = { 1653 1651 /* Zero packet address allowed */ 1654 - {0, PKT_SIZE, 0, true}, 1652 + {0, MIN_PKT_SIZE, 0, true}, 1655 1653 /* Allowed packet */ 1656 - {0x1000, PKT_SIZE, 0, true}, 1654 + {0, MIN_PKT_SIZE, 0, true}, 1657 1655 /* Straddling the start of umem */ 1658 - {-2, PKT_SIZE, 0, false}, 1656 + {-2, MIN_PKT_SIZE, 0, false}, 1659 1657 /* Packet too large */ 1660 - {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, 1658 + {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, 1661 1659 /* Up to end of umem allowed */ 1662 - {umem_size - PKT_SIZE, PKT_SIZE, 0, true}, 1660 + {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true}, 1663 1661 /* After umem ends */ 1664 - {umem_size, PKT_SIZE, 0, false}, 1662 
+ {umem_size, MIN_PKT_SIZE, 0, false}, 1665 1663 /* Straddle the end of umem */ 1666 - {umem_size - PKT_SIZE / 2, PKT_SIZE, 0, false}, 1667 - /* Straddle a page boundrary */ 1668 - {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false}, 1669 - /* Straddle a 2K boundrary */ 1670 - {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true}, 1664 + {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, 1665 + /* Straddle a 4K boundary */ 1666 + {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, 1667 + /* Straddle a 2K boundary */ 1668 + {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true}, 1671 1669 /* Valid packet for synch so that something is received */ 1672 - {0x4000, PKT_SIZE, 0, true}}; 1670 + {0, MIN_PKT_SIZE, 0, true}}; 1673 1671 1674 - if (test->ifobj_tx->umem->unaligned_mode) { 1675 - /* Crossing a page boundrary allowed */ 1672 + if (umem->unaligned_mode) { 1673 + /* Crossing a page boundary allowed */ 1676 1674 pkts[7].valid = true; 1677 1675 } 1678 - if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { 1679 - /* Crossing a 2K frame size boundrary not allowed */ 1676 + if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { 1677 + /* Crossing a 2K frame size boundary not allowed */ 1680 1678 pkts[8].valid = false; 1681 1679 } 1682 1680 1683 1681 if (test->ifobj_tx->shared_umem) { 1684 - pkts[4].addr += umem_size; 1685 - pkts[5].addr += umem_size; 1686 - pkts[6].addr += umem_size; 1682 + pkts[4].offset += umem_size; 1683 + pkts[5].offset += umem_size; 1684 + pkts[6].offset += umem_size; 1687 1685 } 1688 1686 1689 1687 pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); 1690 - testapp_validate_traffic(test); 1688 + return testapp_validate_traffic(test); 1691 1689 } 1692 1690 1693 - static void testapp_xdp_drop(struct test_spec *test) 1691 + static int testapp_xdp_drop(struct test_spec *test) 1694 1692 { 1695 1693 struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; 1696 1694 struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; ··· 1679 
1719 skel_rx->maps.xsk, skel_tx->maps.xsk); 1680 1720 1681 1721 pkt_stream_receive_half(test); 1682 - testapp_validate_traffic(test); 1722 + return testapp_validate_traffic(test); 1683 1723 } 1684 1724 1685 - static void testapp_xdp_metadata_count(struct test_spec *test) 1725 + static int testapp_xdp_metadata_count(struct test_spec *test) 1686 1726 { 1687 1727 struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; 1688 1728 struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; ··· 1703 1743 if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) 1704 1744 exit_with_error(errno); 1705 1745 1706 - testapp_validate_traffic(test); 1746 + return testapp_validate_traffic(test); 1707 1747 } 1708 1748 1709 - static void testapp_poll_txq_tmout(struct test_spec *test) 1749 + static int testapp_poll_txq_tmout(struct test_spec *test) 1710 1750 { 1711 1751 test_spec_set_name(test, "POLL_TXQ_FULL"); 1712 1752 ··· 1714 1754 /* create invalid frame by set umem frame_size and pkt length equal to 2048 */ 1715 1755 test->ifobj_tx->umem->frame_size = 2048; 1716 1756 pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048); 1717 - testapp_validate_traffic_single_thread(test, test->ifobj_tx); 1757 + return testapp_validate_traffic_single_thread(test, test->ifobj_tx); 1718 1758 } 1719 1759 1720 - static void testapp_poll_rxq_tmout(struct test_spec *test) 1760 + static int testapp_poll_rxq_tmout(struct test_spec *test) 1721 1761 { 1722 1762 test_spec_set_name(test, "POLL_RXQ_EMPTY"); 1723 1763 test->ifobj_rx->use_poll = true; 1724 - testapp_validate_traffic_single_thread(test, test->ifobj_rx); 1764 + return testapp_validate_traffic_single_thread(test, test->ifobj_rx); 1725 1765 } 1726 1766 1727 1767 static int xsk_load_xdp_programs(struct ifobject *ifobj) ··· 1738 1778 xsk_xdp_progs__destroy(ifobj->xdp_progs); 1739 1779 } 1740 1780 1741 - static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, 1742 - const char *dst_ip, const char 
*src_ip, const u16 dst_port, 1743 - const u16 src_port, thread_func_t func_ptr) 1781 + /* Simple test */ 1782 + static bool hugepages_present(void) 1744 1783 { 1745 - struct in_addr ip; 1784 + size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE; 1785 + void *bufs; 1786 + 1787 + bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, 1788 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB); 1789 + if (bufs == MAP_FAILED) 1790 + return false; 1791 + 1792 + mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; 1793 + munmap(bufs, mmap_sz); 1794 + return true; 1795 + } 1796 + 1797 + static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, 1798 + thread_func_t func_ptr) 1799 + { 1746 1800 int err; 1747 1801 1748 1802 memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); 1749 1803 memcpy(ifobj->src_mac, src_mac, ETH_ALEN); 1750 - 1751 - inet_aton(dst_ip, &ip); 1752 - ifobj->dst_ip = ip.s_addr; 1753 - 1754 - inet_aton(src_ip, &ip); 1755 - ifobj->src_ip = ip.s_addr; 1756 - 1757 - ifobj->dst_port = dst_port; 1758 - ifobj->src_port = src_port; 1759 1804 1760 1805 ifobj->func_ptr = func_ptr; 1761 1806 ··· 1769 1804 printf("Error loading XDP program\n"); 1770 1805 exit_with_error(err); 1771 1806 } 1807 + 1808 + if (hugepages_present()) 1809 + ifobj->unaligned_supp = true; 1772 1810 } 1773 1811 1774 1812 static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) 1775 1813 { 1814 + int ret = TEST_SKIP; 1815 + 1776 1816 switch (type) { 1777 1817 case TEST_TYPE_STATS_RX_DROPPED: 1778 - if (mode == TEST_MODE_ZC) { 1779 - ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n"); 1780 - return; 1781 - } 1782 - testapp_stats_rx_dropped(test); 1818 + ret = testapp_stats_rx_dropped(test); 1783 1819 break; 1784 1820 case TEST_TYPE_STATS_TX_INVALID_DESCS: 1785 - testapp_stats_tx_invalid_descs(test); 1821 + ret = testapp_stats_tx_invalid_descs(test); 1786 1822 break; 1787 1823 case 
TEST_TYPE_STATS_RX_FULL: 1788 - testapp_stats_rx_full(test); 1824 + ret = testapp_stats_rx_full(test); 1789 1825 break; 1790 1826 case TEST_TYPE_STATS_FILL_EMPTY: 1791 - testapp_stats_fill_empty(test); 1827 + ret = testapp_stats_fill_empty(test); 1792 1828 break; 1793 1829 case TEST_TYPE_TEARDOWN: 1794 - testapp_teardown(test); 1830 + ret = testapp_teardown(test); 1795 1831 break; 1796 1832 case TEST_TYPE_BIDI: 1797 - testapp_bidi(test); 1833 + ret = testapp_bidi(test); 1798 1834 break; 1799 1835 case TEST_TYPE_BPF_RES: 1800 - testapp_bpf_res(test); 1836 + ret = testapp_bpf_res(test); 1801 1837 break; 1802 1838 case TEST_TYPE_RUN_TO_COMPLETION: 1803 1839 test_spec_set_name(test, "RUN_TO_COMPLETION"); 1804 - testapp_validate_traffic(test); 1840 + ret = testapp_validate_traffic(test); 1805 1841 break; 1806 1842 case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT: 1807 1843 test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT"); 1808 - testapp_single_pkt(test); 1844 + ret = testapp_single_pkt(test); 1809 1845 break; 1810 1846 case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME: 1811 1847 test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); 1812 1848 test->ifobj_tx->umem->frame_size = 2048; 1813 1849 test->ifobj_rx->umem->frame_size = 2048; 1814 - pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE); 1815 - testapp_validate_traffic(test); 1850 + pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE); 1851 + ret = testapp_validate_traffic(test); 1816 1852 break; 1817 1853 case TEST_TYPE_RX_POLL: 1818 1854 test->ifobj_rx->use_poll = true; 1819 1855 test_spec_set_name(test, "POLL_RX"); 1820 - testapp_validate_traffic(test); 1856 + ret = testapp_validate_traffic(test); 1821 1857 break; 1822 1858 case TEST_TYPE_TX_POLL: 1823 1859 test->ifobj_tx->use_poll = true; 1824 1860 test_spec_set_name(test, "POLL_TX"); 1825 - testapp_validate_traffic(test); 1861 + ret = testapp_validate_traffic(test); 1826 1862 break; 1827 1863 case TEST_TYPE_POLL_TXQ_TMOUT: 1828 - 
testapp_poll_txq_tmout(test); 1864 + ret = testapp_poll_txq_tmout(test); 1829 1865 break; 1830 1866 case TEST_TYPE_POLL_RXQ_TMOUT: 1831 - testapp_poll_rxq_tmout(test); 1867 + ret = testapp_poll_rxq_tmout(test); 1832 1868 break; 1833 1869 case TEST_TYPE_ALIGNED_INV_DESC: 1834 1870 test_spec_set_name(test, "ALIGNED_INV_DESC"); 1835 - testapp_invalid_desc(test); 1871 + ret = testapp_invalid_desc(test); 1836 1872 break; 1837 1873 case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME: 1838 1874 test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE"); 1839 1875 test->ifobj_tx->umem->frame_size = 2048; 1840 1876 test->ifobj_rx->umem->frame_size = 2048; 1841 - testapp_invalid_desc(test); 1877 + ret = testapp_invalid_desc(test); 1842 1878 break; 1843 1879 case TEST_TYPE_UNALIGNED_INV_DESC: 1844 - if (!hugepages_present(test->ifobj_tx)) { 1845 - ksft_test_result_skip("No 2M huge pages present.\n"); 1846 - return; 1847 - } 1848 1880 test_spec_set_name(test, "UNALIGNED_INV_DESC"); 1849 1881 test->ifobj_tx->umem->unaligned_mode = true; 1850 1882 test->ifobj_rx->umem->unaligned_mode = true; 1851 - testapp_invalid_desc(test); 1883 + ret = testapp_invalid_desc(test); 1852 1884 break; 1853 1885 case TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME: { 1854 1886 u64 page_size, umem_size; 1855 1887 1856 - if (!hugepages_present(test->ifobj_tx)) { 1857 - ksft_test_result_skip("No 2M huge pages present.\n"); 1858 - return; 1859 - } 1860 1888 test_spec_set_name(test, "UNALIGNED_INV_DESC_4K1_FRAME_SIZE"); 1861 1889 /* Odd frame size so the UMEM doesn't end near a page boundary. 
*/ 1862 1890 test->ifobj_tx->umem->frame_size = 4001; ··· 1861 1903 */ 1862 1904 page_size = sysconf(_SC_PAGESIZE); 1863 1905 umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; 1864 - assert(umem_size % page_size > PKT_SIZE); 1865 - assert(umem_size % page_size < page_size - PKT_SIZE); 1866 - testapp_invalid_desc(test); 1906 + assert(umem_size % page_size > MIN_PKT_SIZE); 1907 + assert(umem_size % page_size < page_size - MIN_PKT_SIZE); 1908 + ret = testapp_invalid_desc(test); 1867 1909 break; 1868 1910 } 1869 1911 case TEST_TYPE_UNALIGNED: 1870 - if (!testapp_unaligned(test)) 1871 - return; 1912 + ret = testapp_unaligned(test); 1872 1913 break; 1873 1914 case TEST_TYPE_HEADROOM: 1874 - testapp_headroom(test); 1915 + ret = testapp_headroom(test); 1875 1916 break; 1876 1917 case TEST_TYPE_XDP_DROP_HALF: 1877 - testapp_xdp_drop(test); 1918 + ret = testapp_xdp_drop(test); 1878 1919 break; 1879 1920 case TEST_TYPE_XDP_METADATA_COUNT: 1880 - testapp_xdp_metadata_count(test); 1921 + ret = testapp_xdp_metadata_count(test); 1881 1922 break; 1882 1923 default: 1883 1924 break; 1884 1925 } 1885 1926 1886 - if (!test->fail) 1927 + if (ret == TEST_PASS) 1887 1928 ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), 1888 1929 test->name); 1889 1930 pkt_stream_restore_default(test); ··· 1987 2030 modes++; 1988 2031 } 1989 2032 1990 - init_iface(ifobj_rx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, 1991 - worker_testapp_validate_rx); 1992 - init_iface(ifobj_tx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, 1993 - worker_testapp_validate_tx); 2033 + init_iface(ifobj_rx, MAC1, MAC2, worker_testapp_validate_rx); 2034 + init_iface(ifobj_tx, MAC2, MAC1, worker_testapp_validate_tx); 1994 2035 1995 2036 test_spec_init(&test, ifobj_tx, ifobj_rx, 0); 1996 - tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE); 1997 - rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, 
PKT_SIZE); 2037 + tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); 2038 + rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); 1998 2039 if (!tx_pkt_stream_default || !rx_pkt_stream_default) 1999 2040 exit_with_error(ENOMEM); 2000 2041 test.tx_pkt_stream_default = tx_pkt_stream_default;
+10 -21
tools/testing/selftests/bpf/xskxceiver.h
··· 30 30 #define TEST_PASS 0 31 31 #define TEST_FAILURE -1 32 32 #define TEST_CONTINUE 1 33 + #define TEST_SKIP 2 33 34 #define MAX_INTERFACES 2 34 35 #define MAX_INTERFACE_NAME_CHARS 16 35 36 #define MAX_SOCKETS 2 36 37 #define MAX_TEST_NAME_SIZE 32 37 38 #define MAX_TEARDOWN_ITER 10 38 - #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ 39 - sizeof(struct udphdr)) 40 - #define MIN_ETH_PKT_SIZE 64 41 - #define ETH_FCS_SIZE 4 42 - #define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE) 43 - #define PKT_SIZE (MIN_PKT_SIZE) 44 - #define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) 45 - #define IP_PKT_VER 0x4 46 - #define IP_PKT_TOS 0x9 47 - #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) 48 - #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) 39 + #define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */ 40 + #define MIN_PKT_SIZE 64 49 41 #define USLEEP_MAX 10000 50 42 #define SOCK_RECONF_CTR 10 51 43 #define BATCH_SIZE 64 ··· 49 57 #define UMEM_HEADROOM_TEST_SIZE 128 50 58 #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1) 51 59 #define HUGEPAGE_SIZE (2 * 1024 * 1024) 60 + #define PKT_DUMP_NB_TO_PRINT 16 52 61 53 62 #define print_verbose(x...) 
do { if (opt_verbose) ksft_print_msg(x); } while (0) 54 63 ··· 86 93 TEST_TYPE_MAX 87 94 }; 88 95 89 - static bool opt_pkt_dump; 90 96 static bool opt_verbose; 91 97 92 98 struct xsk_umem_info { 93 99 struct xsk_ring_prod fq; 94 100 struct xsk_ring_cons cq; 95 101 struct xsk_umem *umem; 102 + u64 next_buffer; 96 103 u32 num_frames; 97 104 u32 frame_headroom; 98 105 void *buffer; ··· 111 118 }; 112 119 113 120 struct pkt { 114 - u64 addr; 121 + int offset; 115 122 u32 len; 116 - u32 payload; 123 + u32 pkt_nb; 117 124 bool valid; 118 125 }; 119 126 120 127 struct pkt_stream { 121 128 u32 nb_pkts; 122 - u32 rx_pkt_nb; 129 + u32 current_pkt_nb; 123 130 struct pkt *pkts; 124 - bool use_addr_for_fill; 131 + u32 max_pkt_len; 125 132 }; 126 133 127 134 struct ifobject; ··· 141 148 struct bpf_program *xdp_prog; 142 149 enum test_mode mode; 143 150 int ifindex; 144 - u32 dst_ip; 145 - u32 src_ip; 146 151 u32 bind_flags; 147 - u16 src_port; 148 - u16 dst_port; 149 152 bool tx_on; 150 153 bool rx_on; 151 154 bool use_poll; ··· 150 161 bool release_rx; 151 162 bool shared_umem; 152 163 bool use_metadata; 164 + bool unaligned_supp; 153 165 u8 dst_mac[ETH_ALEN]; 154 166 u8 src_mac[ETH_ALEN]; 155 167 }; ··· 174 184 175 185 pthread_barrier_t barr; 176 186 pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; 177 - pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; 178 187 179 188 int pkts_in_flight; 180 189