Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

+6 -3

Documentation/bpf/instruction-set.rst

··· 163 163 BPF_DIV 0x30 dst = (src != 0) ? (dst / src) : 0 164 164 BPF_OR 0x40 dst \|= src 165 165 BPF_AND 0x50 dst &= src 166 - BPF_LSH 0x60 dst <<= src 167 - BPF_RSH 0x70 dst >>= src 166 + BPF_LSH 0x60 dst <<= (src & mask) 167 + BPF_RSH 0x70 dst >>= (src & mask) 168 168 BPF_NEG 0x80 dst = -dst 169 169 BPF_MOD 0x90 dst = (src != 0) ? (dst % src) : dst 170 170 BPF_XOR 0xa0 dst ^= src 171 171 BPF_MOV 0xb0 dst = src 172 - BPF_ARSH 0xc0 sign extending shift right 172 + BPF_ARSH 0xc0 sign extending dst >>= (src & mask) 173 173 BPF_END 0xd0 byte swap operations (see `Byte swap instructions`_ below) 174 174 ======== ===== ========================================================== 175 175 ··· 203 203 for ``BPF_ALU64``, 'imm' is first sign extended to 64 bits and the result 204 204 interpreted as an unsigned 64-bit value. There are no instructions for 205 205 signed division or modulo. 206 + 207 + Shift operations use a mask of 0x3F (63) for 64-bit operations and 0x1F (31) 208 + for 32-bit operations. 206 209 207 210 Byte swap instructions 208 211 ~~~~~~~~~~~~~~~~~~~~~~

+2 -2

include/linux/bpf.h

··· 2077 2077 struct bpf_link *bpf_link_get_from_fd(u32 ufd); 2078 2078 struct bpf_link *bpf_link_get_curr_or_next(u32 *id); 2079 2079 2080 - int bpf_obj_pin_user(u32 ufd, const char __user *pathname); 2081 - int bpf_obj_get_user(const char __user *pathname, int flags); 2080 + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname); 2081 + int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags); 2082 2082 2083 2083 #define BPF_ITER_FUNC_PREFIX "bpf_iter_" 2084 2084 #define DEFINE_BPF_ITER_FUNC(target, args...) \

+11 -7

include/linux/btf.h

··· 98 98 union bpf_attr; 99 99 struct btf_show; 100 100 struct btf_id_set; 101 + struct bpf_prog; 102 + 103 + typedef int (*btf_kfunc_filter_t)(const struct bpf_prog *prog, u32 kfunc_id); 101 104 102 105 struct btf_kfunc_id_set { 103 106 struct module *owner; 104 107 struct btf_id_set8 *set; 108 + btf_kfunc_filter_t filter; 105 109 }; 106 110 107 111 struct btf_id_dtor_kfunc { ··· 483 479 return bsearch(&id, set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func); 484 480 } 485 481 486 - struct bpf_prog; 487 482 struct bpf_verifier_log; 488 483 489 484 #ifdef CONFIG_BPF_SYSCALL ··· 490 487 const char *btf_name_by_offset(const struct btf *btf, u32 offset); 491 488 struct btf *btf_parse_vmlinux(void); 492 489 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); 493 - u32 *btf_kfunc_id_set_contains(const struct btf *btf, 494 - enum bpf_prog_type prog_type, 495 - u32 kfunc_btf_id); 496 - u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id); 490 + u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id, 491 + const struct bpf_prog *prog); 492 + u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id, 493 + const struct bpf_prog *prog); 497 494 int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, 498 495 const struct btf_kfunc_id_set *s); 499 496 int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset); ··· 520 517 return NULL; 521 518 } 522 519 static inline u32 *btf_kfunc_id_set_contains(const struct btf *btf, 523 - enum bpf_prog_type prog_type, 524 - u32 kfunc_btf_id) 520 + u32 kfunc_btf_id, 521 + struct bpf_prog *prog) 522 + 525 523 { 526 524 return NULL; 527 525 }

-1

include/net/udp.h

··· 437 437 struct udp_iter_state { 438 438 struct seq_net_private p; 439 439 int bucket; 440 - struct udp_seq_afinfo *bpf_seq_afinfo; 441 440 }; 442 441 443 442 void *udp_seq_start(struct seq_file *seq, loff_t *pos);

+10

include/uapi/linux/bpf.h

··· 1272 1272 1273 1273 /* Create a map that will be registered/unregistered by the backed bpf_link */ 1274 1274 BPF_F_LINK = (1U << 13), 1275 + 1276 + /* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ 1277 + BPF_F_PATH_FD = (1U << 14), 1275 1278 }; 1276 1279 1277 1280 /* Flags for BPF_PROG_QUERY. */ ··· 1423 1420 __aligned_u64 pathname; 1424 1421 __u32 bpf_fd; 1425 1422 __u32 file_flags; 1423 + /* Same as dirfd in openat() syscall; see openat(2) 1424 + * manpage for details of path FD and pathname semantics; 1425 + * path_fd should be accompanied by BPF_F_PATH_FD flag set in 1426 + * file_flags field, otherwise it should be set to zero; 1427 + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. 1428 + */ 1429 + __s32 path_fd; 1426 1430 }; 1427 1431 1428 1432 struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */

+54 -11

kernel/bpf/btf.c

··· 222 222 enum { 223 223 BTF_KFUNC_SET_MAX_CNT = 256, 224 224 BTF_DTOR_KFUNC_MAX_CNT = 256, 225 + BTF_KFUNC_FILTER_MAX_CNT = 16, 226 + }; 227 + 228 + struct btf_kfunc_hook_filter { 229 + btf_kfunc_filter_t filters[BTF_KFUNC_FILTER_MAX_CNT]; 230 + u32 nr_filters; 225 231 }; 226 232 227 233 struct btf_kfunc_set_tab { 228 234 struct btf_id_set8 *sets[BTF_KFUNC_HOOK_MAX]; 235 + struct btf_kfunc_hook_filter hook_filters[BTF_KFUNC_HOOK_MAX]; 229 236 }; 230 237 231 238 struct btf_id_dtor_kfunc_tab { ··· 7676 7669 /* Kernel Function (kfunc) BTF ID set registration API */ 7677 7670 7678 7671 static int btf_populate_kfunc_set(struct btf *btf, enum btf_kfunc_hook hook, 7679 - struct btf_id_set8 *add_set) 7672 + const struct btf_kfunc_id_set *kset) 7680 7673 { 7674 + struct btf_kfunc_hook_filter *hook_filter; 7675 + struct btf_id_set8 *add_set = kset->set; 7681 7676 bool vmlinux_set = !btf_is_module(btf); 7677 + bool add_filter = !!kset->filter; 7682 7678 struct btf_kfunc_set_tab *tab; 7683 7679 struct btf_id_set8 *set; 7684 7680 u32 set_cnt; ··· 7696 7686 return 0; 7697 7687 7698 7688 tab = btf->kfunc_set_tab; 7689 + 7690 + if (tab && add_filter) { 7691 + u32 i; 7692 + 7693 + hook_filter = &tab->hook_filters[hook]; 7694 + for (i = 0; i < hook_filter->nr_filters; i++) { 7695 + if (hook_filter->filters[i] == kset->filter) { 7696 + add_filter = false; 7697 + break; 7698 + } 7699 + } 7700 + 7701 + if (add_filter && hook_filter->nr_filters == BTF_KFUNC_FILTER_MAX_CNT) { 7702 + ret = -E2BIG; 7703 + goto end; 7704 + } 7705 + } 7706 + 7699 7707 if (!tab) { 7700 7708 tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN); 7701 7709 if (!tab) ··· 7736 7708 */ 7737 7709 if (!vmlinux_set) { 7738 7710 tab->sets[hook] = add_set; 7739 - return 0; 7711 + goto do_add_filter; 7740 7712 } 7741 7713 7742 7714 /* In case of vmlinux sets, there may be more than one set being ··· 7778 7750 7779 7751 sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL); 7780 7752 7753 + 
do_add_filter: 7754 + if (add_filter) { 7755 + hook_filter = &tab->hook_filters[hook]; 7756 + hook_filter->filters[hook_filter->nr_filters++] = kset->filter; 7757 + } 7781 7758 return 0; 7782 7759 end: 7783 7760 btf_free_kfunc_set_tab(btf); ··· 7791 7758 7792 7759 static u32 *__btf_kfunc_id_set_contains(const struct btf *btf, 7793 7760 enum btf_kfunc_hook hook, 7794 - u32 kfunc_btf_id) 7761 + u32 kfunc_btf_id, 7762 + const struct bpf_prog *prog) 7795 7763 { 7764 + struct btf_kfunc_hook_filter *hook_filter; 7796 7765 struct btf_id_set8 *set; 7797 - u32 *id; 7766 + u32 *id, i; 7798 7767 7799 7768 if (hook >= BTF_KFUNC_HOOK_MAX) 7800 7769 return NULL; 7801 7770 if (!btf->kfunc_set_tab) 7802 7771 return NULL; 7772 + hook_filter = &btf->kfunc_set_tab->hook_filters[hook]; 7773 + for (i = 0; i < hook_filter->nr_filters; i++) { 7774 + if (hook_filter->filters[i](prog, kfunc_btf_id)) 7775 + return NULL; 7776 + } 7803 7777 set = btf->kfunc_set_tab->sets[hook]; 7804 7778 if (!set) 7805 7779 return NULL; ··· 7861 7821 * protection for looking up a well-formed btf->kfunc_set_tab. 
7862 7822 */ 7863 7823 u32 *btf_kfunc_id_set_contains(const struct btf *btf, 7864 - enum bpf_prog_type prog_type, 7865 - u32 kfunc_btf_id) 7824 + u32 kfunc_btf_id, 7825 + const struct bpf_prog *prog) 7866 7826 { 7827 + enum bpf_prog_type prog_type = resolve_prog_type(prog); 7867 7828 enum btf_kfunc_hook hook; 7868 7829 u32 *kfunc_flags; 7869 7830 7870 - kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id); 7831 + kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id, prog); 7871 7832 if (kfunc_flags) 7872 7833 return kfunc_flags; 7873 7834 7874 7835 hook = bpf_prog_type_to_kfunc_hook(prog_type); 7875 - return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id); 7836 + return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id, prog); 7876 7837 } 7877 7838 7878 - u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id) 7839 + u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id, 7840 + const struct bpf_prog *prog) 7879 7841 { 7880 - return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id); 7842 + return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id, prog); 7881 7843 } 7882 7844 7883 7845 static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook, ··· 7910 7868 goto err_out; 7911 7869 } 7912 7870 7913 - ret = btf_populate_kfunc_set(btf, hook, kset->set); 7871 + ret = btf_populate_kfunc_set(btf, hook, kset); 7872 + 7914 7873 err_out: 7915 7874 btf_put(btf); 7916 7875 return ret;

+13 -14

kernel/bpf/inode.c

··· 435 435 return ret; 436 436 } 437 437 438 - static int bpf_obj_do_pin(const char __user *pathname, void *raw, 438 + static int bpf_obj_do_pin(int path_fd, const char __user *pathname, void *raw, 439 439 enum bpf_type type) 440 440 { 441 441 struct dentry *dentry; ··· 444 444 umode_t mode; 445 445 int ret; 446 446 447 - dentry = user_path_create(AT_FDCWD, pathname, &path, 0); 447 + dentry = user_path_create(path_fd, pathname, &path, 0); 448 448 if (IS_ERR(dentry)) 449 449 return PTR_ERR(dentry); 450 - 451 - mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); 452 - 453 - ret = security_path_mknod(&path, dentry, mode, 0); 454 - if (ret) 455 - goto out; 456 450 457 451 dir = d_inode(path.dentry); 458 452 if (dir->i_op != &bpf_dir_iops) { 459 453 ret = -EPERM; 460 454 goto out; 461 455 } 456 + 457 + mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); 458 + ret = security_path_mknod(&path, dentry, mode, 0); 459 + if (ret) 460 + goto out; 462 461 463 462 switch (type) { 464 463 case BPF_TYPE_PROG: ··· 477 478 return ret; 478 479 } 479 480 480 - int bpf_obj_pin_user(u32 ufd, const char __user *pathname) 481 + int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname) 481 482 { 482 483 enum bpf_type type; 483 484 void *raw; ··· 487 488 if (IS_ERR(raw)) 488 489 return PTR_ERR(raw); 489 490 490 - ret = bpf_obj_do_pin(pathname, raw, type); 491 + ret = bpf_obj_do_pin(path_fd, pathname, raw, type); 491 492 if (ret != 0) 492 493 bpf_any_put(raw, type); 493 494 494 495 return ret; 495 496 } 496 497 497 - static void *bpf_obj_do_get(const char __user *pathname, 498 + static void *bpf_obj_do_get(int path_fd, const char __user *pathname, 498 499 enum bpf_type *type, int flags) 499 500 { 500 501 struct inode *inode; ··· 502 503 void *raw; 503 504 int ret; 504 505 505 - ret = user_path_at(AT_FDCWD, pathname, LOOKUP_FOLLOW, &path); 506 + ret = user_path_at(path_fd, pathname, LOOKUP_FOLLOW, &path); 506 507 if (ret) 507 508 return ERR_PTR(ret); 508 509 ··· 
526 527 return ERR_PTR(ret); 527 528 } 528 529 529 - int bpf_obj_get_user(const char __user *pathname, int flags) 530 + int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags) 530 531 { 531 532 enum bpf_type type = BPF_TYPE_UNSPEC; 532 533 int f_flags; ··· 537 538 if (f_flags < 0) 538 539 return f_flags; 539 540 540 - raw = bpf_obj_do_get(pathname, &type, f_flags); 541 + raw = bpf_obj_do_get(path_fd, pathname, &type, f_flags); 541 542 if (IS_ERR(raw)) 542 543 return PTR_ERR(raw); 543 544

-3

kernel/bpf/log.c

··· 62 62 63 63 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args); 64 64 65 - WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, 66 - "verifier log line truncated - local buffer too short\n"); 67 - 68 65 if (log->level == BPF_LOG_KERNEL) { 69 66 bool newline = n > 0 && log->kbuf[n - 1] == '\n'; 70 67

+34 -11

kernel/bpf/syscall.c

··· 1931 1931 return -ENOTSUPP; 1932 1932 } 1933 1933 1934 + if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { 1935 + fdput(f); 1936 + return -EPERM; 1937 + } 1938 + 1934 1939 mutex_lock(&map->freeze_mutex); 1935 1940 if (bpf_map_write_active(map)) { 1936 1941 err = -EBUSY; ··· 1943 1938 } 1944 1939 if (READ_ONCE(map->frozen)) { 1945 1940 err = -EBUSY; 1946 - goto err_put; 1947 - } 1948 - if (!bpf_capable()) { 1949 - err = -EPERM; 1950 1941 goto err_put; 1951 1942 } 1952 1943 ··· 2698 2697 return err; 2699 2698 } 2700 2699 2701 - #define BPF_OBJ_LAST_FIELD file_flags 2700 + #define BPF_OBJ_LAST_FIELD path_fd 2702 2701 2703 2702 static int bpf_obj_pin(const union bpf_attr *attr) 2704 2703 { 2705 - if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0) 2704 + int path_fd; 2705 + 2706 + if (CHECK_ATTR(BPF_OBJ) || attr->file_flags & ~BPF_F_PATH_FD) 2706 2707 return -EINVAL; 2707 2708 2708 - return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname)); 2709 + /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 2710 + if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 2711 + return -EINVAL; 2712 + 2713 + path_fd = attr->file_flags & BPF_F_PATH_FD ? attr->path_fd : AT_FDCWD; 2714 + return bpf_obj_pin_user(attr->bpf_fd, path_fd, 2715 + u64_to_user_ptr(attr->pathname)); 2709 2716 } 2710 2717 2711 2718 static int bpf_obj_get(const union bpf_attr *attr) 2712 2719 { 2720 + int path_fd; 2721 + 2713 2722 if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 || 2714 - attr->file_flags & ~BPF_OBJ_FLAG_MASK) 2723 + attr->file_flags & ~(BPF_OBJ_FLAG_MASK | BPF_F_PATH_FD)) 2715 2724 return -EINVAL; 2716 2725 2717 - return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), 2726 + /* path_fd has to be accompanied by BPF_F_PATH_FD flag */ 2727 + if (!(attr->file_flags & BPF_F_PATH_FD) && attr->path_fd) 2728 + return -EINVAL; 2729 + 2730 + path_fd = attr->file_flags & BPF_F_PATH_FD ? 
attr->path_fd : AT_FDCWD; 2731 + return bpf_obj_get_user(path_fd, u64_to_user_ptr(attr->pathname), 2718 2732 attr->file_flags); 2719 2733 } 2720 2734 ··· 2984 2968 { 2985 2969 struct bpf_tracing_link *tr_link = 2986 2970 container_of(link, struct bpf_tracing_link, link.link); 2971 + u32 target_btf_id, target_obj_id; 2987 2972 2973 + bpf_trampoline_unpack_key(tr_link->trampoline->key, 2974 + &target_obj_id, &target_btf_id); 2988 2975 seq_printf(seq, 2989 - "attach_type:\t%d\n", 2990 - tr_link->attach_type); 2976 + "attach_type:\t%d\n" 2977 + "target_obj_id:\t%u\n" 2978 + "target_btf_id:\t%u\n", 2979 + tr_link->attach_type, 2980 + target_obj_id, 2981 + target_btf_id); 2991 2982 } 2992 2983 2993 2984 static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,

+4 -3

kernel/bpf/verifier.c

··· 10939 10939 *kfunc_name = func_name; 10940 10940 func_proto = btf_type_by_id(desc_btf, func->type); 10941 10941 10942 - kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id); 10942 + kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog); 10943 10943 if (!kfunc_flags) { 10944 10944 return -EACCES; 10945 10945 } ··· 19010 19010 * in the fmodret id set with the KF_SLEEPABLE flag. 19011 19011 */ 19012 19012 else { 19013 - u32 *flags = btf_kfunc_is_modify_return(btf, btf_id); 19013 + u32 *flags = btf_kfunc_is_modify_return(btf, btf_id, 19014 + prog); 19014 19015 19015 19016 if (flags && (*flags & KF_SLEEPABLE)) 19016 19017 ret = 0; ··· 19039 19038 return -EINVAL; 19040 19039 } 19041 19040 ret = -EINVAL; 19042 - if (btf_kfunc_is_modify_return(btf, btf_id) || 19041 + if (btf_kfunc_is_modify_return(btf, btf_id, prog) || 19043 19042 !check_attach_modify_return(addr, tname)) 19044 19043 ret = 0; 19045 19044 if (ret) {

-201

net/bpf/test_run.c

··· 561 561 return a + *b; 562 562 } 563 563 564 - __bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) 565 - { 566 - return a + b + c + d; 567 - } 568 - 569 - __bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) 570 - { 571 - return a + b; 572 - } 573 - 574 - __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk) 575 - { 576 - return sk; 577 - } 578 - 579 - long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) 580 - { 581 - /* Provoke the compiler to assume that the caller has sign-extended a, 582 - * b and c on platforms where this is required (e.g. s390x). 583 - */ 584 - return (long)a + (long)b + (long)c + d; 585 - } 586 - 587 564 int noinline bpf_fentry_shadow_test(int a) 588 565 { 589 566 return a + 1; ··· 583 606 refcount_t cnt; 584 607 }; 585 608 586 - static struct prog_test_ref_kfunc prog_test_struct = { 587 - .a = 42, 588 - .b = 108, 589 - .next = &prog_test_struct, 590 - .cnt = REFCOUNT_INIT(1), 591 - }; 592 - 593 - __bpf_kfunc struct prog_test_ref_kfunc * 594 - bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) 595 - { 596 - refcount_inc(&prog_test_struct.cnt); 597 - return &prog_test_struct; 598 - } 599 - 600 - __bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) 601 - { 602 - WARN_ON_ONCE(1); 603 - } 604 - 605 - __bpf_kfunc struct prog_test_member * 606 - bpf_kfunc_call_memb_acquire(void) 607 - { 608 - WARN_ON_ONCE(1); 609 - return NULL; 610 - } 611 - 612 609 __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) 613 610 { 614 611 refcount_dec(&p->cnt); ··· 590 639 591 640 __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p) 592 641 { 593 - } 594 - 595 - __bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) 596 - { 597 - WARN_ON_ONCE(1); 598 - } 599 - 600 - static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size) 601 - { 602 - if (size > 2 * sizeof(int)) 603 - 
return NULL; 604 - 605 - return (int *)p; 606 - } 607 - 608 - __bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, 609 - const int rdwr_buf_size) 610 - { 611 - return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); 612 - } 613 - 614 - __bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, 615 - const int rdonly_buf_size) 616 - { 617 - return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); 618 - } 619 - 620 - /* the next 2 ones can't be really used for testing expect to ensure 621 - * that the verifier rejects the call. 622 - * Acquire functions must return struct pointers, so these ones are 623 - * failing. 624 - */ 625 - __bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, 626 - const int rdonly_buf_size) 627 - { 628 - return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); 629 - } 630 - 631 - __bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p) 632 - { 633 - } 634 - 635 - struct prog_test_pass1 { 636 - int x0; 637 - struct { 638 - int x1; 639 - struct { 640 - int x2; 641 - struct { 642 - int x3; 643 - }; 644 - }; 645 - }; 646 - }; 647 - 648 - struct prog_test_pass2 { 649 - int len; 650 - short arr1[4]; 651 - struct { 652 - char arr2[4]; 653 - unsigned long arr3[8]; 654 - } x; 655 - }; 656 - 657 - struct prog_test_fail1 { 658 - void *p; 659 - int x; 660 - }; 661 - 662 - struct prog_test_fail2 { 663 - int x8; 664 - struct prog_test_pass1 x; 665 - }; 666 - 667 - struct prog_test_fail3 { 668 - int len; 669 - char arr1[2]; 670 - char arr2[]; 671 - }; 672 - 673 - __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) 674 - { 675 - } 676 - 677 - __bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) 678 - { 679 - } 680 - 681 - __bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) 682 - { 683 - } 684 - 685 - __bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) 686 - { 687 - } 688 - 689 - __bpf_kfunc void 
bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) 690 - { 691 - } 692 - 693 - __bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) 694 - { 695 - } 696 - 697 - __bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) 698 - { 699 - } 700 - 701 - __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) 702 - { 703 - } 704 - 705 - __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) 706 - { 707 - } 708 - 709 - __bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) 710 - { 711 - /* p != NULL, but p->cnt could be 0 */ 712 - } 713 - 714 - __bpf_kfunc void bpf_kfunc_call_test_destructive(void) 715 - { 716 - } 717 - 718 - __bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) 719 - { 720 - return arg; 721 642 } 722 643 723 644 __diag_pop(); ··· 605 782 }; 606 783 607 784 BTF_SET8_START(test_sk_check_kfunc_ids) 608 - BTF_ID_FLAGS(func, bpf_kfunc_call_test1) 609 - BTF_ID_FLAGS(func, bpf_kfunc_call_test2) 610 - BTF_ID_FLAGS(func, bpf_kfunc_call_test3) 611 - BTF_ID_FLAGS(func, bpf_kfunc_call_test4) 612 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) 613 - BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) 614 785 BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE) 615 786 BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE) 616 - BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) 617 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL) 618 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL) 619 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL) 620 - BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE) 621 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) 622 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) 623 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) 624 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) 625 - 
BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) 626 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) 627 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) 628 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) 629 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) 630 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) 631 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) 632 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) 633 - BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) 634 787 BTF_SET8_END(test_sk_check_kfunc_ids) 635 788 636 789 static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,

+63

net/core/filter.c

··· 11723 11723 return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp); 11724 11724 } 11725 11725 late_initcall(bpf_kfunc_init); 11726 + 11727 + /* Disables missing prototype warnings */ 11728 + __diag_push(); 11729 + __diag_ignore_all("-Wmissing-prototypes", 11730 + "Global functions as their definitions will be in vmlinux BTF"); 11731 + 11732 + /* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code. 11733 + * 11734 + * The function expects a non-NULL pointer to a socket, and invokes the 11735 + * protocol specific socket destroy handlers. 11736 + * 11737 + * The helper can only be called from BPF contexts that have acquired the socket 11738 + * locks. 11739 + * 11740 + * Parameters: 11741 + * @sock: Pointer to socket to be destroyed 11742 + * 11743 + * Return: 11744 + * On error, may return EOPNOTSUPP, EINVAL. 11745 + * EOPNOTSUPP if protocol specific destroy handler is not supported. 11746 + * 0 otherwise 11747 + */ 11748 + __bpf_kfunc int bpf_sock_destroy(struct sock_common *sock) 11749 + { 11750 + struct sock *sk = (struct sock *)sock; 11751 + 11752 + /* The locking semantics that allow for synchronous execution of the 11753 + * destroy handlers are only supported for TCP and UDP. 11754 + * Supporting protocols will need to acquire sock lock in the BPF context 11755 + * prior to invoking this kfunc. 
11756 + */ 11757 + if (!sk->sk_prot->diag_destroy || (sk->sk_protocol != IPPROTO_TCP && 11758 + sk->sk_protocol != IPPROTO_UDP)) 11759 + return -EOPNOTSUPP; 11760 + 11761 + return sk->sk_prot->diag_destroy(sk, ECONNABORTED); 11762 + } 11763 + 11764 + __diag_pop() 11765 + 11766 + BTF_SET8_START(bpf_sk_iter_kfunc_ids) 11767 + BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS) 11768 + BTF_SET8_END(bpf_sk_iter_kfunc_ids) 11769 + 11770 + static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id) 11771 + { 11772 + if (btf_id_set8_contains(&bpf_sk_iter_kfunc_ids, kfunc_id) && 11773 + prog->expected_attach_type != BPF_TRACE_ITER) 11774 + return -EACCES; 11775 + return 0; 11776 + } 11777 + 11778 + static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = { 11779 + .owner = THIS_MODULE, 11780 + .set = &bpf_sk_iter_kfunc_ids, 11781 + .filter = tracing_iter_filter, 11782 + }; 11783 + 11784 + static int init_subsystem(void) 11785 + { 11786 + return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sk_iter_kfunc_set); 11787 + } 11788 + late_initcall(init_subsystem);

+6 -3

net/ipv4/tcp.c

··· 4553 4553 return 0; 4554 4554 } 4555 4555 4556 - /* Don't race with userspace socket closes such as tcp_close. */ 4557 - lock_sock(sk); 4556 + /* BPF context ensures sock locking. */ 4557 + if (!has_current_bpf_ctx()) 4558 + /* Don't race with userspace socket closes such as tcp_close. */ 4559 + lock_sock(sk); 4558 4560 4559 4561 if (sk->sk_state == TCP_LISTEN) { 4560 4562 tcp_set_state(sk, TCP_CLOSE); ··· 4580 4578 bh_unlock_sock(sk); 4581 4579 local_bh_enable(); 4582 4580 tcp_write_queue_purge(sk); 4583 - release_sock(sk); 4581 + if (!has_current_bpf_ctx()) 4582 + release_sock(sk); 4584 4583 return 0; 4585 4584 } 4586 4585 EXPORT_SYMBOL_GPL(tcp_abort);

+3 -4

net/ipv4/tcp_ipv4.c

··· 2967 2967 struct bpf_iter_meta meta; 2968 2968 struct bpf_prog *prog; 2969 2969 struct sock *sk = v; 2970 - bool slow; 2971 2970 uid_t uid; 2972 2971 int ret; 2973 2972 ··· 2974 2975 return 0; 2975 2976 2976 2977 if (sk_fullsock(sk)) 2977 - slow = lock_sock_fast(sk); 2978 + lock_sock(sk); 2978 2979 2979 2980 if (unlikely(sk_unhashed(sk))) { 2980 2981 ret = SEQ_SKIP; ··· 2998 2999 2999 3000 unlock: 3000 3001 if (sk_fullsock(sk)) 3001 - unlock_sock_fast(sk, slow); 3002 + release_sock(sk); 3002 3003 return ret; 3003 3004 3004 3005 } ··· 3360 3361 .ctx_arg_info_size = 1, 3361 3362 .ctx_arg_info = { 3362 3363 { offsetof(struct bpf_iter__tcp, sk_common), 3363 - PTR_TO_BTF_ID_OR_NULL }, 3364 + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, 3364 3365 }, 3365 3366 .get_func_proto = bpf_iter_tcp_get_func_proto, 3366 3367 .seq_info = &tcp_seq_info,

+241 -50

net/ipv4/udp.c

··· 2883 2883 2884 2884 int udp_abort(struct sock *sk, int err) 2885 2885 { 2886 - lock_sock(sk); 2886 + if (!has_current_bpf_ctx()) 2887 + lock_sock(sk); 2887 2888 2888 2889 /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing 2889 2890 * with close() ··· 2897 2896 __udp_disconnect(sk, 0); 2898 2897 2899 2898 out: 2900 - release_sock(sk); 2899 + if (!has_current_bpf_ctx()) 2900 + release_sock(sk); 2901 2901 2902 2902 return 0; 2903 2903 } ··· 2943 2941 /* ------------------------------------------------------------------------ */ 2944 2942 #ifdef CONFIG_PROC_FS 2945 2943 2946 - static struct udp_table *udp_get_table_afinfo(struct udp_seq_afinfo *afinfo, 2947 - struct net *net) 2944 + static unsigned short seq_file_family(const struct seq_file *seq); 2945 + static bool seq_sk_match(struct seq_file *seq, const struct sock *sk) 2948 2946 { 2947 + unsigned short family = seq_file_family(seq); 2948 + 2949 + /* AF_UNSPEC is used as a match all */ 2950 + return ((family == AF_UNSPEC || family == sk->sk_family) && 2951 + net_eq(sock_net(sk), seq_file_net(seq))); 2952 + } 2953 + 2954 + #ifdef CONFIG_BPF_SYSCALL 2955 + static const struct seq_operations bpf_iter_udp_seq_ops; 2956 + #endif 2957 + static struct udp_table *udp_get_table_seq(struct seq_file *seq, 2958 + struct net *net) 2959 + { 2960 + const struct udp_seq_afinfo *afinfo; 2961 + 2962 + #ifdef CONFIG_BPF_SYSCALL 2963 + if (seq->op == &bpf_iter_udp_seq_ops) 2964 + return net->ipv4.udp_table; 2965 + #endif 2966 + 2967 + afinfo = pde_data(file_inode(seq->file)); 2949 2968 return afinfo->udp_table ? 
: net->ipv4.udp_table; 2950 2969 } 2951 2970 ··· 2974 2951 { 2975 2952 struct udp_iter_state *state = seq->private; 2976 2953 struct net *net = seq_file_net(seq); 2977 - struct udp_seq_afinfo *afinfo; 2978 2954 struct udp_table *udptable; 2979 2955 struct sock *sk; 2980 2956 2981 - if (state->bpf_seq_afinfo) 2982 - afinfo = state->bpf_seq_afinfo; 2983 - else 2984 - afinfo = pde_data(file_inode(seq->file)); 2985 - 2986 - udptable = udp_get_table_afinfo(afinfo, net); 2957 + udptable = udp_get_table_seq(seq, net); 2987 2958 2988 2959 for (state->bucket = start; state->bucket <= udptable->mask; 2989 2960 ++state->bucket) { ··· 2988 2971 2989 2972 spin_lock_bh(&hslot->lock); 2990 2973 sk_for_each(sk, &hslot->head) { 2991 - if (!net_eq(sock_net(sk), net)) 2992 - continue; 2993 - if (afinfo->family == AF_UNSPEC || 2994 - sk->sk_family == afinfo->family) 2974 + if (seq_sk_match(seq, sk)) 2995 2975 goto found; 2996 2976 } 2997 2977 spin_unlock_bh(&hslot->lock); ··· 3002 2988 { 3003 2989 struct udp_iter_state *state = seq->private; 3004 2990 struct net *net = seq_file_net(seq); 3005 - struct udp_seq_afinfo *afinfo; 3006 2991 struct udp_table *udptable; 3007 - 3008 - if (state->bpf_seq_afinfo) 3009 - afinfo = state->bpf_seq_afinfo; 3010 - else 3011 - afinfo = pde_data(file_inode(seq->file)); 3012 2992 3013 2993 do { 3014 2994 sk = sk_next(sk); 3015 - } while (sk && (!net_eq(sock_net(sk), net) || 3016 - (afinfo->family != AF_UNSPEC && 3017 - sk->sk_family != afinfo->family))); 2995 + } while (sk && !seq_sk_match(seq, sk)); 3018 2996 3019 2997 if (!sk) { 3020 - udptable = udp_get_table_afinfo(afinfo, net); 2998 + udptable = udp_get_table_seq(seq, net); 3021 2999 3022 3000 if (state->bucket <= udptable->mask) 3023 3001 spin_unlock_bh(&udptable->hash[state->bucket].lock); ··· 3055 3049 void udp_seq_stop(struct seq_file *seq, void *v) 3056 3050 { 3057 3051 struct udp_iter_state *state = seq->private; 3058 - struct udp_seq_afinfo *afinfo; 3059 3052 struct udp_table *udptable; 3060 
3053 3061 - if (state->bpf_seq_afinfo) 3062 - afinfo = state->bpf_seq_afinfo; 3063 - else 3064 - afinfo = pde_data(file_inode(seq->file)); 3065 - 3066 - udptable = udp_get_table_afinfo(afinfo, seq_file_net(seq)); 3054 + udptable = udp_get_table_seq(seq, seq_file_net(seq)); 3067 3055 3068 3056 if (state->bucket <= udptable->mask) 3069 3057 spin_unlock_bh(&udptable->hash[state->bucket].lock); ··· 3110 3110 int bucket __aligned(8); 3111 3111 }; 3112 3112 3113 + struct bpf_udp_iter_state { 3114 + struct udp_iter_state state; 3115 + unsigned int cur_sk; 3116 + unsigned int end_sk; 3117 + unsigned int max_sk; 3118 + int offset; 3119 + struct sock **batch; 3120 + bool st_bucket_done; 3121 + }; 3122 + 3123 + static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter, 3124 + unsigned int new_batch_sz); 3125 + static struct sock *bpf_iter_udp_batch(struct seq_file *seq) 3126 + { 3127 + struct bpf_udp_iter_state *iter = seq->private; 3128 + struct udp_iter_state *state = &iter->state; 3129 + struct net *net = seq_file_net(seq); 3130 + struct udp_table *udptable; 3131 + unsigned int batch_sks = 0; 3132 + bool resized = false; 3133 + struct sock *sk; 3134 + 3135 + /* The current batch is done, so advance the bucket. */ 3136 + if (iter->st_bucket_done) { 3137 + state->bucket++; 3138 + iter->offset = 0; 3139 + } 3140 + 3141 + udptable = udp_get_table_seq(seq, net); 3142 + 3143 + again: 3144 + /* New batch for the next bucket. 3145 + * Iterate over the hash table to find a bucket with sockets matching 3146 + * the iterator attributes, and return the first matching socket from 3147 + * the bucket. The remaining matched sockets from the bucket are batched 3148 + * before releasing the bucket lock. This allows BPF programs that are 3149 + * called in seq_show to acquire the bucket lock if needed. 
3150 + */ 3151 + iter->cur_sk = 0; 3152 + iter->end_sk = 0; 3153 + iter->st_bucket_done = false; 3154 + batch_sks = 0; 3155 + 3156 + for (; state->bucket <= udptable->mask; state->bucket++) { 3157 + struct udp_hslot *hslot2 = &udptable->hash2[state->bucket]; 3158 + 3159 + if (hlist_empty(&hslot2->head)) { 3160 + iter->offset = 0; 3161 + continue; 3162 + } 3163 + 3164 + spin_lock_bh(&hslot2->lock); 3165 + udp_portaddr_for_each_entry(sk, &hslot2->head) { 3166 + if (seq_sk_match(seq, sk)) { 3167 + /* Resume from the last iterated socket at the 3168 + * offset in the bucket before iterator was stopped. 3169 + */ 3170 + if (iter->offset) { 3171 + --iter->offset; 3172 + continue; 3173 + } 3174 + if (iter->end_sk < iter->max_sk) { 3175 + sock_hold(sk); 3176 + iter->batch[iter->end_sk++] = sk; 3177 + } 3178 + batch_sks++; 3179 + } 3180 + } 3181 + spin_unlock_bh(&hslot2->lock); 3182 + 3183 + if (iter->end_sk) 3184 + break; 3185 + 3186 + /* Reset the current bucket's offset before moving to the next bucket. */ 3187 + iter->offset = 0; 3188 + } 3189 + 3190 + /* All done: no batch made. */ 3191 + if (!iter->end_sk) 3192 + return NULL; 3193 + 3194 + if (iter->end_sk == batch_sks) { 3195 + /* Batching is done for the current bucket; return the first 3196 + * socket to be iterated from the batch. 3197 + */ 3198 + iter->st_bucket_done = true; 3199 + goto done; 3200 + } 3201 + if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) { 3202 + resized = true; 3203 + /* After allocating a larger batch, retry one more time to grab 3204 + * the whole bucket. 3205 + */ 3206 + state->bucket--; 3207 + goto again; 3208 + } 3209 + done: 3210 + return iter->batch[0]; 3211 + } 3212 + 3213 + static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) 3214 + { 3215 + struct bpf_udp_iter_state *iter = seq->private; 3216 + struct sock *sk; 3217 + 3218 + /* Whenever seq_next() is called, the iter->cur_sk is 3219 + * done with seq_show(), so unref the iter->cur_sk. 
3220 + */ 3221 + if (iter->cur_sk < iter->end_sk) { 3222 + sock_put(iter->batch[iter->cur_sk++]); 3223 + ++iter->offset; 3224 + } 3225 + 3226 + /* After updating iter->cur_sk, check if there are more sockets 3227 + * available in the current bucket batch. 3228 + */ 3229 + if (iter->cur_sk < iter->end_sk) 3230 + sk = iter->batch[iter->cur_sk]; 3231 + else 3232 + /* Prepare a new batch. */ 3233 + sk = bpf_iter_udp_batch(seq); 3234 + 3235 + ++*pos; 3236 + return sk; 3237 + } 3238 + 3239 + static void *bpf_iter_udp_seq_start(struct seq_file *seq, loff_t *pos) 3240 + { 3241 + /* bpf iter does not support lseek, so it always 3242 + * continue from where it was stop()-ped. 3243 + */ 3244 + if (*pos) 3245 + return bpf_iter_udp_batch(seq); 3246 + 3247 + return SEQ_START_TOKEN; 3248 + } 3249 + 3113 3250 static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, 3114 3251 struct udp_sock *udp_sk, uid_t uid, int bucket) 3115 3252 { ··· 3267 3130 struct bpf_prog *prog; 3268 3131 struct sock *sk = v; 3269 3132 uid_t uid; 3133 + int ret; 3270 3134 3271 3135 if (v == SEQ_START_TOKEN) 3272 3136 return 0; 3273 3137 3138 + lock_sock(sk); 3139 + 3140 + if (unlikely(sk_unhashed(sk))) { 3141 + ret = SEQ_SKIP; 3142 + goto unlock; 3143 + } 3144 + 3274 3145 uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)); 3275 3146 meta.seq = seq; 3276 3147 prog = bpf_iter_get_info(&meta, false); 3277 - return udp_prog_seq_show(prog, &meta, v, uid, state->bucket); 3148 + ret = udp_prog_seq_show(prog, &meta, v, uid, state->bucket); 3149 + 3150 + unlock: 3151 + release_sock(sk); 3152 + return ret; 3153 + } 3154 + 3155 + static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter) 3156 + { 3157 + while (iter->cur_sk < iter->end_sk) 3158 + sock_put(iter->batch[iter->cur_sk++]); 3278 3159 } 3279 3160 3280 3161 static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v) 3281 3162 { 3163 + struct bpf_udp_iter_state *iter = seq->private; 3282 3164 struct bpf_iter_meta 
meta; 3283 3165 struct bpf_prog *prog; 3284 3166 ··· 3308 3152 (void)udp_prog_seq_show(prog, &meta, v, 0, 0); 3309 3153 } 3310 3154 3311 - udp_seq_stop(seq, v); 3155 + if (iter->cur_sk < iter->end_sk) { 3156 + bpf_iter_udp_put_batch(iter); 3157 + iter->st_bucket_done = false; 3158 + } 3312 3159 } 3313 3160 3314 3161 static const struct seq_operations bpf_iter_udp_seq_ops = { 3315 - .start = udp_seq_start, 3316 - .next = udp_seq_next, 3162 + .start = bpf_iter_udp_seq_start, 3163 + .next = bpf_iter_udp_seq_next, 3317 3164 .stop = bpf_iter_udp_seq_stop, 3318 3165 .show = bpf_iter_udp_seq_show, 3319 3166 }; 3320 3167 #endif 3168 + 3169 + static unsigned short seq_file_family(const struct seq_file *seq) 3170 + { 3171 + const struct udp_seq_afinfo *afinfo; 3172 + 3173 + #ifdef CONFIG_BPF_SYSCALL 3174 + /* BPF iterator: bpf programs to filter sockets. */ 3175 + if (seq->op == &bpf_iter_udp_seq_ops) 3176 + return AF_UNSPEC; 3177 + #endif 3178 + 3179 + /* Proc fs iterator */ 3180 + afinfo = pde_data(file_inode(seq->file)); 3181 + return afinfo->family; 3182 + } 3321 3183 3322 3184 const struct seq_operations udp_seq_ops = { 3323 3185 .start = udp_seq_start, ··· 3545 3371 DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta, 3546 3372 struct udp_sock *udp_sk, uid_t uid, int bucket) 3547 3373 3548 - static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) 3374 + static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter, 3375 + unsigned int new_batch_sz) 3549 3376 { 3550 - struct udp_iter_state *st = priv_data; 3551 - struct udp_seq_afinfo *afinfo; 3552 - int ret; 3377 + struct sock **new_batch; 3553 3378 3554 - afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN); 3555 - if (!afinfo) 3379 + new_batch = kvmalloc_array(new_batch_sz, sizeof(*new_batch), 3380 + GFP_USER | __GFP_NOWARN); 3381 + if (!new_batch) 3556 3382 return -ENOMEM; 3557 3383 3558 - afinfo->family = AF_UNSPEC; 3559 - afinfo->udp_table = NULL; 3560 - st->bpf_seq_afinfo = 
afinfo; 3384 + bpf_iter_udp_put_batch(iter); 3385 + kvfree(iter->batch); 3386 + iter->batch = new_batch; 3387 + iter->max_sk = new_batch_sz; 3388 + 3389 + return 0; 3390 + } 3391 + 3392 + #define INIT_BATCH_SZ 16 3393 + 3394 + static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) 3395 + { 3396 + struct bpf_udp_iter_state *iter = priv_data; 3397 + int ret; 3398 + 3561 3399 ret = bpf_iter_init_seq_net(priv_data, aux); 3562 3400 if (ret) 3563 - kfree(afinfo); 3401 + return ret; 3402 + 3403 + ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ); 3404 + if (ret) 3405 + bpf_iter_fini_seq_net(priv_data); 3406 + 3564 3407 return ret; 3565 3408 } 3566 3409 3567 3410 static void bpf_iter_fini_udp(void *priv_data) 3568 3411 { 3569 - struct udp_iter_state *st = priv_data; 3412 + struct bpf_udp_iter_state *iter = priv_data; 3570 3413 3571 - kfree(st->bpf_seq_afinfo); 3572 3414 bpf_iter_fini_seq_net(priv_data); 3415 + kvfree(iter->batch); 3573 3416 } 3574 3417 3575 3418 static const struct bpf_iter_seq_info udp_seq_info = { 3576 3419 .seq_ops = &bpf_iter_udp_seq_ops, 3577 3420 .init_seq_private = bpf_iter_init_udp, 3578 3421 .fini_seq_private = bpf_iter_fini_udp, 3579 - .seq_priv_size = sizeof(struct udp_iter_state), 3422 + .seq_priv_size = sizeof(struct bpf_udp_iter_state), 3580 3423 }; 3581 3424 3582 3425 static struct bpf_iter_reg udp_reg_info = { ··· 3601 3410 .ctx_arg_info_size = 1, 3602 3411 .ctx_arg_info = { 3603 3412 { offsetof(struct bpf_iter__udp, udp_sk), 3604 - PTR_TO_BTF_ID_OR_NULL }, 3413 + PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED }, 3605 3414 }, 3606 3415 .seq_info = &udp_seq_info, 3607 3416 };

+4 -4

tools/bpf/bpftool/Documentation/bpftool-map.rst

··· 28 28 | **bpftool** **map** { **show** | **list** } [*MAP*] 29 29 | **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \ 30 30 | **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] \ 31 - | [**dev** *NAME*] 31 + | [**offload_dev** *NAME*] 32 32 | **bpftool** **map dump** *MAP* 33 33 | **bpftool** **map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] 34 34 | **bpftool** **map lookup** *MAP* [**key** *DATA*] ··· 73 73 maps. On such kernels bpftool will automatically emit this 74 74 information as well. 75 75 76 - **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**dev** *NAME*] 76 + **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**inner_map** *MAP*] [**offload_dev** *NAME*] 77 77 Create a new map with given parameters and pin it to *bpffs* 78 78 as *FILE*. 79 79 ··· 86 86 kernel needs it to collect metadata related to the inner maps 87 87 that the new map will work with. 88 88 89 - Keyword **dev** expects a network interface name, and is used 90 - to request hardware offload for the map. 89 + Keyword **offload_dev** expects a network interface name, 90 + and is used to request hardware offload for the map. 91 91 92 92 **bpftool map dump** *MAP* 93 93 Dump all entries in a given *MAP*. In case of **name**,

+7 -4

tools/bpf/bpftool/Documentation/bpftool-prog.rst

··· 31 31 | **bpftool** **prog dump xlated** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] [**visual**] }] 32 32 | **bpftool** **prog dump jited** *PROG* [{ **file** *FILE* | [**opcodes**] [**linum**] }] 33 33 | **bpftool** **prog pin** *PROG* *FILE* 34 - | **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] 34 + | **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] 35 35 | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] 36 36 | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] 37 37 | **bpftool** **prog tracelog** ··· 129 129 contain a dot character ('.'), which is reserved for future 130 130 extensions of *bpffs*. 131 131 132 - **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] 132 + **bpftool prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** { **idx** *IDX* | **name** *NAME* } *MAP*] [{ **offload_dev** | **xdpmeta_dev** } *NAME*] [**pinmaps** *MAP_DIR*] [**autoattach**] 133 133 Load bpf program(s) from binary *OBJ* and pin as *PATH*. 134 134 **bpftool prog load** pins only the first program from the 135 135 *OBJ* as *PATH*. **bpftool prog loadall** pins all programs ··· 143 143 to be replaced in the ELF file counting from 0, while *NAME* 144 144 allows to replace a map by name. *MAP* specifies the map to 145 145 use, referring to it by **id** or through a **pinned** file. 146 - If **dev** *NAME* is specified program will be loaded onto 147 - given networking device (offload). 146 + If **offload_dev** *NAME* is specified program will be loaded 147 + onto given networking device (offload). 
148 + If **xdpmeta_dev** *NAME* is specified, the program will become 149 + device-bound without offloading; this facilitates access 150 + to XDP metadata. 148 151 Optional **pinmaps** argument can be provided to pin all 149 152 maps under *MAP_DIR* directory. 150 153

+4 -3

tools/bpf/bpftool/bash-completion/bpftool

··· 278 278 _bpftool_get_prog_tags 279 279 return 0 280 280 ;; 281 - dev) 281 + dev|offload_dev|xdpmeta_dev) 282 282 _sysfs_get_netdevs 283 283 return 0 284 284 ;; ··· 508 508 ;; 509 509 *) 510 510 COMPREPLY=( $( compgen -W "map" -- "$cur" ) ) 511 - _bpftool_once_attr 'type dev pinmaps autoattach' 511 + _bpftool_once_attr 'type pinmaps autoattach' 512 + _bpftool_one_of_list 'offload_dev xdpmeta_dev' 512 513 return 0 513 514 ;; 514 515 esac ··· 734 733 esac 735 734 ;; 736 735 *) 737 - _bpftool_once_attr 'type key value entries name flags dev' 736 + _bpftool_once_attr 'type key value entries name flags offload_dev' 738 737 if _bpftool_search_list 'array_of_maps' 'hash_of_maps'; then 739 738 _bpftool_once_attr 'inner_map' 740 739 fi

+6 -3

tools/bpf/bpftool/common.c

··· 68 68 va_end(ap); 69 69 } 70 70 71 - static bool is_bpffs(char *path) 71 + static bool is_bpffs(const char *path) 72 72 { 73 73 struct statfs st_fs; 74 74 ··· 244 244 return fd; 245 245 } 246 246 247 - int mount_bpffs_for_pin(const char *name) 247 + int mount_bpffs_for_pin(const char *name, bool is_dir) 248 248 { 249 249 char err_str[ERR_MAX_LEN]; 250 250 char *file; 251 251 char *dir; 252 252 int err = 0; 253 + 254 + if (is_dir && is_bpffs(name)) 255 + return err; 253 256 254 257 file = malloc(strlen(name) + 1); 255 258 if (!file) { ··· 289 286 { 290 287 int err; 291 288 292 - err = mount_bpffs_for_pin(name); 289 + err = mount_bpffs_for_pin(name, false); 293 290 if (err) 294 291 return err; 295 292

+1 -1

tools/bpf/bpftool/iter.c

··· 76 76 goto close_obj; 77 77 } 78 78 79 - err = mount_bpffs_for_pin(path); 79 + err = mount_bpffs_for_pin(path, false); 80 80 if (err) 81 81 goto close_link; 82 82

+6

tools/bpf/bpftool/link.c

··· 195 195 196 196 show_link_attach_type_json(info->tracing.attach_type, 197 197 json_wtr); 198 + jsonw_uint_field(json_wtr, "target_obj_id", info->tracing.target_obj_id); 199 + jsonw_uint_field(json_wtr, "target_btf_id", info->tracing.target_btf_id); 198 200 break; 199 201 case BPF_LINK_TYPE_CGROUP: 200 202 jsonw_lluint_field(json_wtr, "cgroup_id", ··· 377 375 printf("\n\tprog_type %u ", prog_info.type); 378 376 379 377 show_link_attach_type_plain(info->tracing.attach_type); 378 + if (info->tracing.target_obj_id || info->tracing.target_btf_id) 379 + printf("\n\ttarget_obj_id %u target_btf_id %u ", 380 + info->tracing.target_obj_id, 381 + info->tracing.target_btf_id); 380 382 break; 381 383 case BPF_LINK_TYPE_CGROUP: 382 384 printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id);

+1 -1

tools/bpf/bpftool/main.h

··· 142 142 char *get_fdinfo(int fd, const char *key); 143 143 int open_obj_pinned(const char *path, bool quiet); 144 144 int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type); 145 - int mount_bpffs_for_pin(const char *name); 145 + int mount_bpffs_for_pin(const char *name, bool is_dir); 146 146 int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); 147 147 int do_pin_fd(int fd, const char *name); 148 148

+6 -1

tools/bpf/bpftool/map.c

··· 1287 1287 "flags")) 1288 1288 goto exit; 1289 1289 } else if (is_prefix(*argv, "dev")) { 1290 + p_info("Warning: 'bpftool map create [...] dev <ifname>' syntax is deprecated.\n" 1291 + "Going further, please use 'offload_dev <ifname>' to request hardware offload for the map."); 1292 + goto offload_dev; 1293 + } else if (is_prefix(*argv, "offload_dev")) { 1294 + offload_dev: 1290 1295 NEXT_ARG(); 1291 1296 1292 1297 if (attr.map_ifindex) { ··· 1436 1431 "Usage: %1$s %2$s { show | list } [MAP]\n" 1437 1432 " %1$s %2$s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n" 1438 1433 " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n" 1439 - " [inner_map MAP] [dev NAME]\n" 1434 + " [inner_map MAP] [offload_dev NAME]\n" 1440 1435 " %1$s %2$s dump MAP\n" 1441 1436 " %1$s %2$s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n" 1442 1437 " %1$s %2$s lookup MAP [key DATA]\n"

+44 -9

tools/bpf/bpftool/prog.c

··· 1517 1517 struct bpf_program *prog = NULL, *pos; 1518 1518 unsigned int old_map_fds = 0; 1519 1519 const char *pinmaps = NULL; 1520 + __u32 xdpmeta_ifindex = 0; 1521 + __u32 offload_ifindex = 0; 1520 1522 bool auto_attach = false; 1521 1523 struct bpf_object *obj; 1522 1524 struct bpf_map *map; 1523 1525 const char *pinfile; 1524 1526 unsigned int i, j; 1525 - __u32 ifindex = 0; 1526 1527 const char *file; 1527 1528 int idx, err; 1528 1529 ··· 1615 1614 map_replace[old_map_fds].fd = fd; 1616 1615 old_map_fds++; 1617 1616 } else if (is_prefix(*argv, "dev")) { 1617 + p_info("Warning: 'bpftool prog load [...] dev <ifname>' syntax is deprecated.\n" 1618 + "Going further, please use 'offload_dev <ifname>' to offload program to device.\n" 1619 + "For applications using XDP hints only, use 'xdpmeta_dev <ifname>'."); 1620 + goto offload_dev; 1621 + } else if (is_prefix(*argv, "offload_dev")) { 1622 + offload_dev: 1618 1623 NEXT_ARG(); 1619 1624 1620 - if (ifindex) { 1621 - p_err("offload device already specified"); 1625 + if (offload_ifindex) { 1626 + p_err("offload_dev already specified"); 1627 + goto err_free_reuse_maps; 1628 + } else if (xdpmeta_ifindex) { 1629 + p_err("xdpmeta_dev and offload_dev are mutually exclusive"); 1622 1630 goto err_free_reuse_maps; 1623 1631 } 1624 1632 if (!REQ_ARGS(1)) 1625 1633 goto err_free_reuse_maps; 1626 1634 1627 - ifindex = if_nametoindex(*argv); 1628 - if (!ifindex) { 1635 + offload_ifindex = if_nametoindex(*argv); 1636 + if (!offload_ifindex) { 1637 + p_err("unrecognized netdevice '%s': %s", 1638 + *argv, strerror(errno)); 1639 + goto err_free_reuse_maps; 1640 + } 1641 + NEXT_ARG(); 1642 + } else if (is_prefix(*argv, "xdpmeta_dev")) { 1643 + NEXT_ARG(); 1644 + 1645 + if (xdpmeta_ifindex) { 1646 + p_err("xdpmeta_dev already specified"); 1647 + goto err_free_reuse_maps; 1648 + } else if (offload_ifindex) { 1649 + p_err("xdpmeta_dev and offload_dev are mutually exclusive"); 1650 + goto err_free_reuse_maps; 1651 + } 1652 + if 
(!REQ_ARGS(1)) 1653 + goto err_free_reuse_maps; 1654 + 1655 + xdpmeta_ifindex = if_nametoindex(*argv); 1656 + if (!xdpmeta_ifindex) { 1629 1657 p_err("unrecognized netdevice '%s': %s", 1630 1658 *argv, strerror(errno)); 1631 1659 goto err_free_reuse_maps; ··· 1701 1671 goto err_close_obj; 1702 1672 } 1703 1673 1704 - bpf_program__set_ifindex(pos, ifindex); 1674 + if (prog_type == BPF_PROG_TYPE_XDP && xdpmeta_ifindex) { 1675 + bpf_program__set_flags(pos, BPF_F_XDP_DEV_BOUND_ONLY); 1676 + bpf_program__set_ifindex(pos, xdpmeta_ifindex); 1677 + } else { 1678 + bpf_program__set_ifindex(pos, offload_ifindex); 1679 + } 1705 1680 if (bpf_program__type(pos) != prog_type) 1706 1681 bpf_program__set_type(pos, prog_type); 1707 1682 bpf_program__set_expected_attach_type(pos, expected_attach_type); ··· 1744 1709 idx = 0; 1745 1710 bpf_object__for_each_map(map, obj) { 1746 1711 if (bpf_map__type(map) != BPF_MAP_TYPE_PERF_EVENT_ARRAY) 1747 - bpf_map__set_ifindex(map, ifindex); 1712 + bpf_map__set_ifindex(map, offload_ifindex); 1748 1713 1749 1714 if (j < old_map_fds && idx == map_replace[j].idx) { 1750 1715 err = bpf_map__reuse_fd(map, map_replace[j++].fd); ··· 1774 1739 goto err_close_obj; 1775 1740 } 1776 1741 1777 - err = mount_bpffs_for_pin(pinfile); 1742 + err = mount_bpffs_for_pin(pinfile, !first_prog_only); 1778 1743 if (err) 1779 1744 goto err_close_obj; 1780 1745 ··· 2451 2416 " %1$s %2$s dump jited PROG [{ file FILE | [opcodes] [linum] }]\n" 2452 2417 " %1$s %2$s pin PROG FILE\n" 2453 2418 " %1$s %2$s { load | loadall } OBJ PATH \\\n" 2454 - " [type TYPE] [dev NAME] \\\n" 2419 + " [type TYPE] [{ offload_dev | xdpmeta_dev } NAME] \\\n" 2455 2420 " [map { idx IDX | name NAME } MAP]\\\n" 2456 2421 " [pinmaps MAP_DIR]\n" 2457 2422 " [autoattach]\n"

+1 -1

tools/bpf/bpftool/struct_ops.c

··· 509 509 if (argc == 1) 510 510 linkdir = GET_ARG(); 511 511 512 - if (linkdir && mount_bpffs_for_pin(linkdir)) { 512 + if (linkdir && mount_bpffs_for_pin(linkdir, true)) { 513 513 p_err("can't mount bpffs for pinning"); 514 514 return -1; 515 515 }

+10

tools/include/uapi/linux/bpf.h

··· 1272 1272 1273 1273 /* Create a map that will be registered/unregistered by the backed bpf_link */ 1274 1274 BPF_F_LINK = (1U << 13), 1275 + 1276 + /* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ 1277 + BPF_F_PATH_FD = (1U << 14), 1275 1278 }; 1276 1279 1277 1280 /* Flags for BPF_PROG_QUERY. */ ··· 1423 1420 __aligned_u64 pathname; 1424 1421 __u32 bpf_fd; 1425 1422 __u32 file_flags; 1423 + /* Same as dirfd in openat() syscall; see openat(2) 1424 + * manpage for details of path FD and pathname semantics; 1425 + * path_fd should be accompanied by BPF_F_PATH_FD flag set in 1426 + * file_flags field, otherwise it should be set to zero; 1427 + * if BPF_F_PATH_FD flag is not set, AT_FDCWD is assumed. 1428 + */ 1429 + __s32 path_fd; 1426 1430 }; 1427 1431 1428 1432 struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */

+14 -3

tools/lib/bpf/bpf.c

··· 572 572 (void *)keys, (void *)values, count, opts); 573 573 } 574 574 575 - int bpf_obj_pin(int fd, const char *pathname) 575 + int bpf_obj_pin_opts(int fd, const char *pathname, const struct bpf_obj_pin_opts *opts) 576 576 { 577 - const size_t attr_sz = offsetofend(union bpf_attr, file_flags); 577 + const size_t attr_sz = offsetofend(union bpf_attr, path_fd); 578 578 union bpf_attr attr; 579 579 int ret; 580 580 581 + if (!OPTS_VALID(opts, bpf_obj_pin_opts)) 582 + return libbpf_err(-EINVAL); 583 + 581 584 memset(&attr, 0, attr_sz); 585 + attr.path_fd = OPTS_GET(opts, path_fd, 0); 582 586 attr.pathname = ptr_to_u64((void *)pathname); 587 + attr.file_flags = OPTS_GET(opts, file_flags, 0); 583 588 attr.bpf_fd = fd; 584 589 585 590 ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz); 586 591 return libbpf_err_errno(ret); 592 + } 593 + 594 + int bpf_obj_pin(int fd, const char *pathname) 595 + { 596 + return bpf_obj_pin_opts(fd, pathname, NULL); 587 597 } 588 598 589 599 int bpf_obj_get(const char *pathname) ··· 603 593 604 594 int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts) 605 595 { 606 - const size_t attr_sz = offsetofend(union bpf_attr, file_flags); 596 + const size_t attr_sz = offsetofend(union bpf_attr, path_fd); 607 597 union bpf_attr attr; 608 598 int fd; 609 599 ··· 611 601 return libbpf_err(-EINVAL); 612 602 613 603 memset(&attr, 0, attr_sz); 604 + attr.path_fd = OPTS_GET(opts, path_fd, 0); 614 605 attr.pathname = ptr_to_u64((void *)pathname); 615 606 attr.file_flags = OPTS_GET(opts, file_flags, 0); 616 607

+16 -2

tools/lib/bpf/bpf.h

··· 284 284 __u32 *count, 285 285 const struct bpf_map_batch_opts *opts); 286 286 287 + struct bpf_obj_pin_opts { 288 + size_t sz; /* size of this struct for forward/backward compatibility */ 289 + 290 + __u32 file_flags; 291 + int path_fd; 292 + 293 + size_t :0; 294 + }; 295 + #define bpf_obj_pin_opts__last_field path_fd 296 + 297 + LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); 298 + LIBBPF_API int bpf_obj_pin_opts(int fd, const char *pathname, 299 + const struct bpf_obj_pin_opts *opts); 300 + 287 301 struct bpf_obj_get_opts { 288 302 size_t sz; /* size of this struct for forward/backward compatibility */ 289 303 290 304 __u32 file_flags; 305 + int path_fd; 291 306 292 307 size_t :0; 293 308 }; 294 - #define bpf_obj_get_opts__last_field file_flags 309 + #define bpf_obj_get_opts__last_field path_fd 295 310 296 - LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); 297 311 LIBBPF_API int bpf_obj_get(const char *pathname); 298 312 LIBBPF_API int bpf_obj_get_opts(const char *pathname, 299 313 const struct bpf_obj_get_opts *opts);

+1 -1

tools/lib/bpf/btf.c

··· 1064 1064 int err = 0; 1065 1065 long sz; 1066 1066 1067 - f = fopen(path, "rb"); 1067 + f = fopen(path, "rbe"); 1068 1068 if (!f) { 1069 1069 err = -errno; 1070 1070 goto err_out;

+7 -7

tools/lib/bpf/gen_loader.c

··· 703 703 /* obtain fd in BPF_REG_9 */ 704 704 emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); 705 705 emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); 706 - /* jump to fd_array store if fd denotes module BTF */ 706 + /* load fd_array slot pointer */ 707 + emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 708 + 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); 709 + /* store BTF fd in slot, 0 for vmlinux */ 710 + emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); 711 + /* jump to insn[insn_idx].off store if fd denotes module BTF */ 707 712 emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); 708 713 /* set the default value for off */ 709 714 emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); 710 715 /* skip BTF fd store for vmlinux BTF */ 711 - emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4)); 712 - /* load fd_array slot pointer */ 713 - emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE, 714 - 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); 715 - /* store BTF fd in slot */ 716 - emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); 716 + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1)); 717 717 /* store index into insn[insn_idx].off */ 718 718 emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx)); 719 719 log:

+135 -19

tools/lib/bpf/libbpf.c

··· 1500 1500 return map; 1501 1501 } 1502 1502 1503 - static size_t bpf_map_mmap_sz(const struct bpf_map *map) 1503 + static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries) 1504 1504 { 1505 - long page_sz = sysconf(_SC_PAGE_SIZE); 1505 + const long page_sz = sysconf(_SC_PAGE_SIZE); 1506 1506 size_t map_sz; 1507 1507 1508 - map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries; 1508 + map_sz = (size_t)roundup(value_sz, 8) * max_entries; 1509 1509 map_sz = roundup(map_sz, page_sz); 1510 1510 return map_sz; 1511 + } 1512 + 1513 + static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz) 1514 + { 1515 + void *mmaped; 1516 + 1517 + if (!map->mmaped) 1518 + return -EINVAL; 1519 + 1520 + if (old_sz == new_sz) 1521 + return 0; 1522 + 1523 + mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); 1524 + if (mmaped == MAP_FAILED) 1525 + return -errno; 1526 + 1527 + memcpy(mmaped, map->mmaped, min(old_sz, new_sz)); 1528 + munmap(map->mmaped, old_sz); 1529 + map->mmaped = mmaped; 1530 + return 0; 1511 1531 } 1512 1532 1513 1533 static char *internal_map_name(struct bpf_object *obj, const char *real_name) ··· 1628 1608 { 1629 1609 struct bpf_map_def *def; 1630 1610 struct bpf_map *map; 1611 + size_t mmap_sz; 1631 1612 int err; 1632 1613 1633 1614 map = bpf_object__add_map(obj); ··· 1663 1642 pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", 1664 1643 map->name, map->sec_idx, map->sec_offset, def->map_flags); 1665 1644 1666 - map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, 1645 + mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); 1646 + map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, 1667 1647 MAP_SHARED | MAP_ANONYMOUS, -1, 0); 1668 1648 if (map->mmaped == MAP_FAILED) { 1669 1649 err = -errno; ··· 4351 4329 snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd); 4352 4330 memset(info, 0, 
sizeof(*info)); 4353 4331 4354 - fp = fopen(file, "r"); 4332 + fp = fopen(file, "re"); 4355 4333 if (!fp) { 4356 4334 err = -errno; 4357 4335 pr_warn("failed to open %s: %d. No procfs support?\n", file, ··· 4414 4392 if (!new_name) 4415 4393 return libbpf_err(-errno); 4416 4394 4417 - new_fd = open("/", O_RDONLY | O_CLOEXEC); 4395 + /* 4396 + * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set. 4397 + * This is similar to what we do in ensure_good_fd(), but without 4398 + * closing original FD. 4399 + */ 4400 + new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); 4418 4401 if (new_fd < 0) { 4419 4402 err = -errno; 4420 4403 goto err_free_new_name; 4421 - } 4422 - 4423 - new_fd = dup3(fd, new_fd, O_CLOEXEC); 4424 - if (new_fd < 0) { 4425 - err = -errno; 4426 - goto err_close_new_fd; 4427 4404 } 4428 4405 4429 4406 err = zclose(map->fd); ··· 7454 7433 int ret, err = 0; 7455 7434 FILE *f; 7456 7435 7457 - f = fopen("/proc/kallsyms", "r"); 7436 + f = fopen("/proc/kallsyms", "re"); 7458 7437 if (!f) { 7459 7438 err = -errno; 7460 7439 pr_warn("failed to open /proc/kallsyms: %d\n", err); ··· 8315 8294 map->init_slots_sz = 0; 8316 8295 8317 8296 if (map->mmaped) { 8318 - munmap(map->mmaped, bpf_map_mmap_sz(map)); 8297 + size_t mmap_sz; 8298 + 8299 + mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); 8300 + munmap(map->mmaped, mmap_sz); 8319 8301 map->mmaped = NULL; 8320 8302 } 8321 8303 ··· 9436 9412 return map->def.value_size; 9437 9413 } 9438 9414 9415 + static int map_btf_datasec_resize(struct bpf_map *map, __u32 size) 9416 + { 9417 + struct btf *btf; 9418 + struct btf_type *datasec_type, *var_type; 9419 + struct btf_var_secinfo *var; 9420 + const struct btf_type *array_type; 9421 + const struct btf_array *array; 9422 + int vlen, element_sz, new_array_id; 9423 + __u32 nr_elements; 9424 + 9425 + /* check btf existence */ 9426 + btf = bpf_object__btf(map->obj); 9427 + if (!btf) 9428 + return -ENOENT; 9429 + 9430 + /* verify map is datasec */ 9431 + 
datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map)); 9432 + if (!btf_is_datasec(datasec_type)) { 9433 + pr_warn("map '%s': cannot be resized, map value type is not a datasec\n", 9434 + bpf_map__name(map)); 9435 + return -EINVAL; 9436 + } 9437 + 9438 + /* verify datasec has at least one var */ 9439 + vlen = btf_vlen(datasec_type); 9440 + if (vlen == 0) { 9441 + pr_warn("map '%s': cannot be resized, map value datasec is empty\n", 9442 + bpf_map__name(map)); 9443 + return -EINVAL; 9444 + } 9445 + 9446 + /* verify last var in the datasec is an array */ 9447 + var = &btf_var_secinfos(datasec_type)[vlen - 1]; 9448 + var_type = btf_type_by_id(btf, var->type); 9449 + array_type = skip_mods_and_typedefs(btf, var_type->type, NULL); 9450 + if (!btf_is_array(array_type)) { 9451 + pr_warn("map '%s': cannot be resized, last var must be an array\n", 9452 + bpf_map__name(map)); 9453 + return -EINVAL; 9454 + } 9455 + 9456 + /* verify request size aligns with array */ 9457 + array = btf_array(array_type); 9458 + element_sz = btf__resolve_size(btf, array->type); 9459 + if (element_sz <= 0 || (size - var->offset) % element_sz != 0) { 9460 + pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n", 9461 + bpf_map__name(map), element_sz, size); 9462 + return -EINVAL; 9463 + } 9464 + 9465 + /* create a new array based on the existing array, but with new length */ 9466 + nr_elements = (size - var->offset) / element_sz; 9467 + new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements); 9468 + if (new_array_id < 0) 9469 + return new_array_id; 9470 + 9471 + /* adding a new btf type invalidates existing pointers to btf objects, 9472 + * so refresh pointers before proceeding 9473 + */ 9474 + datasec_type = btf_type_by_id(btf, map->btf_value_type_id); 9475 + var = &btf_var_secinfos(datasec_type)[vlen - 1]; 9476 + var_type = btf_type_by_id(btf, var->type); 9477 + 9478 + /* finally update btf info */ 9479 + 
datasec_type->size = size; 9480 + var->size = size - var->offset; 9481 + var_type->type = new_array_id; 9482 + 9483 + return 0; 9484 + } 9485 + 9439 9486 int bpf_map__set_value_size(struct bpf_map *map, __u32 size) 9440 9487 { 9441 9488 if (map->fd >= 0) 9442 9489 return libbpf_err(-EBUSY); 9490 + 9491 + if (map->mmaped) { 9492 + int err; 9493 + size_t mmap_old_sz, mmap_new_sz; 9494 + 9495 + mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); 9496 + mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries); 9497 + err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); 9498 + if (err) { 9499 + pr_warn("map '%s': failed to resize memory-mapped region: %d\n", 9500 + bpf_map__name(map), err); 9501 + return err; 9502 + } 9503 + err = map_btf_datasec_resize(map, size); 9504 + if (err && err != -ENOENT) { 9505 + pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n", 9506 + bpf_map__name(map), err); 9507 + map->btf_value_type_id = 0; 9508 + map->btf_key_type_id = 0; 9509 + } 9510 + } 9511 + 9443 9512 map->def.value_size = size; 9444 9513 return 0; 9445 9514 } ··· 9558 9441 return 0; 9559 9442 } 9560 9443 9561 - const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) 9444 + void *bpf_map__initial_value(struct bpf_map *map, size_t *psize) 9562 9445 { 9563 9446 if (!map->mmaped) 9564 9447 return NULL; ··· 10074 9957 int err, ret; 10075 9958 FILE *f; 10076 9959 10077 - f = fopen(file, "r"); 9960 + f = fopen(file, "re"); 10078 9961 if (!f) { 10079 9962 err = -errno; 10080 9963 pr_debug("failed to open '%s': %s\n", file, ··· 12810 12693 12811 12694 for (i = 0; i < s->map_cnt; i++) { 12812 12695 struct bpf_map *map = *s->maps[i].map; 12813 - size_t mmap_sz = bpf_map_mmap_sz(map); 12696 + size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries); 12814 12697 int prot, map_fd = bpf_map__fd(map); 12815 12698 void **mmaped = s->maps[i].mmaped; 12816 12699 ··· 12837 12720 * as per normal clean up 
procedure, so we don't need to worry 12838 12721 * about it from skeleton's clean up perspective. 12839 12722 */ 12840 - *mmaped = mmap(map->mmaped, mmap_sz, prot, 12841 - MAP_SHARED | MAP_FIXED, map_fd, 0); 12723 + *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0); 12842 12724 if (*mmaped == MAP_FAILED) { 12843 12725 err = -errno; 12844 12726 *mmaped = NULL;

+16 -2

tools/lib/bpf/libbpf.h

··· 869 869 /* get/set map key size */ 870 870 LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map); 871 871 LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size); 872 - /* get/set map value size */ 872 + /* get map value size */ 873 873 LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map); 874 + /** 875 + * @brief **bpf_map__set_value_size()** sets map value size. 876 + * @param map the BPF map instance 877 + * @return 0, on success; negative error, otherwise 878 + * 879 + * There is a special case for maps with associated memory-mapped regions, like 880 + * the global data section maps (bss, data, rodata). When this function is used 881 + * on such a map, the mapped region is resized. Afterward, an attempt is made to 882 + * adjust the corresponding BTF info. This attempt is best-effort and can only 883 + * succeed if the last variable of the data section map is an array. The array 884 + * BTF type is replaced by a new BTF array type with a different length. 885 + * Any previously existing pointers returned from bpf_map__initial_value() or 886 + * corresponding data section skeleton pointer must be reinitialized. 887 + */ 874 888 LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size); 875 889 /* get map key/value BTF type IDs */ 876 890 LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); ··· 898 884 899 885 LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, 900 886 const void *data, size_t size); 901 - LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); 887 + LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); 902 888 903 889 /** 904 890 * @brief **bpf_map__is_internal()** tells the caller whether or not the

+5

tools/lib/bpf/libbpf.map

··· 391 391 bpf_map_get_info_by_fd; 392 392 bpf_prog_get_info_by_fd; 393 393 } LIBBPF_1.1.0; 394 + 395 + LIBBPF_1.3.0 { 396 + global: 397 + bpf_obj_pin_opts; 398 + } LIBBPF_1.2.0;

+1 -1

tools/lib/bpf/libbpf_probes.c

··· 38 38 if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) != 0) 39 39 return 0; 40 40 41 - f = fopen(ubuntu_kver_file, "r"); 41 + f = fopen(ubuntu_kver_file, "re"); 42 42 if (!f) 43 43 return 0; 44 44

+1 -1

tools/lib/bpf/libbpf_version.h

··· 4 4 #define __LIBBPF_VERSION_H 5 5 6 6 #define LIBBPF_MAJOR_VERSION 1 7 - #define LIBBPF_MINOR_VERSION 2 7 + #define LIBBPF_MINOR_VERSION 3 8 8 9 9 #endif /* __LIBBPF_VERSION_H */

+2 -3

tools/lib/bpf/usdt.c

··· 466 466 467 467 proceed: 468 468 sprintf(line, "/proc/%d/maps", pid); 469 - f = fopen(line, "r"); 469 + f = fopen(line, "re"); 470 470 if (!f) { 471 471 err = -errno; 472 472 pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", ··· 954 954 spec_map_fd = bpf_map__fd(man->specs_map); 955 955 ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); 956 956 957 - /* TODO: perform path resolution similar to uprobe's */ 958 - fd = open(path, O_RDONLY); 957 + fd = open(path, O_RDONLY | O_CLOEXEC); 959 958 if (fd < 0) { 960 959 err = -errno; 961 960 pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err);

+1 -2

tools/testing/selftests/bpf/Makefile

··· 88 88 xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ 89 89 xdp_features 90 90 91 - TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file 92 - TEST_GEN_FILES += liburandom_read.so 91 + TEST_GEN_FILES += liburandom_read.so urandom_read sign-file 93 92 94 93 # Emit succinct information message describing current building step 95 94 # $1 - generic step name (e.g., CC, LINK, etc);

+2 -2

tools/testing/selftests/bpf/bpf_kfuncs.h

··· 36 36 void *buffer, __u32 buffer__szk) __ksym; 37 37 38 38 extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym; 39 - extern int bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym; 40 - extern int bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; 39 + extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym; 40 + extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; 41 41 extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym; 42 42 extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym; 43 43

+166

tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c

··· 9 9 #include <linux/sysfs.h> 10 10 #include <linux/tracepoint.h> 11 11 #include "bpf_testmod.h" 12 + #include "bpf_testmod_kfunc.h" 12 13 13 14 #define CREATE_TRACE_POINTS 14 15 #include "bpf_testmod-events.h" ··· 290 289 .set = &bpf_testmod_common_kfunc_ids, 291 290 }; 292 291 292 + __bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) 293 + { 294 + return a + b + c + d; 295 + } 296 + 297 + __bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) 298 + { 299 + return a + b; 300 + } 301 + 302 + __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk) 303 + { 304 + return sk; 305 + } 306 + 307 + __bpf_kfunc long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) 308 + { 309 + /* Provoke the compiler to assume that the caller has sign-extended a, 310 + * b and c on platforms where this is required (e.g. s390x). 311 + */ 312 + return (long)a + (long)b + (long)c + d; 313 + } 314 + 315 + static struct prog_test_ref_kfunc prog_test_struct = { 316 + .a = 42, 317 + .b = 108, 318 + .next = &prog_test_struct, 319 + .cnt = REFCOUNT_INIT(1), 320 + }; 321 + 322 + __bpf_kfunc struct prog_test_ref_kfunc * 323 + bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) 324 + { 325 + refcount_inc(&prog_test_struct.cnt); 326 + return &prog_test_struct; 327 + } 328 + 329 + __bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) 330 + { 331 + WARN_ON_ONCE(1); 332 + } 333 + 334 + __bpf_kfunc struct prog_test_member * 335 + bpf_kfunc_call_memb_acquire(void) 336 + { 337 + WARN_ON_ONCE(1); 338 + return NULL; 339 + } 340 + 341 + __bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) 342 + { 343 + WARN_ON_ONCE(1); 344 + } 345 + 346 + static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size) 347 + { 348 + if (size > 2 * sizeof(int)) 349 + return NULL; 350 + 351 + return (int *)p; 352 + } 353 + 354 + __bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct 
prog_test_ref_kfunc *p, 355 + const int rdwr_buf_size) 356 + { 357 + return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); 358 + } 359 + 360 + __bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, 361 + const int rdonly_buf_size) 362 + { 363 + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); 364 + } 365 + 366 + /* the next 2 ones can't be really used for testing except to ensure 367 + * that the verifier rejects the call. 368 + * Acquire functions must return struct pointers, so these ones are 369 + * failing. 370 + */ 371 + __bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, 372 + const int rdonly_buf_size) 373 + { 374 + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); 375 + } 376 + 377 + __bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p) 378 + { 379 + } 380 + 381 + __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) 382 + { 383 + } 384 + 385 + __bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) 386 + { 387 + } 388 + 389 + __bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) 390 + { 391 + } 392 + 393 + __bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) 394 + { 395 + } 396 + 397 + __bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) 398 + { 399 + } 400 + 401 + __bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) 402 + { 403 + } 404 + 405 + __bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) 406 + { 407 + } 408 + 409 + __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) 410 + { 411 + } 412 + 413 + __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) 414 + { 415 + } 416 + 417 + __bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) 418 + { 419 + /* p != NULL, but p->cnt could be 0 */ 420 + } 421 + 422 + __bpf_kfunc void bpf_kfunc_call_test_destructive(void) 423 + { 424 + } 425 + 426 + __bpf_kfunc static 
u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) 427 + { 428 + return arg; 429 + } 430 + 293 431 BTF_SET8_START(bpf_testmod_check_kfunc_ids) 294 432 BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) 433 + BTF_ID_FLAGS(func, bpf_kfunc_call_test1) 434 + BTF_ID_FLAGS(func, bpf_kfunc_call_test2) 435 + BTF_ID_FLAGS(func, bpf_kfunc_call_test3) 436 + BTF_ID_FLAGS(func, bpf_kfunc_call_test4) 437 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) 438 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) 439 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) 440 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) 441 + BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) 442 + BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) 443 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL) 444 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL) 445 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL) 446 + BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE) 447 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) 448 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) 449 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) 450 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) 451 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) 452 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) 453 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) 454 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) 455 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) 456 + BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) 295 457 BTF_SET8_END(bpf_testmod_check_kfunc_ids) 296 458 297 459 static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { ··· 476 312 477 313 ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set); 478 314 ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, 
&bpf_testmod_kfunc_set); 315 + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set); 316 + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set); 479 317 if (ret < 0) 480 318 return ret; 481 319 if (bpf_fentry_test1(0) < 0)

+100

tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h

··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #ifndef _BPF_TESTMOD_KFUNC_H 4 + #define _BPF_TESTMOD_KFUNC_H 5 + 6 + #ifndef __KERNEL__ 7 + #include <vmlinux.h> 8 + #include <bpf/bpf_helpers.h> 9 + #else 10 + #define __ksym 11 + struct prog_test_member1 { 12 + int a; 13 + }; 14 + 15 + struct prog_test_member { 16 + struct prog_test_member1 m; 17 + int c; 18 + }; 19 + 20 + struct prog_test_ref_kfunc { 21 + int a; 22 + int b; 23 + struct prog_test_member memb; 24 + struct prog_test_ref_kfunc *next; 25 + refcount_t cnt; 26 + }; 27 + #endif 28 + 29 + struct prog_test_pass1 { 30 + int x0; 31 + struct { 32 + int x1; 33 + struct { 34 + int x2; 35 + struct { 36 + int x3; 37 + }; 38 + }; 39 + }; 40 + }; 41 + 42 + struct prog_test_pass2 { 43 + int len; 44 + short arr1[4]; 45 + struct { 46 + char arr2[4]; 47 + unsigned long arr3[8]; 48 + } x; 49 + }; 50 + 51 + struct prog_test_fail1 { 52 + void *p; 53 + int x; 54 + }; 55 + 56 + struct prog_test_fail2 { 57 + int x8; 58 + struct prog_test_pass1 x; 59 + }; 60 + 61 + struct prog_test_fail3 { 62 + int len; 63 + char arr1[2]; 64 + char arr2[]; 65 + }; 66 + 67 + struct prog_test_ref_kfunc * 68 + bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym; 69 + void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 70 + void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; 71 + 72 + void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; 73 + int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; 74 + int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; 75 + int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; 76 + void bpf_kfunc_call_int_mem_release(int *p) __ksym; 77 + 78 + /* The bpf_kfunc_call_test_static_unused_arg is defined as static, 79 + * but bpf program compilation needs to see it as global symbol. 
80 + */ 81 + #ifndef __KERNEL__ 82 + u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; 83 + #endif 84 + 85 + void bpf_testmod_test_mod_kfunc(int i) __ksym; 86 + 87 + __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, 88 + __u32 c, __u64 d) __ksym; 89 + int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; 90 + struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; 91 + long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; 92 + 93 + void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; 94 + void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; 95 + void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; 96 + void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; 97 + 98 + void bpf_kfunc_call_test_destructive(void) __ksym; 99 + 100 + #endif /* _BPF_TESTMOD_KFUNC_H */

+23

tools/testing/selftests/bpf/network_helpers.c

··· 427 427 close(token->orig_netns_fd); 428 428 free(token); 429 429 } 430 + 431 + int get_socket_local_port(int sock_fd) 432 + { 433 + struct sockaddr_storage addr; 434 + socklen_t addrlen = sizeof(addr); 435 + int err; 436 + 437 + err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen); 438 + if (err < 0) 439 + return err; 440 + 441 + if (addr.ss_family == AF_INET) { 442 + struct sockaddr_in *sin = (struct sockaddr_in *)&addr; 443 + 444 + return sin->sin_port; 445 + } else if (addr.ss_family == AF_INET6) { 446 + struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr; 447 + 448 + return sin->sin6_port; 449 + } 450 + 451 + return -1; 452 + }

+1

tools/testing/selftests/bpf/network_helpers.h

··· 56 56 int make_sockaddr(int family, const char *addr_str, __u16 port, 57 57 struct sockaddr_storage *addr, socklen_t *len); 58 58 char *ping_command(int family); 59 + int get_socket_local_port(int sock_fd); 59 60 60 61 struct nstoken; 61 62 /**

+5 -29

tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c

··· 11 11 #include "ksym_race.skel.h" 12 12 #include "bpf_mod_race.skel.h" 13 13 #include "kfunc_call_race.skel.h" 14 + #include "testing_helpers.h" 14 15 15 16 /* This test crafts a race between btf_try_get_module and do_init_module, and 16 17 * checks whether btf_try_get_module handles the invocation for a well-formed ··· 45 44 46 45 static _Atomic enum bpf_test_state state = _TS_INVALID; 47 46 48 - static int sys_finit_module(int fd, const char *param_values, int flags) 49 - { 50 - return syscall(__NR_finit_module, fd, param_values, flags); 51 - } 52 - 53 - static int sys_delete_module(const char *name, unsigned int flags) 54 - { 55 - return syscall(__NR_delete_module, name, flags); 56 - } 57 - 58 - static int load_module(const char *mod) 59 - { 60 - int ret, fd; 61 - 62 - fd = open("bpf_testmod.ko", O_RDONLY); 63 - if (fd < 0) 64 - return fd; 65 - 66 - ret = sys_finit_module(fd, "", 0); 67 - close(fd); 68 - if (ret < 0) 69 - return ret; 70 - return 0; 71 - } 72 - 73 47 static void *load_module_thread(void *p) 74 48 { 75 49 76 - if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail")) 50 + if (!ASSERT_NEQ(load_bpf_testmod(false), 0, "load_module_thread must fail")) 77 51 atomic_store(&state, TS_MODULE_LOAD); 78 52 else 79 53 atomic_store(&state, TS_MODULE_LOAD_FAIL); ··· 100 124 if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration")) 101 125 return; 102 126 103 - if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod")) 127 + if (!ASSERT_OK(unload_bpf_testmod(false), "unload bpf_testmod")) 104 128 goto end_mmap; 105 129 106 130 skel = bpf_mod_race__open(); ··· 178 202 bpf_mod_race__destroy(skel); 179 203 ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); 180 204 end_module: 181 - sys_delete_module("bpf_testmod", 0); 182 - ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod"); 205 + unload_bpf_testmod(false); 206 + ASSERT_OK(load_bpf_testmod(false), "restore bpf_testmod"); 183 207 end_mmap: 184 208 
munmap(fault_addr, 4096); 185 209 atomic_store(&state, _TS_INVALID);

+268

tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ 3 + #define _GNU_SOURCE 4 + #include <test_progs.h> 5 + #include <bpf/btf.h> 6 + #include <fcntl.h> 7 + #include <unistd.h> 8 + #include <linux/unistd.h> 9 + #include <linux/mount.h> 10 + #include <sys/syscall.h> 11 + 12 + static inline int sys_fsopen(const char *fsname, unsigned flags) 13 + { 14 + return syscall(__NR_fsopen, fsname, flags); 15 + } 16 + 17 + static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) 18 + { 19 + return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); 20 + } 21 + 22 + static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) 23 + { 24 + return syscall(__NR_fsmount, fs_fd, flags, ms_flags); 25 + } 26 + 27 + __attribute__((unused)) 28 + static inline int sys_move_mount(int from_dfd, const char *from_path, 29 + int to_dfd, const char *to_path, 30 + unsigned int ms_flags) 31 + { 32 + return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, ms_flags); 33 + } 34 + 35 + static void bpf_obj_pinning_detached(void) 36 + { 37 + LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts); 38 + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); 39 + int fs_fd = -1, mnt_fd = -1; 40 + int map_fd = -1, map_fd2 = -1; 41 + int zero = 0, src_value, dst_value, err; 42 + const char *map_name = "fsmount_map"; 43 + 44 + /* A bunch of below UAPI calls are constructed based on reading: 45 + * https://brauner.io/2023/02/28/mounting-into-mount-namespaces.html 46 + */ 47 + 48 + /* create VFS context */ 49 + fs_fd = sys_fsopen("bpf", 0); 50 + if (!ASSERT_GE(fs_fd, 0, "fs_fd")) 51 + goto cleanup; 52 + 53 + /* instantiate FS object */ 54 + err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); 55 + if (!ASSERT_OK(err, "fs_create")) 56 + goto cleanup; 57 + 58 + /* create O_PATH fd for detached mount */ 59 + mnt_fd = sys_fsmount(fs_fd, 0, 0); 60 + if (!ASSERT_GE(mnt_fd, 0, "mnt_fd")) 61 + goto cleanup; 62 + 
63 + /* If we wanted to expose detached mount in the file system, we'd do 64 + * something like below. But the whole point is that we actually don't 65 + * even have to expose BPF FS in the file system to be able to work 66 + * (pin/get objects) with it. 67 + * 68 + * err = sys_move_mount(mnt_fd, "", -EBADF, mnt_path, MOVE_MOUNT_F_EMPTY_PATH); 69 + * if (!ASSERT_OK(err, "move_mount")) 70 + * goto cleanup; 71 + */ 72 + 73 + /* create BPF map to pin */ 74 + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL); 75 + if (!ASSERT_GE(map_fd, 0, "map_fd")) 76 + goto cleanup; 77 + 78 + /* pin BPF map into detached BPF FS through mnt_fd */ 79 + pin_opts.file_flags = BPF_F_PATH_FD; 80 + pin_opts.path_fd = mnt_fd; 81 + err = bpf_obj_pin_opts(map_fd, map_name, &pin_opts); 82 + if (!ASSERT_OK(err, "map_pin")) 83 + goto cleanup; 84 + 85 + /* get BPF map from detached BPF FS through mnt_fd */ 86 + get_opts.file_flags = BPF_F_PATH_FD; 87 + get_opts.path_fd = mnt_fd; 88 + map_fd2 = bpf_obj_get_opts(map_name, &get_opts); 89 + if (!ASSERT_GE(map_fd2, 0, "map_get")) 90 + goto cleanup; 91 + 92 + /* update map through one FD */ 93 + src_value = 0xcafebeef; 94 + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); 95 + ASSERT_OK(err, "map_update"); 96 + 97 + /* check values written/read through different FDs do match */ 98 + dst_value = 0; 99 + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); 100 + ASSERT_OK(err, "map_lookup"); 101 + ASSERT_EQ(dst_value, src_value, "map_value_eq1"); 102 + ASSERT_EQ(dst_value, 0xcafebeef, "map_value_eq2"); 103 + 104 + cleanup: 105 + if (map_fd >= 0) 106 + ASSERT_OK(close(map_fd), "close_map_fd"); 107 + if (map_fd2 >= 0) 108 + ASSERT_OK(close(map_fd2), "close_map_fd2"); 109 + if (fs_fd >= 0) 110 + ASSERT_OK(close(fs_fd), "close_fs_fd"); 111 + if (mnt_fd >= 0) 112 + ASSERT_OK(close(mnt_fd), "close_mnt_fd"); 113 + } 114 + 115 + enum path_kind 116 + { 117 + PATH_STR_ABS, 118 + PATH_STR_REL, 119 + PATH_FD_REL, 120 + }; 121 + 122 + 
static void validate_pin(int map_fd, const char *map_name, int src_value, 123 + enum path_kind path_kind) 124 + { 125 + LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts); 126 + char abs_path[PATH_MAX], old_cwd[PATH_MAX]; 127 + const char *pin_path = NULL; 128 + int zero = 0, dst_value, map_fd2, err; 129 + 130 + snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name); 131 + old_cwd[0] = '\0'; 132 + 133 + switch (path_kind) { 134 + case PATH_STR_ABS: 135 + /* absolute path */ 136 + pin_path = abs_path; 137 + break; 138 + case PATH_STR_REL: 139 + /* cwd + relative path */ 140 + ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd"); 141 + ASSERT_OK(chdir("/sys/fs/bpf"), "chdir"); 142 + pin_path = map_name; 143 + break; 144 + case PATH_FD_REL: 145 + /* dir fd + relative path */ 146 + pin_opts.file_flags = BPF_F_PATH_FD; 147 + pin_opts.path_fd = open("/sys/fs/bpf", O_PATH); 148 + ASSERT_GE(pin_opts.path_fd, 0, "path_fd"); 149 + pin_path = map_name; 150 + break; 151 + } 152 + 153 + /* pin BPF map using specified path definition */ 154 + err = bpf_obj_pin_opts(map_fd, pin_path, &pin_opts); 155 + ASSERT_OK(err, "obj_pin"); 156 + 157 + /* cleanup */ 158 + if (pin_opts.path_fd >= 0) 159 + close(pin_opts.path_fd); 160 + if (old_cwd[0]) 161 + ASSERT_OK(chdir(old_cwd), "restore_cwd"); 162 + 163 + map_fd2 = bpf_obj_get(abs_path); 164 + if (!ASSERT_GE(map_fd2, 0, "map_get")) 165 + goto cleanup; 166 + 167 + /* update map through one FD */ 168 + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); 169 + ASSERT_OK(err, "map_update"); 170 + 171 + /* check values written/read through different FDs do match */ 172 + dst_value = 0; 173 + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); 174 + ASSERT_OK(err, "map_lookup"); 175 + ASSERT_EQ(dst_value, src_value, "map_value_eq"); 176 + cleanup: 177 + if (map_fd2 >= 0) 178 + ASSERT_OK(close(map_fd2), "close_map_fd2"); 179 + unlink(abs_path); 180 + } 181 + 182 + static void validate_get(int map_fd, const char *map_name, int 
src_value, 183 + enum path_kind path_kind) 184 + { 185 + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); 186 + char abs_path[PATH_MAX], old_cwd[PATH_MAX]; 187 + const char *pin_path = NULL; 188 + int zero = 0, dst_value, map_fd2, err; 189 + 190 + snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name); 191 + /* pin BPF map using specified path definition */ 192 + err = bpf_obj_pin(map_fd, abs_path); 193 + if (!ASSERT_OK(err, "pin_map")) 194 + return; 195 + 196 + old_cwd[0] = '\0'; 197 + 198 + switch (path_kind) { 199 + case PATH_STR_ABS: 200 + /* absolute path */ 201 + pin_path = abs_path; 202 + break; 203 + case PATH_STR_REL: 204 + /* cwd + relative path */ 205 + ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd"); 206 + ASSERT_OK(chdir("/sys/fs/bpf"), "chdir"); 207 + pin_path = map_name; 208 + break; 209 + case PATH_FD_REL: 210 + /* dir fd + relative path */ 211 + get_opts.file_flags = BPF_F_PATH_FD; 212 + get_opts.path_fd = open("/sys/fs/bpf", O_PATH); 213 + ASSERT_GE(get_opts.path_fd, 0, "path_fd"); 214 + pin_path = map_name; 215 + break; 216 + } 217 + 218 + map_fd2 = bpf_obj_get_opts(pin_path, &get_opts); 219 + if (!ASSERT_GE(map_fd2, 0, "map_get")) 220 + goto cleanup; 221 + 222 + /* cleanup */ 223 + if (get_opts.path_fd >= 0) 224 + close(get_opts.path_fd); 225 + if (old_cwd[0]) 226 + ASSERT_OK(chdir(old_cwd), "restore_cwd"); 227 + 228 + /* update map through one FD */ 229 + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); 230 + ASSERT_OK(err, "map_update"); 231 + 232 + /* check values written/read through different FDs do match */ 233 + dst_value = 0; 234 + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); 235 + ASSERT_OK(err, "map_lookup"); 236 + ASSERT_EQ(dst_value, src_value, "map_value_eq"); 237 + cleanup: 238 + if (map_fd2 >= 0) 239 + ASSERT_OK(close(map_fd2), "close_map_fd2"); 240 + unlink(abs_path); 241 + } 242 + 243 + static void bpf_obj_pinning_mounted(enum path_kind path_kind) 244 + { 245 + const char *map_name = "mounted_map"; 
246 + int map_fd; 247 + 248 + /* create BPF map to pin */ 249 + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL); 250 + if (!ASSERT_GE(map_fd, 0, "map_fd")) 251 + return; 252 + 253 + validate_pin(map_fd, map_name, 100 + (int)path_kind, path_kind); 254 + validate_get(map_fd, map_name, 200 + (int)path_kind, path_kind); 255 + ASSERT_OK(close(map_fd), "close_map_fd"); 256 + } 257 + 258 + void test_bpf_obj_pinning() 259 + { 260 + if (test__start_subtest("detached")) 261 + bpf_obj_pinning_detached(); 262 + if (test__start_subtest("mounted-str-abs")) 263 + bpf_obj_pinning_mounted(PATH_STR_ABS); 264 + if (test__start_subtest("mounted-str-rel")) 265 + bpf_obj_pinning_mounted(PATH_STR_REL); 266 + if (test__start_subtest("mounted-fd-rel")) 267 + bpf_obj_pinning_mounted(PATH_FD_REL); 268 + }

+227

tools/testing/selftests/bpf/prog_tests/global_map_resize.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ 3 + #include <errno.h> 4 + #include <sys/syscall.h> 5 + #include <unistd.h> 6 + #include "test_global_map_resize.skel.h" 7 + #include "test_progs.h" 8 + 9 + static void run_prog_bss_array_sum(void) 10 + { 11 + (void)syscall(__NR_getpid); 12 + } 13 + 14 + static void run_prog_data_array_sum(void) 15 + { 16 + (void)syscall(__NR_getuid); 17 + } 18 + 19 + static void global_map_resize_bss_subtest(void) 20 + { 21 + int err; 22 + struct test_global_map_resize *skel; 23 + struct bpf_map *map; 24 + const __u32 desired_sz = sizeof(skel->bss->sum) + sysconf(_SC_PAGE_SIZE) * 2; 25 + size_t array_len, actual_sz; 26 + 27 + skel = test_global_map_resize__open(); 28 + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) 29 + goto teardown; 30 + 31 + /* set some initial value before resizing. 32 + * it is expected this non-zero value will be preserved 33 + * while resizing. 34 + */ 35 + skel->bss->array[0] = 1; 36 + 37 + /* resize map value and verify the new size */ 38 + map = skel->maps.bss; 39 + err = bpf_map__set_value_size(map, desired_sz); 40 + if (!ASSERT_OK(err, "bpf_map__set_value_size")) 41 + goto teardown; 42 + if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize")) 43 + goto teardown; 44 + 45 + /* set the expected number of elements based on the resized array */ 46 + array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->bss->array[0]); 47 + if (!ASSERT_GT(array_len, 1, "array_len")) 48 + goto teardown; 49 + 50 + skel->bss = bpf_map__initial_value(skel->maps.bss, &actual_sz); 51 + if (!ASSERT_OK_PTR(skel->bss, "bpf_map__initial_value (ptr)")) 52 + goto teardown; 53 + if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)")) 54 + goto teardown; 55 + 56 + /* fill the newly resized array with ones, 57 + * skipping the first element which was previously set 58 + */ 59 + for (int i = 1; i < array_len; i++) 60 + 
skel->bss->array[i] = 1; 61 + 62 + /* set global const values before loading */ 63 + skel->rodata->pid = getpid(); 64 + skel->rodata->bss_array_len = array_len; 65 + skel->rodata->data_array_len = 1; 66 + 67 + err = test_global_map_resize__load(skel); 68 + if (!ASSERT_OK(err, "test_global_map_resize__load")) 69 + goto teardown; 70 + err = test_global_map_resize__attach(skel); 71 + if (!ASSERT_OK(err, "test_global_map_resize__attach")) 72 + goto teardown; 73 + 74 + /* run the bpf program which will sum the contents of the array. 75 + * since the array was filled with ones,verify the sum equals array_len 76 + */ 77 + run_prog_bss_array_sum(); 78 + if (!ASSERT_EQ(skel->bss->sum, array_len, "sum")) 79 + goto teardown; 80 + 81 + teardown: 82 + test_global_map_resize__destroy(skel); 83 + } 84 + 85 + static void global_map_resize_data_subtest(void) 86 + { 87 + int err; 88 + struct test_global_map_resize *skel; 89 + struct bpf_map *map; 90 + const __u32 desired_sz = sysconf(_SC_PAGE_SIZE) * 2; 91 + size_t array_len, actual_sz; 92 + 93 + skel = test_global_map_resize__open(); 94 + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) 95 + goto teardown; 96 + 97 + /* set some initial value before resizing. 98 + * it is expected this non-zero value will be preserved 99 + * while resizing. 
100 + */ 101 + skel->data_custom->my_array[0] = 1; 102 + 103 + /* resize map value and verify the new size */ 104 + map = skel->maps.data_custom; 105 + err = bpf_map__set_value_size(map, desired_sz); 106 + if (!ASSERT_OK(err, "bpf_map__set_value_size")) 107 + goto teardown; 108 + if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize")) 109 + goto teardown; 110 + 111 + /* set the expected number of elements based on the resized array */ 112 + array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->data_custom->my_array[0]); 113 + if (!ASSERT_GT(array_len, 1, "array_len")) 114 + goto teardown; 115 + 116 + skel->data_custom = bpf_map__initial_value(skel->maps.data_custom, &actual_sz); 117 + if (!ASSERT_OK_PTR(skel->data_custom, "bpf_map__initial_value (ptr)")) 118 + goto teardown; 119 + if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)")) 120 + goto teardown; 121 + 122 + /* fill the newly resized array with ones, 123 + * skipping the first element which was previously set 124 + */ 125 + for (int i = 1; i < array_len; i++) 126 + skel->data_custom->my_array[i] = 1; 127 + 128 + /* set global const values before loading */ 129 + skel->rodata->pid = getpid(); 130 + skel->rodata->bss_array_len = 1; 131 + skel->rodata->data_array_len = array_len; 132 + 133 + err = test_global_map_resize__load(skel); 134 + if (!ASSERT_OK(err, "test_global_map_resize__load")) 135 + goto teardown; 136 + err = test_global_map_resize__attach(skel); 137 + if (!ASSERT_OK(err, "test_global_map_resize__attach")) 138 + goto teardown; 139 + 140 + /* run the bpf program which will sum the contents of the array. 
141 + * since the array was filled with ones,verify the sum equals array_len 142 + */ 143 + run_prog_data_array_sum(); 144 + if (!ASSERT_EQ(skel->bss->sum, array_len, "sum")) 145 + goto teardown; 146 + 147 + teardown: 148 + test_global_map_resize__destroy(skel); 149 + } 150 + 151 + static void global_map_resize_invalid_subtest(void) 152 + { 153 + int err; 154 + struct test_global_map_resize *skel; 155 + struct bpf_map *map; 156 + __u32 element_sz, desired_sz; 157 + 158 + skel = test_global_map_resize__open(); 159 + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) 160 + return; 161 + 162 + /* attempt to resize a global datasec map to size 163 + * which does NOT align with array 164 + */ 165 + map = skel->maps.data_custom; 166 + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.custom initial btf")) 167 + goto teardown; 168 + /* set desired size a fraction of element size beyond an aligned size */ 169 + element_sz = sizeof(skel->data_custom->my_array[0]); 170 + desired_sz = element_sz + element_sz / 2; 171 + /* confirm desired size does NOT align with array */ 172 + if (!ASSERT_NEQ(desired_sz % element_sz, 0, "my_array alignment")) 173 + goto teardown; 174 + err = bpf_map__set_value_size(map, desired_sz); 175 + /* confirm resize is OK but BTF info is cleared */ 176 + if (!ASSERT_OK(err, ".data.custom bpf_map__set_value_size") || 177 + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.custom clear btf key") || 178 + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.custom clear btf val")) 179 + goto teardown; 180 + 181 + /* attempt to resize a global datasec map whose only var is NOT an array */ 182 + map = skel->maps.data_non_array; 183 + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array initial btf")) 184 + goto teardown; 185 + /* set desired size to arbitrary value */ 186 + desired_sz = 1024; 187 + err = bpf_map__set_value_size(map, desired_sz); 188 + /* confirm resize is OK but BTF info is cleared */ 189 + if 
(!ASSERT_OK(err, ".data.non_array bpf_map__set_value_size") || 190 + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.non_array clear btf key") || 191 + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array clear btf val")) 192 + goto teardown; 193 + 194 + /* attempt to resize a global datasec map 195 + * whose last var is NOT an array 196 + */ 197 + map = skel->maps.data_array_not_last; 198 + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last initial btf")) 199 + goto teardown; 200 + /* set desired size to a multiple of element size */ 201 + element_sz = sizeof(skel->data_array_not_last->my_array_first[0]); 202 + desired_sz = element_sz * 8; 203 + /* confirm desired size aligns with array */ 204 + if (!ASSERT_EQ(desired_sz % element_sz, 0, "my_array_first alignment")) 205 + goto teardown; 206 + err = bpf_map__set_value_size(map, desired_sz); 207 + /* confirm resize is OK but BTF info is cleared */ 208 + if (!ASSERT_OK(err, ".data.array_not_last bpf_map__set_value_size") || 209 + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.array_not_last clear btf key") || 210 + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last clear btf val")) 211 + goto teardown; 212 + 213 + teardown: 214 + test_global_map_resize__destroy(skel); 215 + } 216 + 217 + void test_global_map_resize(void) 218 + { 219 + if (test__start_subtest("global_map_resize_bss")) 220 + global_map_resize_bss_subtest(); 221 + 222 + if (test__start_subtest("global_map_resize_data")) 223 + global_map_resize_data_subtest(); 224 + 225 + if (test__start_subtest("global_map_resize_invalid")) 226 + global_map_resize_invalid_subtest(); 227 + }

+4 -8

tools/testing/selftests/bpf/prog_tests/module_attach.c

··· 4 4 #include <test_progs.h> 5 5 #include <stdbool.h> 6 6 #include "test_module_attach.skel.h" 7 + #include "testing_helpers.h" 7 8 8 9 static int duration; 9 10 ··· 31 30 close(fd); 32 31 33 32 return 0; 34 - } 35 - 36 - static int delete_module(const char *name, int flags) 37 - { 38 - return syscall(__NR_delete_module, name, flags); 39 33 } 40 34 41 35 void test_module_attach(void) ··· 89 93 if (!ASSERT_OK_PTR(link, "attach_fentry")) 90 94 goto cleanup; 91 95 92 - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); 96 + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); 93 97 bpf_link__destroy(link); 94 98 95 99 link = bpf_program__attach(skel->progs.handle_fexit); 96 100 if (!ASSERT_OK_PTR(link, "attach_fexit")) 97 101 goto cleanup; 98 102 99 - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); 103 + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); 100 104 bpf_link__destroy(link); 101 105 102 106 link = bpf_program__attach(skel->progs.kprobe_multi); 103 107 if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) 104 108 goto cleanup; 105 109 106 - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); 110 + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); 107 111 bpf_link__destroy(link); 108 112 109 113 cleanup:

+2 -2

tools/testing/selftests/bpf/prog_tests/netcnt.c

··· 67 67 } 68 68 69 69 /* No packets should be lost */ 70 - ASSERT_EQ(packets, 10000, "packets"); 70 + ASSERT_GE(packets, 10000, "packets"); 71 71 72 72 /* Let's check that bytes counter matches the number of packets 73 73 * multiplied by the size of ipv6 ICMP packet. 74 74 */ 75 - ASSERT_EQ(bytes, packets * 104, "bytes"); 75 + ASSERT_GE(bytes, packets * 104, "bytes"); 76 76 77 77 err: 78 78 if (cg_fd != -1)

+221

tools/testing/selftests/bpf/prog_tests/sock_destroy.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <test_progs.h> 3 + #include <bpf/bpf_endian.h> 4 + 5 + #include "sock_destroy_prog.skel.h" 6 + #include "sock_destroy_prog_fail.skel.h" 7 + #include "network_helpers.h" 8 + 9 + #define TEST_NS "sock_destroy_netns" 10 + 11 + static void start_iter_sockets(struct bpf_program *prog) 12 + { 13 + struct bpf_link *link; 14 + char buf[50] = {}; 15 + int iter_fd, len; 16 + 17 + link = bpf_program__attach_iter(prog, NULL); 18 + if (!ASSERT_OK_PTR(link, "attach_iter")) 19 + return; 20 + 21 + iter_fd = bpf_iter_create(bpf_link__fd(link)); 22 + if (!ASSERT_GE(iter_fd, 0, "create_iter")) 23 + goto free_link; 24 + 25 + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) 26 + ; 27 + ASSERT_GE(len, 0, "read"); 28 + 29 + close(iter_fd); 30 + 31 + free_link: 32 + bpf_link__destroy(link); 33 + } 34 + 35 + static void test_tcp_client(struct sock_destroy_prog *skel) 36 + { 37 + int serv = -1, clien = -1, accept_serv = -1, n; 38 + 39 + serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); 40 + if (!ASSERT_GE(serv, 0, "start_server")) 41 + goto cleanup; 42 + 43 + clien = connect_to_fd(serv, 0); 44 + if (!ASSERT_GE(clien, 0, "connect_to_fd")) 45 + goto cleanup; 46 + 47 + accept_serv = accept(serv, NULL, NULL); 48 + if (!ASSERT_GE(accept_serv, 0, "serv accept")) 49 + goto cleanup; 50 + 51 + n = send(clien, "t", 1, 0); 52 + if (!ASSERT_EQ(n, 1, "client send")) 53 + goto cleanup; 54 + 55 + /* Run iterator program that destroys connected client sockets. 
*/ 56 + start_iter_sockets(skel->progs.iter_tcp6_client); 57 + 58 + n = send(clien, "t", 1, 0); 59 + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) 60 + goto cleanup; 61 + ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket"); 62 + 63 + cleanup: 64 + if (clien != -1) 65 + close(clien); 66 + if (accept_serv != -1) 67 + close(accept_serv); 68 + if (serv != -1) 69 + close(serv); 70 + } 71 + 72 + static void test_tcp_server(struct sock_destroy_prog *skel) 73 + { 74 + int serv = -1, clien = -1, accept_serv = -1, n, serv_port; 75 + 76 + serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); 77 + if (!ASSERT_GE(serv, 0, "start_server")) 78 + goto cleanup; 79 + serv_port = get_socket_local_port(serv); 80 + if (!ASSERT_GE(serv_port, 0, "get_sock_local_port")) 81 + goto cleanup; 82 + skel->bss->serv_port = (__be16) serv_port; 83 + 84 + clien = connect_to_fd(serv, 0); 85 + if (!ASSERT_GE(clien, 0, "connect_to_fd")) 86 + goto cleanup; 87 + 88 + accept_serv = accept(serv, NULL, NULL); 89 + if (!ASSERT_GE(accept_serv, 0, "serv accept")) 90 + goto cleanup; 91 + 92 + n = send(clien, "t", 1, 0); 93 + if (!ASSERT_EQ(n, 1, "client send")) 94 + goto cleanup; 95 + 96 + /* Run iterator program that destroys server sockets. 
*/ 97 + start_iter_sockets(skel->progs.iter_tcp6_server); 98 + 99 + n = send(clien, "t", 1, 0); 100 + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) 101 + goto cleanup; 102 + ASSERT_EQ(errno, ECONNRESET, "error code on destroyed socket"); 103 + 104 + cleanup: 105 + if (clien != -1) 106 + close(clien); 107 + if (accept_serv != -1) 108 + close(accept_serv); 109 + if (serv != -1) 110 + close(serv); 111 + } 112 + 113 + static void test_udp_client(struct sock_destroy_prog *skel) 114 + { 115 + int serv = -1, clien = -1, n = 0; 116 + 117 + serv = start_server(AF_INET6, SOCK_DGRAM, NULL, 0, 0); 118 + if (!ASSERT_GE(serv, 0, "start_server")) 119 + goto cleanup; 120 + 121 + clien = connect_to_fd(serv, 0); 122 + if (!ASSERT_GE(clien, 0, "connect_to_fd")) 123 + goto cleanup; 124 + 125 + n = send(clien, "t", 1, 0); 126 + if (!ASSERT_EQ(n, 1, "client send")) 127 + goto cleanup; 128 + 129 + /* Run iterator program that destroys sockets. */ 130 + start_iter_sockets(skel->progs.iter_udp6_client); 131 + 132 + n = send(clien, "t", 1, 0); 133 + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) 134 + goto cleanup; 135 + /* UDP sockets have an overriding error code after they are disconnected, 136 + * so we don't check for ECONNABORTED error code. 137 + */ 138 + 139 + cleanup: 140 + if (clien != -1) 141 + close(clien); 142 + if (serv != -1) 143 + close(serv); 144 + } 145 + 146 + static void test_udp_server(struct sock_destroy_prog *skel) 147 + { 148 + int *listen_fds = NULL, n, i, serv_port; 149 + unsigned int num_listens = 5; 150 + char buf[1]; 151 + 152 + /* Start reuseport servers. 
*/ 153 + listen_fds = start_reuseport_server(AF_INET6, SOCK_DGRAM, 154 + "::1", 0, 0, num_listens); 155 + if (!ASSERT_OK_PTR(listen_fds, "start_reuseport_server")) 156 + goto cleanup; 157 + serv_port = get_socket_local_port(listen_fds[0]); 158 + if (!ASSERT_GE(serv_port, 0, "get_sock_local_port")) 159 + goto cleanup; 160 + skel->bss->serv_port = (__be16) serv_port; 161 + 162 + /* Run iterator program that destroys server sockets. */ 163 + start_iter_sockets(skel->progs.iter_udp6_server); 164 + 165 + for (i = 0; i < num_listens; ++i) { 166 + n = read(listen_fds[i], buf, sizeof(buf)); 167 + if (!ASSERT_EQ(n, -1, "read") || 168 + !ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket")) 169 + break; 170 + } 171 + ASSERT_EQ(i, num_listens, "server socket"); 172 + 173 + cleanup: 174 + free_fds(listen_fds, num_listens); 175 + } 176 + 177 + void test_sock_destroy(void) 178 + { 179 + struct sock_destroy_prog *skel; 180 + struct nstoken *nstoken = NULL; 181 + int cgroup_fd; 182 + 183 + skel = sock_destroy_prog__open_and_load(); 184 + if (!ASSERT_OK_PTR(skel, "skel_open")) 185 + return; 186 + 187 + cgroup_fd = test__join_cgroup("/sock_destroy"); 188 + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) 189 + goto cleanup; 190 + 191 + skel->links.sock_connect = bpf_program__attach_cgroup( 192 + skel->progs.sock_connect, cgroup_fd); 193 + if (!ASSERT_OK_PTR(skel->links.sock_connect, "prog_attach")) 194 + goto cleanup; 195 + 196 + SYS(cleanup, "ip netns add %s", TEST_NS); 197 + SYS(cleanup, "ip -net %s link set dev lo up", TEST_NS); 198 + 199 + nstoken = open_netns(TEST_NS); 200 + if (!ASSERT_OK_PTR(nstoken, "open_netns")) 201 + goto cleanup; 202 + 203 + if (test__start_subtest("tcp_client")) 204 + test_tcp_client(skel); 205 + if (test__start_subtest("tcp_server")) 206 + test_tcp_server(skel); 207 + if (test__start_subtest("udp_client")) 208 + test_udp_client(skel); 209 + if (test__start_subtest("udp_server")) 210 + test_udp_server(skel); 211 + 212 + 
RUN_TESTS(sock_destroy_prog_fail); 213 + 214 + cleanup: 215 + if (nstoken) 216 + close_netns(nstoken); 217 + SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); 218 + if (cgroup_fd >= 0) 219 + close(cgroup_fd); 220 + sock_destroy_prog__destroy(skel); 221 + }

+3 -1

tools/testing/selftests/bpf/prog_tests/sockopt.c

··· 1060 1060 return; 1061 1061 1062 1062 for (i = 0; i < ARRAY_SIZE(tests); i++) { 1063 - test__start_subtest(tests[i].descr); 1063 + if (!test__start_subtest(tests[i].descr)) 1064 + continue; 1065 + 1064 1066 ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr); 1065 1067 } 1066 1068

+121

tools/testing/selftests/bpf/prog_tests/xdp_bonding.c

··· 18 18 #include <linux/if_bonding.h> 19 19 #include <linux/limits.h> 20 20 #include <linux/udp.h> 21 + #include <uapi/linux/netdev.h> 21 22 22 23 #include "xdp_dummy.skel.h" 23 24 #include "xdp_redirect_multi_kern.skel.h" ··· 493 492 system("ip link del bond_nest2"); 494 493 } 495 494 495 + static void test_xdp_bonding_features(struct skeletons *skeletons) 496 + { 497 + LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); 498 + int bond_idx, veth1_idx, err; 499 + struct bpf_link *link = NULL; 500 + 501 + if (!ASSERT_OK(system("ip link add bond type bond"), "add bond")) 502 + goto out; 503 + 504 + bond_idx = if_nametoindex("bond"); 505 + if (!ASSERT_GE(bond_idx, 0, "if_nametoindex bond")) 506 + goto out; 507 + 508 + /* query default xdp-feature for bond device */ 509 + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); 510 + if (!ASSERT_OK(err, "bond bpf_xdp_query")) 511 + goto out; 512 + 513 + if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, 514 + "bond query_opts.feature_flags")) 515 + goto out; 516 + 517 + if (!ASSERT_OK(system("ip link add veth0 type veth peer name veth1"), 518 + "add veth{0,1} pair")) 519 + goto out; 520 + 521 + if (!ASSERT_OK(system("ip link add veth2 type veth peer name veth3"), 522 + "add veth{2,3} pair")) 523 + goto out; 524 + 525 + if (!ASSERT_OK(system("ip link set veth0 master bond"), 526 + "add veth0 to master bond")) 527 + goto out; 528 + 529 + /* xdp-feature for bond device should be obtained from the single slave 530 + * device (veth0) 531 + */ 532 + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); 533 + if (!ASSERT_OK(err, "bond bpf_xdp_query")) 534 + goto out; 535 + 536 + if (!ASSERT_EQ(query_opts.feature_flags, 537 + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 538 + NETDEV_XDP_ACT_RX_SG, 539 + "bond query_opts.feature_flags")) 540 + goto out; 541 + 542 + veth1_idx = if_nametoindex("veth1"); 543 + if (!ASSERT_GE(veth1_idx, 0, "if_nametoindex veth1")) 544 + goto out; 545 + 546 + link = 
bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, 547 + veth1_idx); 548 + if (!ASSERT_OK_PTR(link, "attach program to veth1")) 549 + goto out; 550 + 551 + /* xdp-feature for veth0 are changed */ 552 + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); 553 + if (!ASSERT_OK(err, "bond bpf_xdp_query")) 554 + goto out; 555 + 556 + if (!ASSERT_EQ(query_opts.feature_flags, 557 + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 558 + NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT | 559 + NETDEV_XDP_ACT_NDO_XMIT_SG, 560 + "bond query_opts.feature_flags")) 561 + goto out; 562 + 563 + if (!ASSERT_OK(system("ip link set veth2 master bond"), 564 + "add veth2 to master bond")) 565 + goto out; 566 + 567 + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); 568 + if (!ASSERT_OK(err, "bond bpf_xdp_query")) 569 + goto out; 570 + 571 + /* xdp-feature for bond device should be set to the most restrict 572 + * value obtained from attached slave devices (veth0 and veth2) 573 + */ 574 + if (!ASSERT_EQ(query_opts.feature_flags, 575 + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 576 + NETDEV_XDP_ACT_RX_SG, 577 + "bond query_opts.feature_flags")) 578 + goto out; 579 + 580 + if (!ASSERT_OK(system("ip link set veth2 nomaster"), 581 + "del veth2 to master bond")) 582 + goto out; 583 + 584 + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); 585 + if (!ASSERT_OK(err, "bond bpf_xdp_query")) 586 + goto out; 587 + 588 + if (!ASSERT_EQ(query_opts.feature_flags, 589 + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | 590 + NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT | 591 + NETDEV_XDP_ACT_NDO_XMIT_SG, 592 + "bond query_opts.feature_flags")) 593 + goto out; 594 + 595 + if (!ASSERT_OK(system("ip link set veth0 nomaster"), 596 + "del veth0 to master bond")) 597 + goto out; 598 + 599 + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); 600 + if (!ASSERT_OK(err, "bond bpf_xdp_query")) 601 + goto out; 602 + 603 + 
ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, 604 + "bond query_opts.feature_flags"); 605 + out: 606 + bpf_link__destroy(link); 607 + system("ip link del veth0"); 608 + system("ip link del veth2"); 609 + system("ip link del bond"); 610 + } 611 + 496 612 static int libbpf_debug_print(enum libbpf_print_level level, 497 613 const char *format, va_list args) 498 614 { ··· 663 545 664 546 if (test__start_subtest("xdp_bonding_nested")) 665 547 test_xdp_bonding_nested(&skeletons); 548 + 549 + if (test__start_subtest("xdp_bonding_features")) 550 + test_xdp_bonding_features(&skeletons); 666 551 667 552 for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) { 668 553 struct bond_test_case *test_case = &bond_test_cases[i];

+1 -3

tools/testing/selftests/bpf/progs/cb_refs.c

··· 2 2 #include <vmlinux.h> 3 3 #include <bpf/bpf_tracing.h> 4 4 #include <bpf/bpf_helpers.h> 5 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 5 6 6 7 struct map_value { 7 8 struct prog_test_ref_kfunc __kptr *ptr; ··· 14 13 __type(value, struct map_value); 15 14 __uint(max_entries, 16); 16 15 } array_map SEC(".maps"); 17 - 18 - extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 19 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 20 16 21 17 static __noinline int cb1(void *map, void *key, void *value, void *ctx) 22 18 {

+1

tools/testing/selftests/bpf/progs/dynptr_fail.c

··· 3 3 4 4 #include <errno.h> 5 5 #include <string.h> 6 + #include <stdbool.h> 6 7 #include <linux/bpf.h> 7 8 #include <bpf/bpf_helpers.h> 8 9 #include <linux/if_ether.h>

+1

tools/testing/selftests/bpf/progs/dynptr_success.c

+1 -3

tools/testing/selftests/bpf/progs/jit_probe_mem.c

··· 3 3 #include <vmlinux.h> 4 4 #include <bpf/bpf_tracing.h> 5 5 #include <bpf/bpf_helpers.h> 6 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 6 7 7 8 static struct prog_test_ref_kfunc __kptr *v; 8 9 long total_sum = -1; 9 - 10 - extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 11 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 12 10 13 11 SEC("tc") 14 12 int test_jit_probe_mem(struct __sk_buff *ctx)

+1 -2

tools/testing/selftests/bpf/progs/kfunc_call_destructive.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <vmlinux.h> 3 3 #include <bpf/bpf_helpers.h> 4 - 5 - extern void bpf_kfunc_call_test_destructive(void) __ksym; 4 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 6 5 7 6 SEC("tc") 8 7 int kfunc_destructive_test(void)

+1 -8

tools/testing/selftests/bpf/progs/kfunc_call_fail.c

··· 2 2 /* Copyright (c) 2021 Facebook */ 3 3 #include <vmlinux.h> 4 4 #include <bpf/bpf_helpers.h> 5 - 6 - extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 7 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 8 - extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; 9 - extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; 10 - extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; 11 - extern int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; 12 - extern void bpf_kfunc_call_int_mem_release(int *p) __ksym; 5 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 13 6 14 7 struct syscall_test_args { 15 8 __u8 data[16];

+1 -2

tools/testing/selftests/bpf/progs/kfunc_call_race.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <vmlinux.h> 3 3 #include <bpf/bpf_helpers.h> 4 - 5 - extern void bpf_testmod_test_mod_kfunc(int i) __ksym; 4 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 6 5 7 6 SEC("tc") 8 7 int kfunc_call_fail(struct __sk_buff *ctx)

+1 -16

tools/testing/selftests/bpf/progs/kfunc_call_test.c

··· 2 2 /* Copyright (c) 2021 Facebook */ 3 3 #include <vmlinux.h> 4 4 #include <bpf/bpf_helpers.h> 5 - 6 - extern long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; 7 - extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; 8 - extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, 9 - __u32 c, __u64 d) __ksym; 10 - 11 - extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 12 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 13 - extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; 14 - extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; 15 - extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; 16 - extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; 17 - extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; 18 - extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; 19 - extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; 20 - extern u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; 5 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 21 6 22 7 SEC("tc") 23 8 int kfunc_call_test4(struct __sk_buff *skb)

+2 -7

tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c

··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2021 Facebook */ 3 - #include <linux/bpf.h> 4 - #include <bpf/bpf_helpers.h> 5 - #include "bpf_tcp_helpers.h" 3 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 6 4 7 5 extern const int bpf_prog_active __ksym; 8 - extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, 9 - __u32 c, __u64 d) __ksym; 10 - extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; 11 6 int active_res = -1; 12 7 int sk_state_res = -1; 13 8 ··· 23 28 if (active) 24 29 active_res = *active; 25 30 26 - sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->sk_state; 31 + sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state; 27 32 28 33 return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4); 29 34 }

+2 -3

tools/testing/selftests/bpf/progs/local_kptr_stash.c

··· 5 5 #include <bpf/bpf_tracing.h> 6 6 #include <bpf/bpf_helpers.h> 7 7 #include <bpf/bpf_core_read.h> 8 - #include "bpf_experimental.h" 8 + #include "../bpf_experimental.h" 9 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 9 10 10 11 struct node_data { 11 12 long key; ··· 32 31 * Had to do the same w/ bpf_kfunc_call_test_release below 33 32 */ 34 33 struct node_data *just_here_because_btf_bug; 35 - 36 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 37 34 38 35 struct { 39 36 __uint(type, BPF_MAP_TYPE_ARRAY);

+1 -4

tools/testing/selftests/bpf/progs/map_kptr.c

··· 2 2 #include <vmlinux.h> 3 3 #include <bpf/bpf_tracing.h> 4 4 #include <bpf/bpf_helpers.h> 5 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 5 6 6 7 struct map_value { 7 8 struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; ··· 114 113 DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps); 115 114 DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps); 116 115 DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps); 117 - 118 - extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 119 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 120 - void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; 121 116 122 117 #define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) 123 118

+1 -3

tools/testing/selftests/bpf/progs/map_kptr_fail.c

··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include <bpf/bpf_core_read.h> 6 6 #include "bpf_misc.h" 7 + #include "../bpf_testmod/bpf_testmod_kfunc.h" 7 8 8 9 struct map_value { 9 10 char buf[8]; ··· 19 18 __type(value, struct map_value); 20 19 __uint(max_entries, 1); 21 20 } array_map SEC(".maps"); 22 - 23 - extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; 24 - extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; 25 21 26 22 SEC("?tc") 27 23 __failure __msg("kptr access size must be BPF_DW")

+145

tools/testing/selftests/bpf/progs/sock_destroy_prog.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include "vmlinux.h" 4 + #include <bpf/bpf_helpers.h> 5 + #include <bpf/bpf_endian.h> 6 + 7 + #include "bpf_tracing_net.h" 8 + 9 + __be16 serv_port = 0; 10 + 11 + int bpf_sock_destroy(struct sock_common *sk) __ksym; 12 + 13 + struct { 14 + __uint(type, BPF_MAP_TYPE_ARRAY); 15 + __uint(max_entries, 1); 16 + __type(key, __u32); 17 + __type(value, __u64); 18 + } tcp_conn_sockets SEC(".maps"); 19 + 20 + struct { 21 + __uint(type, BPF_MAP_TYPE_ARRAY); 22 + __uint(max_entries, 1); 23 + __type(key, __u32); 24 + __type(value, __u64); 25 + } udp_conn_sockets SEC(".maps"); 26 + 27 + SEC("cgroup/connect6") 28 + int sock_connect(struct bpf_sock_addr *ctx) 29 + { 30 + __u64 sock_cookie = 0; 31 + int key = 0; 32 + __u32 keyc = 0; 33 + 34 + if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) 35 + return 1; 36 + 37 + sock_cookie = bpf_get_socket_cookie(ctx); 38 + if (ctx->protocol == IPPROTO_TCP) 39 + bpf_map_update_elem(&tcp_conn_sockets, &key, &sock_cookie, 0); 40 + else if (ctx->protocol == IPPROTO_UDP) 41 + bpf_map_update_elem(&udp_conn_sockets, &keyc, &sock_cookie, 0); 42 + else 43 + return 1; 44 + 45 + return 1; 46 + } 47 + 48 + SEC("iter/tcp") 49 + int iter_tcp6_client(struct bpf_iter__tcp *ctx) 50 + { 51 + struct sock_common *sk_common = ctx->sk_common; 52 + __u64 sock_cookie = 0; 53 + __u64 *val; 54 + int key = 0; 55 + 56 + if (!sk_common) 57 + return 0; 58 + 59 + if (sk_common->skc_family != AF_INET6) 60 + return 0; 61 + 62 + sock_cookie = bpf_get_socket_cookie(sk_common); 63 + val = bpf_map_lookup_elem(&tcp_conn_sockets, &key); 64 + if (!val) 65 + return 0; 66 + /* Destroy connected client sockets. 
*/ 67 + if (sock_cookie == *val) 68 + bpf_sock_destroy(sk_common); 69 + 70 + return 0; 71 + } 72 + 73 + SEC("iter/tcp") 74 + int iter_tcp6_server(struct bpf_iter__tcp *ctx) 75 + { 76 + struct sock_common *sk_common = ctx->sk_common; 77 + const struct inet_connection_sock *icsk; 78 + const struct inet_sock *inet; 79 + struct tcp6_sock *tcp_sk; 80 + __be16 srcp; 81 + 82 + if (!sk_common) 83 + return 0; 84 + 85 + if (sk_common->skc_family != AF_INET6) 86 + return 0; 87 + 88 + tcp_sk = bpf_skc_to_tcp6_sock(sk_common); 89 + if (!tcp_sk) 90 + return 0; 91 + 92 + icsk = &tcp_sk->tcp.inet_conn; 93 + inet = &icsk->icsk_inet; 94 + srcp = inet->inet_sport; 95 + 96 + /* Destroy server sockets. */ 97 + if (srcp == serv_port) 98 + bpf_sock_destroy(sk_common); 99 + 100 + return 0; 101 + } 102 + 103 + 104 + SEC("iter/udp") 105 + int iter_udp6_client(struct bpf_iter__udp *ctx) 106 + { 107 + struct udp_sock *udp_sk = ctx->udp_sk; 108 + struct sock *sk = (struct sock *) udp_sk; 109 + __u64 sock_cookie = 0, *val; 110 + int key = 0; 111 + 112 + if (!sk) 113 + return 0; 114 + 115 + sock_cookie = bpf_get_socket_cookie(sk); 116 + val = bpf_map_lookup_elem(&udp_conn_sockets, &key); 117 + if (!val) 118 + return 0; 119 + /* Destroy connected client sockets. */ 120 + if (sock_cookie == *val) 121 + bpf_sock_destroy((struct sock_common *)sk); 122 + 123 + return 0; 124 + } 125 + 126 + SEC("iter/udp") 127 + int iter_udp6_server(struct bpf_iter__udp *ctx) 128 + { 129 + struct udp_sock *udp_sk = ctx->udp_sk; 130 + struct sock *sk = (struct sock *) udp_sk; 131 + struct inet_sock *inet; 132 + __be16 srcp; 133 + 134 + if (!sk) 135 + return 0; 136 + 137 + inet = &udp_sk->inet; 138 + srcp = inet->inet_sport; 139 + if (srcp == serv_port) 140 + bpf_sock_destroy((struct sock_common *)sk); 141 + 142 + return 0; 143 + } 144 + 145 + char _license[] SEC("license") = "GPL";

+22

tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include "vmlinux.h" 4 + #include <bpf/bpf_tracing.h> 5 + #include <bpf/bpf_helpers.h> 6 + 7 + #include "bpf_misc.h" 8 + 9 + char _license[] SEC("license") = "GPL"; 10 + 11 + int bpf_sock_destroy(struct sock_common *sk) __ksym; 12 + 13 + SEC("tp_btf/tcp_destroy_sock") 14 + __failure __msg("calling kernel function bpf_sock_destroy is not allowed") 15 + int BPF_PROG(trace_tcp_destroy_sock, struct sock *sk) 16 + { 17 + /* should not load */ 18 + bpf_sock_destroy((struct sock_common *)sk); 19 + 20 + return 0; 21 + } 22 +

+58

tools/testing/selftests/bpf/progs/test_global_map_resize.c

··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ 3 + 4 + #include "vmlinux.h" 5 + #include <bpf/bpf_helpers.h> 6 + 7 + char _license[] SEC("license") = "GPL"; 8 + 9 + /* rodata section */ 10 + const volatile pid_t pid; 11 + const volatile size_t bss_array_len; 12 + const volatile size_t data_array_len; 13 + 14 + /* bss section */ 15 + int sum = 0; 16 + int array[1]; 17 + 18 + /* custom data section */ 19 + int my_array[1] SEC(".data.custom"); 20 + 21 + /* custom data section which should NOT be resizable, 22 + * since it contains a single var which is not an array 23 + */ 24 + int my_int SEC(".data.non_array"); 25 + 26 + /* custom data section which should NOT be resizable, 27 + * since its last var is not an array 28 + */ 29 + int my_array_first[1] SEC(".data.array_not_last"); 30 + int my_int_last SEC(".data.array_not_last"); 31 + 32 + SEC("tp/syscalls/sys_enter_getpid") 33 + int bss_array_sum(void *ctx) 34 + { 35 + if (pid != (bpf_get_current_pid_tgid() >> 32)) 36 + return 0; 37 + 38 + sum = 0; 39 + 40 + for (size_t i = 0; i < bss_array_len; ++i) 41 + sum += array[i]; 42 + 43 + return 0; 44 + } 45 + 46 + SEC("tp/syscalls/sys_enter_getuid") 47 + int data_array_sum(void *ctx) 48 + { 49 + if (pid != (bpf_get_current_pid_tgid() >> 32)) 50 + return 0; 51 + 52 + sum = 0; 53 + 54 + for (size_t i = 0; i < data_array_len; ++i) 55 + sum += my_array[i]; 56 + 57 + return 0; 58 + }

+4 -1

tools/testing/selftests/bpf/progs/test_sock_fields.c

··· 265 265 266 266 static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk) 267 267 { 268 - __u16 *half = (__u16 *)&sk->dst_port; 268 + __u16 *half; 269 + 270 + asm volatile (""); 271 + half = (__u16 *)&sk->dst_port; 269 272 return half[0] == bpf_htons(0xcafe); 270 273 } 271 274

+1

tools/testing/selftests/bpf/progs/test_xdp_dynptr.c

··· 2 2 /* Copyright (c) 2022 Meta */ 3 3 #include <stddef.h> 4 4 #include <string.h> 5 + #include <stdbool.h> 5 6 #include <linux/bpf.h> 6 7 #include <linux/if_ether.h> 7 8 #include <linux/if_packet.h>

+9 -67

tools/testing/selftests/bpf/test_progs.c

··· 11 11 #include <signal.h> 12 12 #include <string.h> 13 13 #include <execinfo.h> /* backtrace */ 14 - #include <linux/membarrier.h> 15 14 #include <sys/sysinfo.h> /* get_nprocs */ 16 15 #include <netinet/in.h> 17 16 #include <sys/select.h> ··· 626 627 free(val_buf1); 627 628 free(val_buf2); 628 629 return err; 629 - } 630 - 631 - static int finit_module(int fd, const char *param_values, int flags) 632 - { 633 - return syscall(__NR_finit_module, fd, param_values, flags); 634 - } 635 - 636 - static int delete_module(const char *name, int flags) 637 - { 638 - return syscall(__NR_delete_module, name, flags); 639 - } 640 - 641 - /* 642 - * Trigger synchronize_rcu() in kernel. 643 - */ 644 - int kern_sync_rcu(void) 645 - { 646 - return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); 647 - } 648 - 649 - static void unload_bpf_testmod(void) 650 - { 651 - if (kern_sync_rcu()) 652 - fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n"); 653 - if (delete_module("bpf_testmod", 0)) { 654 - if (errno == ENOENT) { 655 - if (verbose()) 656 - fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); 657 - return; 658 - } 659 - fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); 660 - return; 661 - } 662 - if (verbose()) 663 - fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); 664 - } 665 - 666 - static int load_bpf_testmod(void) 667 - { 668 - int fd; 669 - 670 - /* ensure previous instance of the module is unloaded */ 671 - unload_bpf_testmod(); 672 - 673 - if (verbose()) 674 - fprintf(stdout, "Loading bpf_testmod.ko...\n"); 675 - 676 - fd = open("bpf_testmod.ko", O_RDONLY); 677 - if (fd < 0) { 678 - fprintf(env.stderr, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); 679 - return -ENOENT; 680 - } 681 - if (finit_module(fd, "", 0)) { 682 - fprintf(env.stderr, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); 683 - close(fd); 684 - return -EINVAL; 685 - } 686 - close(fd); 687 - 688 - if (verbose()) 
689 - fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); 690 - return 0; 691 630 } 692 631 693 632 /* extern declarations for test funcs */ ··· 1657 1720 env.stderr = stderr; 1658 1721 1659 1722 env.has_testmod = true; 1660 - if (!env.list_test_names && load_bpf_testmod()) { 1661 - fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); 1662 - env.has_testmod = false; 1723 + if (!env.list_test_names) { 1724 + /* ensure previous instance of the module is unloaded */ 1725 + unload_bpf_testmod(verbose()); 1726 + 1727 + if (load_bpf_testmod(verbose())) { 1728 + fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); 1729 + env.has_testmod = false; 1730 + } 1663 1731 } 1664 1732 1665 1733 /* initializing tests */ ··· 1761 1819 close(env.saved_netns_fd); 1762 1820 out: 1763 1821 if (!env.list_test_names && env.has_testmod) 1764 - unload_bpf_testmod(); 1822 + unload_bpf_testmod(verbose()); 1765 1823 1766 1824 free_test_selector(&env.test_selector); 1767 1825 free_test_selector(&env.subtest_selector);

-1

tools/testing/selftests/bpf/test_progs.h

··· 405 405 int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); 406 406 int compare_map_keys(int map1_fd, int map2_fd); 407 407 int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); 408 - int kern_sync_rcu(void); 409 408 int trigger_module_test_read(int read_sz); 410 409 int trigger_module_test_write(int write_sz); 411 410 int write_sysctl(const char *sysctl, const char *value);

+148 -22

tools/testing/selftests/bpf/test_verifier.c

··· 40 40 #include "bpf_util.h" 41 41 #include "test_btf.h" 42 42 #include "../../../include/linux/filter.h" 43 + #include "testing_helpers.h" 43 44 44 45 #ifndef ENOTSUPP 45 46 #define ENOTSUPP 524 ··· 874 873 return fd; 875 874 } 876 875 876 + static void set_root(bool set) 877 + { 878 + __u64 caps; 879 + 880 + if (set) { 881 + if (cap_enable_effective(1ULL << CAP_SYS_ADMIN, &caps)) 882 + perror("cap_disable_effective(CAP_SYS_ADMIN)"); 883 + } else { 884 + if (cap_disable_effective(1ULL << CAP_SYS_ADMIN, &caps)) 885 + perror("cap_disable_effective(CAP_SYS_ADMIN)"); 886 + } 887 + } 888 + 889 + static __u64 ptr_to_u64(const void *ptr) 890 + { 891 + return (uintptr_t) ptr; 892 + } 893 + 894 + static struct btf *btf__load_testmod_btf(struct btf *vmlinux) 895 + { 896 + struct bpf_btf_info info; 897 + __u32 len = sizeof(info); 898 + struct btf *btf = NULL; 899 + char name[64]; 900 + __u32 id = 0; 901 + int err, fd; 902 + 903 + /* Iterate all loaded BTF objects and find bpf_testmod, 904 + * we need SYS_ADMIN cap for that. 
905 + */ 906 + set_root(true); 907 + 908 + while (true) { 909 + err = bpf_btf_get_next_id(id, &id); 910 + if (err) { 911 + if (errno == ENOENT) 912 + break; 913 + perror("bpf_btf_get_next_id failed"); 914 + break; 915 + } 916 + 917 + fd = bpf_btf_get_fd_by_id(id); 918 + if (fd < 0) { 919 + if (errno == ENOENT) 920 + continue; 921 + perror("bpf_btf_get_fd_by_id failed"); 922 + break; 923 + } 924 + 925 + memset(&info, 0, sizeof(info)); 926 + info.name_len = sizeof(name); 927 + info.name = ptr_to_u64(name); 928 + len = sizeof(info); 929 + 930 + err = bpf_obj_get_info_by_fd(fd, &info, &len); 931 + if (err) { 932 + close(fd); 933 + perror("bpf_obj_get_info_by_fd failed"); 934 + break; 935 + } 936 + 937 + if (strcmp("bpf_testmod", name)) { 938 + close(fd); 939 + continue; 940 + } 941 + 942 + btf = btf__load_from_kernel_by_id_split(id, vmlinux); 943 + if (!btf) { 944 + close(fd); 945 + break; 946 + } 947 + 948 + /* We need the fd to stay open so it can be used in fd_array. 949 + * The final cleanup call to btf__free will free btf object 950 + * and close the file descriptor. 951 + */ 952 + btf__set_fd(btf, fd); 953 + break; 954 + } 955 + 956 + set_root(false); 957 + return btf; 958 + } 959 + 960 + static struct btf *testmod_btf; 961 + static struct btf *vmlinux_btf; 962 + 963 + static void kfuncs_cleanup(void) 964 + { 965 + btf__free(testmod_btf); 966 + btf__free(vmlinux_btf); 967 + } 968 + 969 + static void fixup_prog_kfuncs(struct bpf_insn *prog, int *fd_array, 970 + struct kfunc_btf_id_pair *fixup_kfunc_btf_id) 971 + { 972 + /* Patch in kfunc BTF IDs */ 973 + while (fixup_kfunc_btf_id->kfunc) { 974 + int btf_id = 0; 975 + 976 + /* try to find kfunc in kernel BTF */ 977 + vmlinux_btf = vmlinux_btf ?: btf__load_vmlinux_btf(); 978 + if (vmlinux_btf) { 979 + btf_id = btf__find_by_name_kind(vmlinux_btf, 980 + fixup_kfunc_btf_id->kfunc, 981 + BTF_KIND_FUNC); 982 + btf_id = btf_id < 0 ? 
0 : btf_id; 983 + } 984 + 985 + /* kfunc not found in kernel BTF, try bpf_testmod BTF */ 986 + if (!btf_id) { 987 + testmod_btf = testmod_btf ?: btf__load_testmod_btf(vmlinux_btf); 988 + if (testmod_btf) { 989 + btf_id = btf__find_by_name_kind(testmod_btf, 990 + fixup_kfunc_btf_id->kfunc, 991 + BTF_KIND_FUNC); 992 + btf_id = btf_id < 0 ? 0 : btf_id; 993 + if (btf_id) { 994 + /* We put bpf_testmod module fd into fd_array 995 + * and its index 1 into instruction 'off'. 996 + */ 997 + *fd_array = btf__fd(testmod_btf); 998 + prog[fixup_kfunc_btf_id->insn_idx].off = 1; 999 + } 1000 + } 1001 + } 1002 + 1003 + prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; 1004 + fixup_kfunc_btf_id++; 1005 + } 1006 + } 1007 + 877 1008 static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, 878 - struct bpf_insn *prog, int *map_fds) 1009 + struct bpf_insn *prog, int *map_fds, int *fd_array) 879 1010 { 880 1011 int *fixup_map_hash_8b = test->fixup_map_hash_8b; 881 1012 int *fixup_map_hash_48b = test->fixup_map_hash_48b; ··· 1032 899 int *fixup_map_ringbuf = test->fixup_map_ringbuf; 1033 900 int *fixup_map_timer = test->fixup_map_timer; 1034 901 int *fixup_map_kptr = test->fixup_map_kptr; 1035 - struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id; 1036 902 1037 903 if (test->fill_helper) { 1038 904 test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); ··· 1232 1100 } while (*fixup_map_kptr); 1233 1101 } 1234 1102 1235 - /* Patch in kfunc BTF IDs */ 1236 - if (fixup_kfunc_btf_id->kfunc) { 1237 - struct btf *btf; 1238 - int btf_id; 1239 - 1240 - do { 1241 - btf_id = 0; 1242 - btf = btf__load_vmlinux_btf(); 1243 - if (btf) { 1244 - btf_id = btf__find_by_name_kind(btf, 1245 - fixup_kfunc_btf_id->kfunc, 1246 - BTF_KIND_FUNC); 1247 - btf_id = btf_id < 0 ? 
0 : btf_id; 1248 - } 1249 - btf__free(btf); 1250 - prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; 1251 - fixup_kfunc_btf_id++; 1252 - } while (fixup_kfunc_btf_id->kfunc); 1253 - } 1103 + fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id); 1254 1104 } 1255 1105 1256 1106 struct libcap { ··· 1559 1445 int run_errs, run_successes; 1560 1446 int map_fds[MAX_NR_MAPS]; 1561 1447 const char *expected_err; 1448 + int fd_array[2] = { -1, -1 }; 1562 1449 int saved_errno; 1563 1450 int fixup_skips; 1564 1451 __u32 pflags; ··· 1573 1458 if (!prog_type) 1574 1459 prog_type = BPF_PROG_TYPE_SOCKET_FILTER; 1575 1460 fixup_skips = skips; 1576 - do_test_fixup(test, prog_type, prog, map_fds); 1461 + do_test_fixup(test, prog_type, prog, map_fds, &fd_array[1]); 1577 1462 if (test->fill_insns) { 1578 1463 prog = test->fill_insns; 1579 1464 prog_len = test->prog_len; ··· 1607 1492 else 1608 1493 opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL; 1609 1494 opts.prog_flags = pflags; 1495 + if (fd_array[1] != -1) 1496 + opts.fd_array = &fd_array[0]; 1610 1497 1611 1498 if ((prog_type == BPF_PROG_TYPE_TRACING || 1612 1499 prog_type == BPF_PROG_TYPE_LSM) && test->kfunc) { ··· 1801 1684 { 1802 1685 int i, passes = 0, errors = 0; 1803 1686 1687 + /* ensure previous instance of the module is unloaded */ 1688 + unload_bpf_testmod(verbose); 1689 + 1690 + if (load_bpf_testmod(verbose)) 1691 + return EXIT_FAILURE; 1692 + 1804 1693 for (i = from; i < to; i++) { 1805 1694 struct bpf_test *test = &tests[i]; 1806 1695 ··· 1833 1710 do_test_single(test, false, &passes, &errors); 1834 1711 } 1835 1712 } 1713 + 1714 + unload_bpf_testmod(verbose); 1715 + kfuncs_cleanup(); 1836 1716 1837 1717 printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes, 1838 1718 skips, errors);

+1 -9

tools/testing/selftests/bpf/test_xsk.sh

··· 68 68 # Run with verbose output: 69 69 # sudo ./test_xsk.sh -v 70 70 # 71 - # Run and dump packet contents: 72 - # sudo ./test_xsk.sh -D 73 - # 74 71 # Set up veth interfaces and leave them up so xskxceiver can be launched in a debugger: 75 72 # sudo ./test_xsk.sh -d 76 73 # ··· 78 81 79 82 ETH="" 80 83 81 - while getopts "vDi:d" flag 84 + while getopts "vi:d" flag 82 85 do 83 86 case "${flag}" in 84 87 v) verbose=1;; 85 - D) dump_pkts=1;; 86 88 d) debug=1;; 87 89 i) ETH=${OPTARG};; 88 90 esac ··· 151 155 152 156 if [[ $verbose -eq 1 ]]; then 153 157 ARGS+="-v " 154 - fi 155 - 156 - if [[ $dump_pkts -eq 1 ]]; then 157 - ARGS="-D " 158 158 fi 159 159 160 160 retval=$?

+61

tools/testing/selftests/bpf/testing_helpers.c

··· 9 9 #include <bpf/libbpf.h> 10 10 #include "test_progs.h" 11 11 #include "testing_helpers.h" 12 + #include <linux/membarrier.h> 12 13 13 14 int parse_num_list(const char *s, bool **num_set, int *num_set_len) 14 15 { ··· 326 325 327 326 fclose(f); 328 327 return sample_freq; 328 + } 329 + 330 + static int finit_module(int fd, const char *param_values, int flags) 331 + { 332 + return syscall(__NR_finit_module, fd, param_values, flags); 333 + } 334 + 335 + static int delete_module(const char *name, int flags) 336 + { 337 + return syscall(__NR_delete_module, name, flags); 338 + } 339 + 340 + int unload_bpf_testmod(bool verbose) 341 + { 342 + if (kern_sync_rcu()) 343 + fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n"); 344 + if (delete_module("bpf_testmod", 0)) { 345 + if (errno == ENOENT) { 346 + if (verbose) 347 + fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); 348 + return -1; 349 + } 350 + fprintf(stdout, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); 351 + return -1; 352 + } 353 + if (verbose) 354 + fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); 355 + return 0; 356 + } 357 + 358 + int load_bpf_testmod(bool verbose) 359 + { 360 + int fd; 361 + 362 + if (verbose) 363 + fprintf(stdout, "Loading bpf_testmod.ko...\n"); 364 + 365 + fd = open("bpf_testmod.ko", O_RDONLY); 366 + if (fd < 0) { 367 + fprintf(stdout, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); 368 + return -ENOENT; 369 + } 370 + if (finit_module(fd, "", 0)) { 371 + fprintf(stdout, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); 372 + close(fd); 373 + return -EINVAL; 374 + } 375 + close(fd); 376 + 377 + if (verbose) 378 + fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); 379 + return 0; 380 + } 381 + 382 + /* 383 + * Trigger synchronize_rcu() in kernel. 384 + */ 385 + int kern_sync_rcu(void) 386 + { 387 + return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); 329 388 }

+9

tools/testing/selftests/bpf/testing_helpers.h

··· 1 1 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 2 /* Copyright (C) 2020 Facebook, Inc. */ 3 + 4 + #ifndef __TESTING_HELPERS_H 5 + #define __TESTING_HELPERS_H 6 + 3 7 #include <stdbool.h> 4 8 #include <bpf/bpf.h> 5 9 #include <bpf/libbpf.h> ··· 29 25 bool is_glob_pattern); 30 26 31 27 __u64 read_perf_max_sample_freq(void); 28 + int load_bpf_testmod(bool verbose); 29 + int unload_bpf_testmod(bool verbose); 30 + int kern_sync_rcu(void); 31 + 32 + #endif /* __TESTING_HELPERS_H */

+5

tools/testing/selftests/bpf/xsk.h

··· 134 134 __atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE); 135 135 } 136 136 137 + static inline void xsk_ring_prod__cancel(struct xsk_ring_prod *prod, __u32 nb) 138 + { 139 + prod->cached_prod -= nb; 140 + } 141 + 137 142 static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx) 138 143 { 139 144 __u32 entries = xsk_cons_nb_avail(cons, nb);

+362 -407

tools/testing/selftests/bpf/xskxceiver.c

··· 76 76 #include <asm/barrier.h> 77 77 #include <linux/if_link.h> 78 78 #include <linux/if_ether.h> 79 - #include <linux/ip.h> 80 79 #include <linux/mman.h> 81 - #include <linux/udp.h> 82 80 #include <arpa/inet.h> 83 81 #include <net/if.h> 84 82 #include <locale.h> 85 83 #include <poll.h> 86 84 #include <pthread.h> 87 85 #include <signal.h> 88 - #include <stdbool.h> 89 86 #include <stdio.h> 90 87 #include <stdlib.h> 91 88 #include <string.h> ··· 91 94 #include <sys/socket.h> 92 95 #include <sys/time.h> 93 96 #include <sys/types.h> 94 - #include <sys/queue.h> 95 97 #include <time.h> 96 98 #include <unistd.h> 97 - #include <stdatomic.h> 98 99 99 100 #include "xsk_xdp_progs.skel.h" 100 101 #include "xsk.h" ··· 104 109 105 110 static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; 106 111 static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; 107 - static const char *IP1 = "192.168.100.162"; 108 - static const char *IP2 = "192.168.100.161"; 109 - static const u16 UDP_PORT1 = 2020; 110 - static const u16 UDP_PORT2 = 2121; 111 112 112 113 static void __exit_with_error(int error, const char *file, const char *func, int line) 113 114 { ··· 138 147 test->fail = true; 139 148 } 140 149 141 - static void memset32_htonl(void *dest, u32 val, u32 size) 142 - { 143 - u32 *ptr = (u32 *)dest; 144 - int i; 145 - 146 - val = htonl(val); 147 - 148 - for (i = 0; i < (size & (~0x3)); i += 4) 149 - ptr[i >> 2] = val; 150 - } 151 - 152 - /* 153 - * Fold a partial checksum 154 - * This function code has been taken from 155 - * Linux kernel include/asm-generic/checksum.h 150 + /* The payload is a word consisting of a packet sequence number in the upper 151 + * 16-bits and an intra-packet data sequence number in the lower 16 bits. So the 3rd packet's 152 + * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0. 
156 153 */ 157 - static __u16 csum_fold(__u32 csum) 154 + static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size) 158 155 { 159 - u32 sum = (__force u32)csum; 156 + u32 *ptr = (u32 *)dest, i; 160 157 161 - sum = (sum & 0xffff) + (sum >> 16); 162 - sum = (sum & 0xffff) + (sum >> 16); 163 - return (__force __u16)~sum; 164 - } 165 - 166 - /* 167 - * This function code has been taken from 168 - * Linux kernel lib/checksum.c 169 - */ 170 - static u32 from64to32(u64 x) 171 - { 172 - /* add up 32-bit and 32-bit for 32+c bit */ 173 - x = (x & 0xffffffff) + (x >> 32); 174 - /* add up carry.. */ 175 - x = (x & 0xffffffff) + (x >> 32); 176 - return (u32)x; 177 - } 178 - 179 - /* 180 - * This function code has been taken from 181 - * Linux kernel lib/checksum.c 182 - */ 183 - static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) 184 - { 185 - unsigned long long s = (__force u32)sum; 186 - 187 - s += (__force u32)saddr; 188 - s += (__force u32)daddr; 189 - #ifdef __BIG_ENDIAN__ 190 - s += proto + len; 191 - #else 192 - s += (proto + len) << 8; 193 - #endif 194 - return (__force __u32)from64to32(s); 195 - } 196 - 197 - /* 198 - * This function has been taken from 199 - * Linux kernel include/asm-generic/checksum.h 200 - */ 201 - static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) 202 - { 203 - return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); 204 - } 205 - 206 - static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) 207 - { 208 - u32 csum = 0; 209 - u32 cnt = 0; 210 - 211 - /* udp hdr and data */ 212 - for (; cnt < len; cnt += 2) 213 - csum += udp_pkt[cnt >> 1]; 214 - 215 - return csum_tcpudp_magic(saddr, daddr, len, proto, csum); 158 + start /= sizeof(*ptr); 159 + size /= sizeof(*ptr); 160 + for (i = 0; i < size; i++) 161 + ptr[i] = htonl(pkt_nb << 16 | (i + start)); 216 162 } 217 163 218 164 static void gen_eth_hdr(struct ifobject *ifobject, 
struct ethhdr *eth_hdr) 219 165 { 220 166 memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); 221 167 memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); 222 - eth_hdr->h_proto = htons(ETH_P_IP); 223 - } 224 - 225 - static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr) 226 - { 227 - ip_hdr->version = IP_PKT_VER; 228 - ip_hdr->ihl = 0x5; 229 - ip_hdr->tos = IP_PKT_TOS; 230 - ip_hdr->tot_len = htons(IP_PKT_SIZE); 231 - ip_hdr->id = 0; 232 - ip_hdr->frag_off = 0; 233 - ip_hdr->ttl = IPDEFTTL; 234 - ip_hdr->protocol = IPPROTO_UDP; 235 - ip_hdr->saddr = ifobject->src_ip; 236 - ip_hdr->daddr = ifobject->dst_ip; 237 - ip_hdr->check = 0; 238 - } 239 - 240 - static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject, 241 - struct udphdr *udp_hdr) 242 - { 243 - udp_hdr->source = htons(ifobject->src_port); 244 - udp_hdr->dest = htons(ifobject->dst_port); 245 - udp_hdr->len = htons(UDP_PKT_SIZE); 246 - memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE); 168 + eth_hdr->h_proto = htons(ETH_P_LOOPBACK); 247 169 } 248 170 249 171 static bool is_umem_valid(struct ifobject *ifobj) ··· 164 260 return !!ifobj->umem->umem; 165 261 } 166 262 167 - static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) 168 - { 169 - udp_hdr->check = 0; 170 - udp_hdr->check = 171 - udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); 172 - } 173 - 174 263 static u32 mode_to_xdp_flags(enum test_mode mode) 175 264 { 176 265 return (mode == TEST_MODE_SKB) ? 
XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; 177 266 } 178 267 179 - static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size) 268 + static u64 umem_size(struct xsk_umem_info *umem) 269 + { 270 + return umem->num_frames * umem->frame_size; 271 + } 272 + 273 + static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, 274 + u64 size) 180 275 { 181 276 struct xsk_umem_config cfg = { 182 277 .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, ··· 195 292 return ret; 196 293 197 294 umem->buffer = buffer; 295 + if (ifobj->shared_umem && ifobj->rx_on) { 296 + umem->base_addr = umem_size(umem); 297 + umem->next_buffer = umem_size(umem); 298 + } 299 + 198 300 return 0; 301 + } 302 + 303 + static u64 umem_alloc_buffer(struct xsk_umem_info *umem) 304 + { 305 + u64 addr; 306 + 307 + addr = umem->next_buffer; 308 + umem->next_buffer += umem->frame_size; 309 + if (umem->next_buffer >= umem->base_addr + umem_size(umem)) 310 + umem->next_buffer = umem->base_addr; 311 + 312 + return addr; 313 + } 314 + 315 + static void umem_reset_alloc(struct xsk_umem_info *umem) 316 + { 317 + umem->next_buffer = 0; 199 318 } 200 319 201 320 static void enable_busy_poll(struct xsk_socket_info *xsk) ··· 279 354 exit_with_error(ENOMEM); 280 355 } 281 356 umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; 282 - ret = xsk_configure_umem(umem, bufs, umem_sz); 357 + ret = xsk_configure_umem(ifobject, umem, bufs, umem_sz); 283 358 if (ret) 284 359 exit_with_error(-ret); 285 360 ··· 305 380 static struct option long_options[] = { 306 381 {"interface", required_argument, 0, 'i'}, 307 382 {"busy-poll", no_argument, 0, 'b'}, 308 - {"dump-pkts", no_argument, 0, 'D'}, 309 383 {"verbose", no_argument, 0, 'v'}, 310 384 {0, 0, 0, 0} 311 385 }; ··· 315 391 " Usage: %s [OPTIONS]\n" 316 392 " Options:\n" 317 393 " -i, --interface Use interface\n" 318 - " -D, --dump-pkts Dump packets L2 - L5\n" 319 394 " -v, --verbose Verbose output\n" 320 395 " -b, --busy-poll 
Enable busy poll\n"; 321 396 ··· 338 415 opterr = 0; 339 416 340 417 for (;;) { 341 - c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index); 418 + c = getopt_long(argc, argv, "i:vb", long_options, &option_index); 342 419 if (c == -1) 343 420 break; 344 421 ··· 359 436 exit_with_error(errno); 360 437 361 438 interface_nb++; 362 - break; 363 - case 'D': 364 - opt_pkt_dump = true; 365 439 break; 366 440 case 'v': 367 441 opt_verbose = true; ··· 402 482 memset(ifobj->umem, 0, sizeof(*ifobj->umem)); 403 483 ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; 404 484 ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; 405 - if (ifobj->shared_umem && ifobj->rx_on) 406 - ifobj->umem->base_addr = DEFAULT_UMEM_BUFFERS * 407 - XSK_UMEM__DEFAULT_FRAME_SIZE; 408 485 409 486 for (j = 0; j < MAX_SOCKETS; j++) { 410 487 memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); ··· 471 554 static void pkt_stream_reset(struct pkt_stream *pkt_stream) 472 555 { 473 556 if (pkt_stream) 474 - pkt_stream->rx_pkt_nb = 0; 557 + pkt_stream->current_pkt_nb = 0; 475 558 } 476 559 477 - static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) 560 + static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream) 478 561 { 479 - if (pkt_nb >= pkt_stream->nb_pkts) 562 + if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) 480 563 return NULL; 481 564 482 - return &pkt_stream->pkts[pkt_nb]; 565 + return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; 483 566 } 484 567 485 568 static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent) 486 569 { 487 - while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) { 570 + while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { 488 571 (*pkts_sent)++; 489 - if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid) 490 - return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++]; 491 - pkt_stream->rx_pkt_nb++; 572 + if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid) 573 + return 
&pkt_stream->pkts[pkt_stream->current_pkt_nb++]; 574 + pkt_stream->current_pkt_nb++; 492 575 } 493 576 return NULL; 494 577 } ··· 533 616 return pkt_stream; 534 617 } 535 618 536 - static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len) 619 + static u32 ceil_u32(u32 a, u32 b) 537 620 { 538 - pkt->addr = addr + umem->base_addr; 621 + return (a + b - 1) / b; 622 + } 623 + 624 + static u32 pkt_nb_frags(u32 frame_size, struct pkt *pkt) 625 + { 626 + if (!pkt || !pkt->valid) 627 + return 1; 628 + return ceil_u32(pkt->len, frame_size); 629 + } 630 + 631 + static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len) 632 + { 633 + pkt->offset = offset; 539 634 pkt->len = len; 540 635 if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom) 541 636 pkt->valid = false; 542 637 else 543 638 pkt->valid = true; 639 + } 640 + 641 + static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) 642 + { 643 + return ceil_u32(len, umem->frame_size) * umem->frame_size; 544 644 } 545 645 546 646 static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) ··· 569 635 if (!pkt_stream) 570 636 exit_with_error(ENOMEM); 571 637 638 + pkt_stream->nb_pkts = nb_pkts; 639 + pkt_stream->max_pkt_len = pkt_len; 572 640 for (i = 0; i < nb_pkts; i++) { 573 - pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size, 574 - pkt_len); 575 - pkt_stream->pkts[i].payload = i; 641 + struct pkt *pkt = &pkt_stream->pkts[i]; 642 + 643 + pkt_set(umem, pkt, 0, pkt_len); 644 + pkt->pkt_nb = i; 576 645 } 577 646 578 647 return pkt_stream; ··· 606 669 607 670 pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream); 608 671 for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2) 609 - pkt_set(umem, &pkt_stream->pkts[i], 610 - (i % umem->num_frames) * umem->frame_size + offset, pkt_len); 672 + pkt_set(umem, &pkt_stream->pkts[i], offset, pkt_len); 611 673 612 674 
ifobj->pkt_stream = pkt_stream; 613 675 } ··· 630 694 pkt_stream->pkts[i].valid = false; 631 695 } 632 696 633 - static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) 697 + static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem) 634 698 { 635 - struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb); 636 - struct udphdr *udp_hdr; 637 - struct ethhdr *eth_hdr; 638 - struct iphdr *ip_hdr; 639 - void *data; 699 + if (!pkt->valid) 700 + return pkt->offset; 701 + return pkt->offset + umem_alloc_buffer(umem); 702 + } 640 703 641 - if (!pkt) 642 - return NULL; 643 - if (!pkt->valid || pkt->len < MIN_PKT_SIZE) 644 - return pkt; 704 + static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb, 705 + u32 bytes_written) 706 + { 707 + void *data = xsk_umem__get_data(ifobject->umem->buffer, addr); 645 708 646 - data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr); 647 - udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr)); 648 - ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr)); 649 - eth_hdr = (struct ethhdr *)data; 709 + if (len < MIN_PKT_SIZE) 710 + return; 650 711 651 - gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr); 652 - gen_ip_hdr(ifobject, ip_hdr); 653 - gen_udp_csum(udp_hdr, ip_hdr); 654 - gen_eth_hdr(ifobject, eth_hdr); 712 + if (!bytes_written) { 713 + gen_eth_hdr(ifobject, data); 655 714 656 - return pkt; 715 + len -= PKT_HDR_SIZE; 716 + data += PKT_HDR_SIZE; 717 + } else { 718 + bytes_written -= PKT_HDR_SIZE; 719 + } 720 + 721 + write_payload(data, pkt_nb, bytes_written, len); 657 722 } 658 723 659 724 static void __pkt_stream_generate_custom(struct ifobject *ifobj, ··· 668 731 exit_with_error(ENOMEM); 669 732 670 733 for (i = 0; i < nb_pkts; i++) { 671 - pkt_stream->pkts[i].addr = pkts[i].addr + ifobj->umem->base_addr; 672 - pkt_stream->pkts[i].len = pkts[i].len; 673 - pkt_stream->pkts[i].payload = i; 674 - pkt_stream->pkts[i].valid = pkts[i].valid; 734 + struct 
pkt *pkt = &pkt_stream->pkts[i]; 735 + 736 + pkt->offset = pkts[i].offset; 737 + pkt->len = pkts[i].len; 738 + pkt->pkt_nb = i; 739 + pkt->valid = pkts[i].valid; 740 + if (pkt->len > pkt_stream->max_pkt_len) 741 + pkt_stream->max_pkt_len = pkt->len; 675 742 } 676 743 677 744 ifobj->pkt_stream = pkt_stream; ··· 687 746 __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts); 688 747 } 689 748 690 - static void pkt_dump(void *pkt, u32 len) 749 + static void pkt_print_data(u32 *data, u32 cnt) 691 750 { 692 - char s[INET_ADDRSTRLEN]; 693 - struct ethhdr *ethhdr; 694 - struct udphdr *udphdr; 695 - struct iphdr *iphdr; 696 - u32 payload, i; 751 + u32 i; 697 752 698 - ethhdr = pkt; 699 - iphdr = pkt + sizeof(*ethhdr); 700 - udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr); 753 + for (i = 0; i < cnt; i++) { 754 + u32 seqnum, pkt_nb; 701 755 702 - /*extract L2 frame */ 703 - fprintf(stdout, "DEBUG>> L2: dst mac: "); 704 - for (i = 0; i < ETH_ALEN; i++) 705 - fprintf(stdout, "%02X", ethhdr->h_dest[i]); 706 - 707 - fprintf(stdout, "\nDEBUG>> L2: src mac: "); 708 - for (i = 0; i < ETH_ALEN; i++) 709 - fprintf(stdout, "%02X", ethhdr->h_source[i]); 710 - 711 - /*extract L3 frame */ 712 - fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl); 713 - fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", 714 - inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s))); 715 - fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", 716 - inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s))); 717 - /*extract L4 frame */ 718 - fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source)); 719 - fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest)); 720 - /*extract L5 frame */ 721 - payload = ntohl(*((u32 *)(pkt + PKT_HDR_SIZE))); 722 - 723 - fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload); 724 - fprintf(stdout, "---------------------------------------\n"); 756 + seqnum = ntohl(*data) & 0xffff; 757 + pkt_nb = ntohl(*data) >> 16; 758 + fprintf(stdout, "%u:%u ", 
pkt_nb, seqnum); 759 + data++; 760 + } 725 761 } 726 762 727 - static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr, 728 - u64 pkt_stream_addr) 763 + static void pkt_dump(void *pkt, u32 len, bool eth_header) 764 + { 765 + struct ethhdr *ethhdr = pkt; 766 + u32 i, *data; 767 + 768 + if (eth_header) { 769 + /*extract L2 frame */ 770 + fprintf(stdout, "DEBUG>> L2: dst mac: "); 771 + for (i = 0; i < ETH_ALEN; i++) 772 + fprintf(stdout, "%02X", ethhdr->h_dest[i]); 773 + 774 + fprintf(stdout, "\nDEBUG>> L2: src mac: "); 775 + for (i = 0; i < ETH_ALEN; i++) 776 + fprintf(stdout, "%02X", ethhdr->h_source[i]); 777 + 778 + data = pkt + PKT_HDR_SIZE; 779 + } else { 780 + data = pkt; 781 + } 782 + 783 + /*extract L5 frame */ 784 + fprintf(stdout, "\nDEBUG>> L5: seqnum: "); 785 + pkt_print_data(data, PKT_DUMP_NB_TO_PRINT); 786 + fprintf(stdout, "...."); 787 + if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) { 788 + fprintf(stdout, "\n.... "); 789 + pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT, 790 + PKT_DUMP_NB_TO_PRINT); 791 + } 792 + fprintf(stdout, "\n---------------------------------------\n"); 793 + } 794 + 795 + static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr) 729 796 { 730 797 u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom; 731 - u32 offset = addr % umem->frame_size, expected_offset = 0; 798 + u32 offset = addr % umem->frame_size, expected_offset; 799 + int pkt_offset = pkt->valid ? 
pkt->offset : 0; 732 800 733 - if (!pkt_stream->use_addr_for_fill) 734 - pkt_stream_addr = 0; 801 + if (!umem->unaligned_mode) 802 + pkt_offset = 0; 735 803 736 - expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; 804 + expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; 737 805 738 806 if (offset == expected_offset) 739 807 return true; ··· 756 806 void *data = xsk_umem__get_data(buffer, addr); 757 807 struct xdp_info *meta = data - sizeof(struct xdp_info); 758 808 759 - if (meta->count != pkt->payload) { 809 + if (meta->count != pkt->pkt_nb) { 760 810 ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", 761 - __func__, pkt->payload, meta->count); 811 + __func__, pkt->pkt_nb, meta->count); 762 812 return false; 763 813 } 764 814 ··· 768 818 static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) 769 819 { 770 820 void *data = xsk_umem__get_data(buffer, addr); 771 - struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr)); 821 + u32 seqnum, pkt_data; 772 822 773 823 if (!pkt) { 774 824 ksft_print_msg("[%s] too many packets received\n", __func__); 775 - return false; 825 + goto error; 776 826 } 777 827 778 828 if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) { ··· 783 833 if (pkt->len != len) { 784 834 ksft_print_msg("[%s] expected length [%d], got length [%d]\n", 785 835 __func__, pkt->len, len); 786 - return false; 836 + goto error; 787 837 } 788 838 789 - if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { 790 - u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); 839 + pkt_data = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); 840 + seqnum = pkt_data >> 16; 791 841 792 - if (opt_pkt_dump) 793 - pkt_dump(data, PKT_SIZE); 794 - 795 - if (pkt->payload != seqnum) { 796 - ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", 797 - __func__, pkt->payload, seqnum); 798 - return false; 799 - } 800 - } else { 801 - ksft_print_msg("Invalid 
frame received: "); 802 - ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version, 803 - iphdr->tos); 804 - return false; 842 + if (pkt->pkt_nb != seqnum) { 843 + ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", 844 + __func__, pkt->pkt_nb, seqnum); 845 + goto error; 805 846 } 806 847 807 848 return true; 849 + 850 + error: 851 + pkt_dump(data, len, true); 852 + return false; 808 853 } 809 854 810 855 static void kick_tx(struct xsk_socket_info *xsk) ··· 921 976 addr = xsk_umem__add_offset_to_addr(addr); 922 977 923 978 if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) || 924 - !is_offset_correct(umem, pkt_stream, addr, pkt->addr) || 979 + !is_offset_correct(umem, pkt, addr) || 925 980 (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr))) 926 981 return TEST_FAILURE; 927 982 ··· 937 992 938 993 pthread_mutex_lock(&pacing_mutex); 939 994 pkts_in_flight -= pkts_sent; 940 - if (pkts_in_flight < umem->num_frames) 941 - pthread_cond_signal(&pacing_cond); 942 995 pthread_mutex_unlock(&pacing_mutex); 943 996 pkts_sent = 0; 944 997 } ··· 944 1001 return TEST_PASS; 945 1002 } 946 1003 947 - static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fds, 948 - bool timeout) 1004 + static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout) 949 1005 { 950 1006 struct xsk_socket_info *xsk = ifobject->xsk; 1007 + struct xsk_umem_info *umem = ifobject->umem; 1008 + u32 i, idx = 0, valid_pkts = 0, buffer_len; 951 1009 bool use_poll = ifobject->use_poll; 952 - u32 i, idx = 0, valid_pkts = 0; 953 1010 int ret; 1011 + 1012 + buffer_len = pkt_get_buffer_len(umem, ifobject->pkt_stream->max_pkt_len); 1013 + /* pkts_in_flight might be negative if many invalid packets are sent */ 1014 + if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) { 1015 + kick_tx(xsk); 1016 + return TEST_CONTINUE; 1017 + } 954 1018 955 1019 while (xsk_ring_prod__reserve(&xsk->tx, 
BATCH_SIZE, &idx) < BATCH_SIZE) { 956 1020 if (use_poll) { ··· 984 1034 985 1035 for (i = 0; i < BATCH_SIZE; i++) { 986 1036 struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); 987 - struct pkt *pkt = pkt_generate(ifobject, *pkt_nb); 1037 + struct pkt *pkt = pkt_stream_get_next_tx_pkt(ifobject->pkt_stream); 988 1038 989 1039 if (!pkt) 990 1040 break; 991 1041 992 - tx_desc->addr = pkt->addr; 1042 + tx_desc->addr = pkt_get_addr(pkt, umem); 993 1043 tx_desc->len = pkt->len; 994 - (*pkt_nb)++; 995 - if (pkt->valid) 1044 + if (pkt->valid) { 996 1045 valid_pkts++; 1046 + pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, 0); 1047 + } 997 1048 } 998 1049 999 1050 pthread_mutex_lock(&pacing_mutex); 1000 1051 pkts_in_flight += valid_pkts; 1001 - /* pkts_in_flight might be negative if many invalid packets are sent */ 1002 - if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) { 1003 - kick_tx(xsk); 1004 - pthread_cond_wait(&pacing_cond, &pacing_mutex); 1005 - } 1006 1052 pthread_mutex_unlock(&pacing_mutex); 1007 1053 1008 1054 xsk_ring_prod__submit(&xsk->tx, i); ··· 1034 1088 1035 1089 static int send_pkts(struct test_spec *test, struct ifobject *ifobject) 1036 1090 { 1091 + struct pkt_stream *pkt_stream = ifobject->pkt_stream; 1037 1092 bool timeout = !is_umem_valid(test->ifobj_rx); 1038 1093 struct pollfd fds = { }; 1039 - u32 pkt_cnt = 0, ret; 1094 + u32 ret; 1040 1095 1041 1096 fds.fd = xsk_socket__fd(ifobject->xsk->xsk); 1042 1097 fds.events = POLLOUT; 1043 1098 1044 - while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { 1045 - ret = __send_pkts(ifobject, &pkt_cnt, &fds, timeout); 1099 + while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { 1100 + ret = __send_pkts(ifobject, &fds, timeout); 1101 + if (ret == TEST_CONTINUE && !test->fail) 1102 + continue; 1046 1103 if ((ret || test->fail) && !timeout) 1047 1104 return TEST_FAILURE; 1048 - else if (ret == TEST_PASS && timeout) 1105 + if (ret == TEST_PASS && timeout) 1049 
1106 return ret; 1050 1107 } 1051 1108 ··· 1198 1249 ifobject->xsk = &ifobject->xsk_arr[0]; 1199 1250 ifobject->xskmap = test->ifobj_rx->xskmap; 1200 1251 memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info)); 1252 + ifobject->umem->base_addr = 0; 1201 1253 } 1202 1254 1203 - static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream) 1255 + static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, 1256 + bool fill_up) 1204 1257 { 1205 - u32 idx = 0, i, buffers_to_fill; 1258 + u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM; 1259 + u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts; 1206 1260 int ret; 1207 1261 1208 1262 if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) ··· 1216 1264 ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); 1217 1265 if (ret != buffers_to_fill) 1218 1266 exit_with_error(ENOSPC); 1219 - for (i = 0; i < buffers_to_fill; i++) { 1267 + 1268 + while (filled < buffers_to_fill) { 1269 + struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts); 1220 1270 u64 addr; 1271 + u32 i; 1221 1272 1222 - if (pkt_stream->use_addr_for_fill) { 1223 - struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i); 1273 + for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt); i++) { 1274 + if (!pkt) { 1275 + if (!fill_up) 1276 + break; 1277 + addr = filled * umem->frame_size + umem->base_addr; 1278 + } else if (pkt->offset >= 0) { 1279 + addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem); 1280 + } else { 1281 + addr = pkt->offset + umem_alloc_buffer(umem); 1282 + } 1224 1283 1225 - if (!pkt) 1284 + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; 1285 + if (++filled >= buffers_to_fill) 1226 1286 break; 1227 - addr = pkt->addr; 1228 - } else { 1229 - addr = i * umem->frame_size; 1230 1287 } 1231 - 1232 - *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; 1233 1288 } 1234 - xsk_ring_prod__submit(&umem->fq, i); 1289 + 
xsk_ring_prod__submit(&umem->fq, filled); 1290 + xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled); 1291 + 1292 + pkt_stream_reset(pkt_stream); 1293 + umem_reset_alloc(umem); 1235 1294 } 1236 1295 1237 1296 static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) ··· 1263 1300 if (bufs == MAP_FAILED) 1264 1301 exit_with_error(errno); 1265 1302 1266 - ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz); 1303 + ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz); 1267 1304 if (ret) 1268 1305 exit_with_error(-ret); 1269 - 1270 - xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream); 1271 1306 1272 1307 xsk_configure_socket(test, ifobject, ifobject->umem, false); 1273 1308 ··· 1273 1312 1274 1313 if (!ifobject->rx_on) 1275 1314 return; 1315 + 1316 + xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream, ifobject->use_fill_ring); 1276 1317 1277 1318 ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); 1278 1319 if (ret) ··· 1333 1370 1334 1371 if (!err && ifobject->validation_func) 1335 1372 err = ifobject->validation_func(ifobject); 1336 - if (err) { 1373 + if (err) 1337 1374 report_failure(test); 1338 - pthread_mutex_lock(&pacing_mutex); 1339 - pthread_cond_signal(&pacing_cond); 1340 - pthread_mutex_unlock(&pacing_mutex); 1341 - } 1342 1375 1343 1376 pthread_exit(NULL); 1344 1377 } ··· 1361 1402 pthread_exit(NULL); 1362 1403 } 1363 1404 1364 - static bool xdp_prog_changed(struct test_spec *test, struct ifobject *ifobj) 1405 + static bool xdp_prog_changed_rx(struct test_spec *test) 1365 1406 { 1407 + struct ifobject *ifobj = test->ifobj_rx; 1408 + 1366 1409 return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode; 1410 + } 1411 + 1412 + static bool xdp_prog_changed_tx(struct test_spec *test) 1413 + { 1414 + struct ifobject *ifobj = test->ifobj_tx; 1415 + 1416 + return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode; 1367 1417 } 1368 1418 1369 1419 static void 
xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog, ··· 1401 1433 static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx, 1402 1434 struct ifobject *ifobj_tx) 1403 1435 { 1404 - if (xdp_prog_changed(test, ifobj_rx)) 1436 + if (xdp_prog_changed_rx(test)) 1405 1437 xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode); 1406 1438 1407 1439 if (!ifobj_tx || ifobj_tx->shared_umem) 1408 1440 return; 1409 1441 1410 - if (xdp_prog_changed(test, ifobj_tx)) 1442 + if (xdp_prog_changed_tx(test)) 1411 1443 xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode); 1412 1444 } 1413 1445 ··· 1416 1448 { 1417 1449 pthread_t t0, t1; 1418 1450 1419 - if (ifobj2) 1451 + if (ifobj2) { 1420 1452 if (pthread_barrier_init(&barr, NULL, 2)) 1421 1453 exit_with_error(errno); 1454 + pkt_stream_reset(ifobj2->pkt_stream); 1455 + } 1422 1456 1423 1457 test->current_step++; 1424 1458 pkt_stream_reset(ifobj1->pkt_stream); ··· 1463 1493 struct ifobject *ifobj_rx = test->ifobj_rx; 1464 1494 struct ifobject *ifobj_tx = test->ifobj_tx; 1465 1495 1496 + if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) || 1497 + (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) { 1498 + ksft_test_result_skip("No huge pages present.\n"); 1499 + return TEST_SKIP; 1500 + } 1501 + 1466 1502 xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx); 1467 1503 return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx); 1468 1504 } ··· 1478 1502 return __testapp_validate_traffic(test, ifobj, NULL); 1479 1503 } 1480 1504 1481 - static void testapp_teardown(struct test_spec *test) 1505 + static int testapp_teardown(struct test_spec *test) 1482 1506 { 1483 1507 int i; 1484 1508 1485 1509 test_spec_set_name(test, "TEARDOWN"); 1486 1510 for (i = 0; i < MAX_TEARDOWN_ITER; i++) { 1487 1511 if (testapp_validate_traffic(test)) 1488 - return; 1512 + return TEST_FAILURE; 1489 1513 test_spec_reset(test); 1490 1514 } 1515 + 1516 + 
return TEST_PASS; 1491 1517 } 1492 1518 1493 1519 static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) ··· 1504 1526 *ifobj2 = tmp_ifobj; 1505 1527 } 1506 1528 1507 - static void testapp_bidi(struct test_spec *test) 1529 + static int testapp_bidi(struct test_spec *test) 1508 1530 { 1531 + int res; 1532 + 1509 1533 test_spec_set_name(test, "BIDIRECTIONAL"); 1510 1534 test->ifobj_tx->rx_on = true; 1511 1535 test->ifobj_rx->tx_on = true; 1512 1536 test->total_steps = 2; 1513 1537 if (testapp_validate_traffic(test)) 1514 - return; 1538 + return TEST_FAILURE; 1515 1539 1516 1540 print_verbose("Switching Tx/Rx vectors\n"); 1517 1541 swap_directions(&test->ifobj_rx, &test->ifobj_tx); 1518 - __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); 1542 + res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); 1519 1543 1520 1544 swap_directions(&test->ifobj_rx, &test->ifobj_tx); 1545 + return res; 1521 1546 } 1522 1547 1523 1548 static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) ··· 1537 1556 exit_with_error(errno); 1538 1557 } 1539 1558 1540 - static void testapp_bpf_res(struct test_spec *test) 1559 + static int testapp_bpf_res(struct test_spec *test) 1541 1560 { 1542 1561 test_spec_set_name(test, "BPF_RES"); 1543 1562 test->total_steps = 2; 1544 1563 test->nb_sockets = 2; 1545 1564 if (testapp_validate_traffic(test)) 1546 - return; 1565 + return TEST_FAILURE; 1547 1566 1548 1567 swap_xsk_resources(test->ifobj_tx, test->ifobj_rx); 1549 - testapp_validate_traffic(test); 1568 + return testapp_validate_traffic(test); 1550 1569 } 1551 1570 1552 - static void testapp_headroom(struct test_spec *test) 1571 + static int testapp_headroom(struct test_spec *test) 1553 1572 { 1554 1573 test_spec_set_name(test, "UMEM_HEADROOM"); 1555 1574 test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; 1556 - testapp_validate_traffic(test); 1575 + return testapp_validate_traffic(test); 1557 1576 } 
1558 1577 1559 - static void testapp_stats_rx_dropped(struct test_spec *test) 1578 + static int testapp_stats_rx_dropped(struct test_spec *test) 1560 1579 { 1561 1580 test_spec_set_name(test, "STAT_RX_DROPPED"); 1581 + if (test->mode == TEST_MODE_ZC) { 1582 + ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n"); 1583 + return TEST_SKIP; 1584 + } 1585 + 1562 1586 pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0); 1563 1587 test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - 1564 1588 XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; 1565 1589 pkt_stream_receive_half(test); 1566 1590 test->ifobj_rx->validation_func = validate_rx_dropped; 1567 - testapp_validate_traffic(test); 1591 + return testapp_validate_traffic(test); 1568 1592 } 1569 1593 1570 - static void testapp_stats_tx_invalid_descs(struct test_spec *test) 1594 + static int testapp_stats_tx_invalid_descs(struct test_spec *test) 1571 1595 { 1572 1596 test_spec_set_name(test, "STAT_TX_INVALID"); 1573 1597 pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0); 1574 1598 test->ifobj_tx->validation_func = validate_tx_invalid_descs; 1575 - testapp_validate_traffic(test); 1599 + return testapp_validate_traffic(test); 1576 1600 } 1577 1601 1578 - static void testapp_stats_rx_full(struct test_spec *test) 1602 + static int testapp_stats_rx_full(struct test_spec *test) 1579 1603 { 1580 1604 test_spec_set_name(test, "STAT_RX_FULL"); 1581 - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); 1605 + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); 1582 1606 test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 1583 - DEFAULT_UMEM_BUFFERS, PKT_SIZE); 1584 - if (!test->ifobj_rx->pkt_stream) 1585 - exit_with_error(ENOMEM); 1607 + DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); 1586 1608 1587 1609 test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; 1588 1610 test->ifobj_rx->release_rx = false; 1589 1611 
test->ifobj_rx->validation_func = validate_rx_full; 1590 - testapp_validate_traffic(test); 1612 + return testapp_validate_traffic(test); 1591 1613 } 1592 1614 1593 - static void testapp_stats_fill_empty(struct test_spec *test) 1615 + static int testapp_stats_fill_empty(struct test_spec *test) 1594 1616 { 1595 1617 test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); 1596 - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); 1618 + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); 1597 1619 test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 1598 - DEFAULT_UMEM_BUFFERS, PKT_SIZE); 1599 - if (!test->ifobj_rx->pkt_stream) 1600 - exit_with_error(ENOMEM); 1620 + DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); 1601 1621 1602 1622 test->ifobj_rx->use_fill_ring = false; 1603 1623 test->ifobj_rx->validation_func = validate_fill_empty; 1604 - testapp_validate_traffic(test); 1624 + return testapp_validate_traffic(test); 1605 1625 } 1606 1626 1607 - /* Simple test */ 1608 - static bool hugepages_present(struct ifobject *ifobject) 1627 + static int testapp_unaligned(struct test_spec *test) 1609 1628 { 1610 - size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size; 1611 - void *bufs; 1612 - 1613 - bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, 1614 - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0); 1615 - if (bufs == MAP_FAILED) 1616 - return false; 1617 - 1618 - mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; 1619 - munmap(bufs, mmap_sz); 1620 - return true; 1621 - } 1622 - 1623 - static bool testapp_unaligned(struct test_spec *test) 1624 - { 1625 - if (!hugepages_present(test->ifobj_tx)) { 1626 - ksft_test_result_skip("No 2M huge pages present.\n"); 1627 - return false; 1628 - } 1629 - 1630 1629 test_spec_set_name(test, "UNALIGNED_MODE"); 1631 1630 test->ifobj_tx->umem->unaligned_mode = true; 1632 1631 test->ifobj_rx->umem->unaligned_mode = true; 
1633 - /* Let half of the packets straddle a buffer boundrary */ 1634 - pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2); 1635 - test->ifobj_rx->pkt_stream->use_addr_for_fill = true; 1636 - testapp_validate_traffic(test); 1632 + /* Let half of the packets straddle a 4K buffer boundary */ 1633 + pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2); 1637 1634 1638 - return true; 1635 + return testapp_validate_traffic(test); 1639 1636 } 1640 1637 1641 - static void testapp_single_pkt(struct test_spec *test) 1638 + static int testapp_single_pkt(struct test_spec *test) 1642 1639 { 1643 - struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}}; 1640 + struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}}; 1644 1641 1645 1642 pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); 1646 - testapp_validate_traffic(test); 1643 + return testapp_validate_traffic(test); 1647 1644 } 1648 1645 1649 - static void testapp_invalid_desc(struct test_spec *test) 1646 + static int testapp_invalid_desc(struct test_spec *test) 1650 1647 { 1651 - u64 umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; 1648 + struct xsk_umem_info *umem = test->ifobj_tx->umem; 1649 + u64 umem_size = umem->num_frames * umem->frame_size; 1652 1650 struct pkt pkts[] = { 1653 1651 /* Zero packet address allowed */ 1654 - {0, PKT_SIZE, 0, true}, 1652 + {0, MIN_PKT_SIZE, 0, true}, 1655 1653 /* Allowed packet */ 1656 - {0x1000, PKT_SIZE, 0, true}, 1654 + {0, MIN_PKT_SIZE, 0, true}, 1657 1655 /* Straddling the start of umem */ 1658 - {-2, PKT_SIZE, 0, false}, 1656 + {-2, MIN_PKT_SIZE, 0, false}, 1659 1657 /* Packet too large */ 1660 - {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, 1658 + {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, 1661 1659 /* Up to end of umem allowed */ 1662 - {umem_size - PKT_SIZE, PKT_SIZE, 0, true}, 1660 + {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true}, 1663 1661 /* After umem ends */ 1664 - {umem_size, PKT_SIZE, 0, false}, 1662 
+ {umem_size, MIN_PKT_SIZE, 0, false}, 1665 1663 /* Straddle the end of umem */ 1666 - {umem_size - PKT_SIZE / 2, PKT_SIZE, 0, false}, 1667 - /* Straddle a page boundrary */ 1668 - {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false}, 1669 - /* Straddle a 2K boundrary */ 1670 - {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true}, 1664 + {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, 1665 + /* Straddle a 4K boundary */ 1666 + {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, 1667 + /* Straddle a 2K boundary */ 1668 + {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true}, 1671 1669 /* Valid packet for synch so that something is received */ 1672 - {0x4000, PKT_SIZE, 0, true}}; 1670 + {0, MIN_PKT_SIZE, 0, true}}; 1673 1671 1674 - if (test->ifobj_tx->umem->unaligned_mode) { 1675 - /* Crossing a page boundrary allowed */ 1672 + if (umem->unaligned_mode) { 1673 + /* Crossing a page boundary allowed */ 1676 1674 pkts[7].valid = true; 1677 1675 } 1678 - if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { 1679 - /* Crossing a 2K frame size boundrary not allowed */ 1676 + if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { 1677 + /* Crossing a 2K frame size boundary not allowed */ 1680 1678 pkts[8].valid = false; 1681 1679 } 1682 1680 1683 1681 if (test->ifobj_tx->shared_umem) { 1684 - pkts[4].addr += umem_size; 1685 - pkts[5].addr += umem_size; 1686 - pkts[6].addr += umem_size; 1682 + pkts[4].offset += umem_size; 1683 + pkts[5].offset += umem_size; 1684 + pkts[6].offset += umem_size; 1687 1685 } 1688 1686 1689 1687 pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); 1690 - testapp_validate_traffic(test); 1688 + return testapp_validate_traffic(test); 1691 1689 } 1692 1690 1693 - static void testapp_xdp_drop(struct test_spec *test) 1691 + static int testapp_xdp_drop(struct test_spec *test) 1694 1692 { 1695 1693 struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; 1696 1694 struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; ··· 1679 
1719 skel_rx->maps.xsk, skel_tx->maps.xsk); 1680 1720 1681 1721 pkt_stream_receive_half(test); 1682 - testapp_validate_traffic(test); 1722 + return testapp_validate_traffic(test); 1683 1723 } 1684 1724 1685 - static void testapp_xdp_metadata_count(struct test_spec *test) 1725 + static int testapp_xdp_metadata_count(struct test_spec *test) 1686 1726 { 1687 1727 struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; 1688 1728 struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; ··· 1703 1743 if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) 1704 1744 exit_with_error(errno); 1705 1745 1706 - testapp_validate_traffic(test); 1746 + return testapp_validate_traffic(test); 1707 1747 } 1708 1748 1709 - static void testapp_poll_txq_tmout(struct test_spec *test) 1749 + static int testapp_poll_txq_tmout(struct test_spec *test) 1710 1750 { 1711 1751 test_spec_set_name(test, "POLL_TXQ_FULL"); 1712 1752 ··· 1714 1754 /* create invalid frame by set umem frame_size and pkt length equal to 2048 */ 1715 1755 test->ifobj_tx->umem->frame_size = 2048; 1716 1756 pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048); 1717 - testapp_validate_traffic_single_thread(test, test->ifobj_tx); 1757 + return testapp_validate_traffic_single_thread(test, test->ifobj_tx); 1718 1758 } 1719 1759 1720 - static void testapp_poll_rxq_tmout(struct test_spec *test) 1760 + static int testapp_poll_rxq_tmout(struct test_spec *test) 1721 1761 { 1722 1762 test_spec_set_name(test, "POLL_RXQ_EMPTY"); 1723 1763 test->ifobj_rx->use_poll = true; 1724 - testapp_validate_traffic_single_thread(test, test->ifobj_rx); 1764 + return testapp_validate_traffic_single_thread(test, test->ifobj_rx); 1725 1765 } 1726 1766 1727 1767 static int xsk_load_xdp_programs(struct ifobject *ifobj) ··· 1738 1778 xsk_xdp_progs__destroy(ifobj->xdp_progs); 1739 1779 } 1740 1780 1741 - static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, 1742 - const char *dst_ip, const char 
*src_ip, const u16 dst_port, 1743 - const u16 src_port, thread_func_t func_ptr) 1781 + /* Simple test */ 1782 + static bool hugepages_present(void) 1744 1783 { 1745 - struct in_addr ip; 1784 + size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE; 1785 + void *bufs; 1786 + 1787 + bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, 1788 + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB); 1789 + if (bufs == MAP_FAILED) 1790 + return false; 1791 + 1792 + mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; 1793 + munmap(bufs, mmap_sz); 1794 + return true; 1795 + } 1796 + 1797 + static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, 1798 + thread_func_t func_ptr) 1799 + { 1746 1800 int err; 1747 1801 1748 1802 memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); 1749 1803 memcpy(ifobj->src_mac, src_mac, ETH_ALEN); 1750 - 1751 - inet_aton(dst_ip, &ip); 1752 - ifobj->dst_ip = ip.s_addr; 1753 - 1754 - inet_aton(src_ip, &ip); 1755 - ifobj->src_ip = ip.s_addr; 1756 - 1757 - ifobj->dst_port = dst_port; 1758 - ifobj->src_port = src_port; 1759 1804 1760 1805 ifobj->func_ptr = func_ptr; 1761 1806 ··· 1769 1804 printf("Error loading XDP program\n"); 1770 1805 exit_with_error(err); 1771 1806 } 1807 + 1808 + if (hugepages_present()) 1809 + ifobj->unaligned_supp = true; 1772 1810 } 1773 1811 1774 1812 static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) 1775 1813 { 1814 + int ret = TEST_SKIP; 1815 + 1776 1816 switch (type) { 1777 1817 case TEST_TYPE_STATS_RX_DROPPED: 1778 - if (mode == TEST_MODE_ZC) { 1779 - ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n"); 1780 - return; 1781 - } 1782 - testapp_stats_rx_dropped(test); 1818 + ret = testapp_stats_rx_dropped(test); 1783 1819 break; 1784 1820 case TEST_TYPE_STATS_TX_INVALID_DESCS: 1785 - testapp_stats_tx_invalid_descs(test); 1821 + ret = testapp_stats_tx_invalid_descs(test); 1786 1822 break; 1787 1823 case 
TEST_TYPE_STATS_RX_FULL: 1788 - testapp_stats_rx_full(test); 1824 + ret = testapp_stats_rx_full(test); 1789 1825 break; 1790 1826 case TEST_TYPE_STATS_FILL_EMPTY: 1791 - testapp_stats_fill_empty(test); 1827 + ret = testapp_stats_fill_empty(test); 1792 1828 break; 1793 1829 case TEST_TYPE_TEARDOWN: 1794 - testapp_teardown(test); 1830 + ret = testapp_teardown(test); 1795 1831 break; 1796 1832 case TEST_TYPE_BIDI: 1797 - testapp_bidi(test); 1833 + ret = testapp_bidi(test); 1798 1834 break; 1799 1835 case TEST_TYPE_BPF_RES: 1800 - testapp_bpf_res(test); 1836 + ret = testapp_bpf_res(test); 1801 1837 break; 1802 1838 case TEST_TYPE_RUN_TO_COMPLETION: 1803 1839 test_spec_set_name(test, "RUN_TO_COMPLETION"); 1804 - testapp_validate_traffic(test); 1840 + ret = testapp_validate_traffic(test); 1805 1841 break; 1806 1842 case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT: 1807 1843 test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT"); 1808 - testapp_single_pkt(test); 1844 + ret = testapp_single_pkt(test); 1809 1845 break; 1810 1846 case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME: 1811 1847 test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); 1812 1848 test->ifobj_tx->umem->frame_size = 2048; 1813 1849 test->ifobj_rx->umem->frame_size = 2048; 1814 - pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE); 1815 - testapp_validate_traffic(test); 1850 + pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE); 1851 + ret = testapp_validate_traffic(test); 1816 1852 break; 1817 1853 case TEST_TYPE_RX_POLL: 1818 1854 test->ifobj_rx->use_poll = true; 1819 1855 test_spec_set_name(test, "POLL_RX"); 1820 - testapp_validate_traffic(test); 1856 + ret = testapp_validate_traffic(test); 1821 1857 break; 1822 1858 case TEST_TYPE_TX_POLL: 1823 1859 test->ifobj_tx->use_poll = true; 1824 1860 test_spec_set_name(test, "POLL_TX"); 1825 - testapp_validate_traffic(test); 1861 + ret = testapp_validate_traffic(test); 1826 1862 break; 1827 1863 case TEST_TYPE_POLL_TXQ_TMOUT: 1828 - 
testapp_poll_txq_tmout(test); 1864 + ret = testapp_poll_txq_tmout(test); 1829 1865 break; 1830 1866 case TEST_TYPE_POLL_RXQ_TMOUT: 1831 - testapp_poll_rxq_tmout(test); 1867 + ret = testapp_poll_rxq_tmout(test); 1832 1868 break; 1833 1869 case TEST_TYPE_ALIGNED_INV_DESC: 1834 1870 test_spec_set_name(test, "ALIGNED_INV_DESC"); 1835 - testapp_invalid_desc(test); 1871 + ret = testapp_invalid_desc(test); 1836 1872 break; 1837 1873 case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME: 1838 1874 test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE"); 1839 1875 test->ifobj_tx->umem->frame_size = 2048; 1840 1876 test->ifobj_rx->umem->frame_size = 2048; 1841 - testapp_invalid_desc(test); 1877 + ret = testapp_invalid_desc(test); 1842 1878 break; 1843 1879 case TEST_TYPE_UNALIGNED_INV_DESC: 1844 - if (!hugepages_present(test->ifobj_tx)) { 1845 - ksft_test_result_skip("No 2M huge pages present.\n"); 1846 - return; 1847 - } 1848 1880 test_spec_set_name(test, "UNALIGNED_INV_DESC"); 1849 1881 test->ifobj_tx->umem->unaligned_mode = true; 1850 1882 test->ifobj_rx->umem->unaligned_mode = true; 1851 - testapp_invalid_desc(test); 1883 + ret = testapp_invalid_desc(test); 1852 1884 break; 1853 1885 case TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME: { 1854 1886 u64 page_size, umem_size; 1855 1887 1856 - if (!hugepages_present(test->ifobj_tx)) { 1857 - ksft_test_result_skip("No 2M huge pages present.\n"); 1858 - return; 1859 - } 1860 1888 test_spec_set_name(test, "UNALIGNED_INV_DESC_4K1_FRAME_SIZE"); 1861 1889 /* Odd frame size so the UMEM doesn't end near a page boundary. 
*/ 1862 1890 test->ifobj_tx->umem->frame_size = 4001; ··· 1861 1903 */ 1862 1904 page_size = sysconf(_SC_PAGESIZE); 1863 1905 umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; 1864 - assert(umem_size % page_size > PKT_SIZE); 1865 - assert(umem_size % page_size < page_size - PKT_SIZE); 1866 - testapp_invalid_desc(test); 1906 + assert(umem_size % page_size > MIN_PKT_SIZE); 1907 + assert(umem_size % page_size < page_size - MIN_PKT_SIZE); 1908 + ret = testapp_invalid_desc(test); 1867 1909 break; 1868 1910 } 1869 1911 case TEST_TYPE_UNALIGNED: 1870 - if (!testapp_unaligned(test)) 1871 - return; 1912 + ret = testapp_unaligned(test); 1872 1913 break; 1873 1914 case TEST_TYPE_HEADROOM: 1874 - testapp_headroom(test); 1915 + ret = testapp_headroom(test); 1875 1916 break; 1876 1917 case TEST_TYPE_XDP_DROP_HALF: 1877 - testapp_xdp_drop(test); 1918 + ret = testapp_xdp_drop(test); 1878 1919 break; 1879 1920 case TEST_TYPE_XDP_METADATA_COUNT: 1880 - testapp_xdp_metadata_count(test); 1921 + ret = testapp_xdp_metadata_count(test); 1881 1922 break; 1882 1923 default: 1883 1924 break; 1884 1925 } 1885 1926 1886 - if (!test->fail) 1927 + if (ret == TEST_PASS) 1887 1928 ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), 1888 1929 test->name); 1889 1930 pkt_stream_restore_default(test); ··· 1987 2030 modes++; 1988 2031 } 1989 2032 1990 - init_iface(ifobj_rx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, 1991 - worker_testapp_validate_rx); 1992 - init_iface(ifobj_tx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, 1993 - worker_testapp_validate_tx); 2033 + init_iface(ifobj_rx, MAC1, MAC2, worker_testapp_validate_rx); 2034 + init_iface(ifobj_tx, MAC2, MAC1, worker_testapp_validate_tx); 1994 2035 1995 2036 test_spec_init(&test, ifobj_tx, ifobj_rx, 0); 1996 - tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE); 1997 - rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, 
PKT_SIZE); 2037 + tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); 2038 + rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); 1998 2039 if (!tx_pkt_stream_default || !rx_pkt_stream_default) 1999 2040 exit_with_error(ENOMEM); 2000 2041 test.tx_pkt_stream_default = tx_pkt_stream_default;

+10 -21

tools/testing/selftests/bpf/xskxceiver.h

··· 30 30 #define TEST_PASS 0 31 31 #define TEST_FAILURE -1 32 32 #define TEST_CONTINUE 1 33 + #define TEST_SKIP 2 33 34 #define MAX_INTERFACES 2 34 35 #define MAX_INTERFACE_NAME_CHARS 16 35 36 #define MAX_SOCKETS 2 36 37 #define MAX_TEST_NAME_SIZE 32 37 38 #define MAX_TEARDOWN_ITER 10 38 - #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ 39 - sizeof(struct udphdr)) 40 - #define MIN_ETH_PKT_SIZE 64 41 - #define ETH_FCS_SIZE 4 42 - #define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE) 43 - #define PKT_SIZE (MIN_PKT_SIZE) 44 - #define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) 45 - #define IP_PKT_VER 0x4 46 - #define IP_PKT_TOS 0x9 47 - #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) 48 - #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) 39 + #define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */ 40 + #define MIN_PKT_SIZE 64 49 41 #define USLEEP_MAX 10000 50 42 #define SOCK_RECONF_CTR 10 51 43 #define BATCH_SIZE 64 ··· 49 57 #define UMEM_HEADROOM_TEST_SIZE 128 50 58 #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1) 51 59 #define HUGEPAGE_SIZE (2 * 1024 * 1024) 60 + #define PKT_DUMP_NB_TO_PRINT 16 52 61 53 62 #define print_verbose(x...) 
do { if (opt_verbose) ksft_print_msg(x); } while (0) 54 63 ··· 86 93 TEST_TYPE_MAX 87 94 }; 88 95 89 - static bool opt_pkt_dump; 90 96 static bool opt_verbose; 91 97 92 98 struct xsk_umem_info { 93 99 struct xsk_ring_prod fq; 94 100 struct xsk_ring_cons cq; 95 101 struct xsk_umem *umem; 102 + u64 next_buffer; 96 103 u32 num_frames; 97 104 u32 frame_headroom; 98 105 void *buffer; ··· 111 118 }; 112 119 113 120 struct pkt { 114 - u64 addr; 121 + int offset; 115 122 u32 len; 116 - u32 payload; 123 + u32 pkt_nb; 117 124 bool valid; 118 125 }; 119 126 120 127 struct pkt_stream { 121 128 u32 nb_pkts; 122 - u32 rx_pkt_nb; 129 + u32 current_pkt_nb; 123 130 struct pkt *pkts; 124 - bool use_addr_for_fill; 131 + u32 max_pkt_len; 125 132 }; 126 133 127 134 struct ifobject; ··· 141 148 struct bpf_program *xdp_prog; 142 149 enum test_mode mode; 143 150 int ifindex; 144 - u32 dst_ip; 145 - u32 src_ip; 146 151 u32 bind_flags; 147 - u16 src_port; 148 - u16 dst_port; 149 152 bool tx_on; 150 153 bool rx_on; 151 154 bool use_poll; ··· 150 161 bool release_rx; 151 162 bool shared_umem; 152 163 bool use_metadata; 164 + bool unaligned_supp; 153 165 u8 dst_mac[ETH_ALEN]; 154 166 u8 src_mac[ETH_ALEN]; 155 167 }; ··· 174 184 175 185 pthread_barrier_t barr; 176 186 pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; 177 - pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; 178 187 179 188 int pkts_in_flight; 180 189