Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'add-fd_array_cnt-attribute-for-bpf_prog_load'

Anton Protopopov says:

====================
Add fd_array_cnt attribute for BPF_PROG_LOAD

Add a new attribute to the bpf(BPF_PROG_LOAD) system call. If this
new attribute is non-zero, then the fd_array is considered to be a
contiguous array of length fd_array_cnt, containing only valid
map file descriptors or BTF file descriptors.

This change allows maps (and btfs), which aren't referenced directly
by a BPF program, to be bound to the program _and_ also to be present
during the program verification (so BPF_PROG_BIND_MAP is not enough
for this use case).

The primary reason for this change is that it is a prerequisite for
adding "instruction set" maps, which are both non-referenced by the
program and must be present during the program verification.

The first five commits add the new functionality, the sixth adds
corresponding self-tests, and the last one is a small additional fix.

v1 -> v2:
* rewrite the add_fd_from_fd_array() function (Eduard)
* a few cleanups in selftests (Eduard)

v2 -> v3:
* various renamings (Alexei)
* "0 is not special" (Alexei, Andrii)
* do not alloc memory on fd_array init (Alexei)
* fix leaking maps for error path (Hou Tao)
* use libbpf helpers vs. raw syscalls (Andrii)
* add comments on __btf_get_by_fd/__bpf_map_get (Alexei)
* remove extra code (Alexei)

v3 -> v4:
* simplify error path when parsing fd_array
* libbpf: pass fd_array_cnt only in prog_load (Alexei)
* selftests patch contained extra code (Alexei)
* renames, fix comments (Alexei)

v4 -> v5:
* Add btfs to env->used_btfs (Andrii)
* Fix an integer overflow (Andrii)
* A set of cleanups for selftests (Andrii)
====================

Link: https://patch.msgid.link/20241213130934.1087929-1-aspsk@isovalent.com
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>

+707 -143
+17
include/linux/bpf.h
··· 2301 2301 struct bpf_map *bpf_map_get(u32 ufd); 2302 2302 struct bpf_map *bpf_map_get_with_uref(u32 ufd); 2303 2303 2304 + /* 2305 + * The __bpf_map_get() and __btf_get_by_fd() functions parse a file 2306 + * descriptor and return a corresponding map or btf object. 2307 + * Their names are double underscored to emphasize the fact that they 2308 + * do not increase refcnt. To also increase refcnt use corresponding 2309 + * bpf_map_get() and btf_get_by_fd() functions. 2310 + */ 2311 + 2304 2312 static inline struct bpf_map *__bpf_map_get(struct fd f) 2305 2313 { 2306 2314 if (fd_empty(f)) 2307 2315 return ERR_PTR(-EBADF); 2308 2316 if (unlikely(fd_file(f)->f_op != &bpf_map_fops)) 2317 + return ERR_PTR(-EINVAL); 2318 + return fd_file(f)->private_data; 2319 + } 2320 + 2321 + static inline struct btf *__btf_get_by_fd(struct fd f) 2322 + { 2323 + if (fd_empty(f)) 2324 + return ERR_PTR(-EBADF); 2325 + if (unlikely(fd_file(f)->f_op != &btf_fops)) 2309 2326 return ERR_PTR(-EINVAL); 2310 2327 return fd_file(f)->private_data; 2311 2328 }
+10
include/uapi/linux/bpf.h
··· 1573 1573 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. 1574 1574 */ 1575 1575 __s32 prog_token_fd; 1576 + /* The fd_array_cnt can be used to pass the length of the 1577 + * fd_array array. In this case all the [map] file descriptors 1578 + * passed in this array will be bound to the program, even if 1579 + * the maps are not referenced directly. The functionality is 1580 + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be 1581 + * used by the verifier during the program load. If provided, 1582 + * then the fd_array[0,...,fd_array_cnt-1] is expected to be 1583 + * continuous. 1584 + */ 1585 + __u32 fd_array_cnt; 1576 1586 }; 1577 1587 1578 1588 struct { /* anonymous struct used by BPF_OBJ_* commands */
+3 -8
kernel/bpf/btf.c
··· 7746 7746 struct btf *btf; 7747 7747 CLASS(fd, f)(fd); 7748 7748 7749 - if (fd_empty(f)) 7750 - return ERR_PTR(-EBADF); 7751 - 7752 - if (fd_file(f)->f_op != &btf_fops) 7753 - return ERR_PTR(-EINVAL); 7754 - 7755 - btf = fd_file(f)->private_data; 7756 - refcount_inc(&btf->refcnt); 7749 + btf = __btf_get_by_fd(f); 7750 + if (!IS_ERR(btf)) 7751 + refcount_inc(&btf->refcnt); 7757 7752 7758 7753 return btf; 7759 7754 }
+1 -1
kernel/bpf/syscall.c
··· 2730 2730 } 2731 2731 2732 2732 /* last field in 'union bpf_attr' used by this command */ 2733 - #define BPF_PROG_LOAD_LAST_FIELD prog_token_fd 2733 + #define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt 2734 2734 2735 2735 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) 2736 2736 {
+216 -129
kernel/bpf/verifier.c
··· 19218 19218 return -ENOENT; 19219 19219 } 19220 19220 19221 + /* 19222 + * Add btf to the used_btfs array and return the index. (If the btf was 19223 + * already added, then just return the index.) Upon successful insertion 19224 + * increase btf refcnt, and, if present, also refcount the corresponding 19225 + * kernel module. 19226 + */ 19227 + static int __add_used_btf(struct bpf_verifier_env *env, struct btf *btf) 19228 + { 19229 + struct btf_mod_pair *btf_mod; 19230 + int i; 19231 + 19232 + /* check whether we recorded this BTF (and maybe module) already */ 19233 + for (i = 0; i < env->used_btf_cnt; i++) 19234 + if (env->used_btfs[i].btf == btf) 19235 + return i; 19236 + 19237 + if (env->used_btf_cnt >= MAX_USED_BTFS) 19238 + return -E2BIG; 19239 + 19240 + btf_get(btf); 19241 + 19242 + btf_mod = &env->used_btfs[env->used_btf_cnt]; 19243 + btf_mod->btf = btf; 19244 + btf_mod->module = NULL; 19245 + 19246 + /* if we reference variables from kernel module, bump its refcount */ 19247 + if (btf_is_module(btf)) { 19248 + btf_mod->module = btf_try_get_module(btf); 19249 + if (!btf_mod->module) { 19250 + btf_put(btf); 19251 + return -ENXIO; 19252 + } 19253 + } 19254 + 19255 + return env->used_btf_cnt++; 19256 + } 19257 + 19221 19258 /* replace pseudo btf_id with kernel symbol address */ 19222 - static int check_pseudo_btf_id(struct bpf_verifier_env *env, 19223 - struct bpf_insn *insn, 19224 - struct bpf_insn_aux_data *aux) 19259 + static int __check_pseudo_btf_id(struct bpf_verifier_env *env, 19260 + struct bpf_insn *insn, 19261 + struct bpf_insn_aux_data *aux, 19262 + struct btf *btf) 19225 19263 { 19226 19264 const struct btf_var_secinfo *vsi; 19227 19265 const struct btf_type *datasec; 19228 - struct btf_mod_pair *btf_mod; 19229 19266 const struct btf_type *t; 19230 19267 const char *sym_name; 19231 19268 bool percpu = false; 19232 19269 u32 type, id = insn->imm; 19233 - struct btf *btf; 19234 19270 s32 datasec_id; 19235 19271 u64 addr; 19236 - int i, btf_fd, 
err; 19237 - 19238 - btf_fd = insn[1].imm; 19239 - if (btf_fd) { 19240 - btf = btf_get_by_fd(btf_fd); 19241 - if (IS_ERR(btf)) { 19242 - verbose(env, "invalid module BTF object FD specified.\n"); 19243 - return -EINVAL; 19244 - } 19245 - } else { 19246 - if (!btf_vmlinux) { 19247 - verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n"); 19248 - return -EINVAL; 19249 - } 19250 - btf = btf_vmlinux; 19251 - btf_get(btf); 19252 - } 19272 + int i; 19253 19273 19254 19274 t = btf_type_by_id(btf, id); 19255 19275 if (!t) { 19256 19276 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id); 19257 - err = -ENOENT; 19258 - goto err_put; 19277 + return -ENOENT; 19259 19278 } 19260 19279 19261 19280 if (!btf_type_is_var(t) && !btf_type_is_func(t)) { 19262 19281 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id); 19263 - err = -EINVAL; 19264 - goto err_put; 19282 + return -EINVAL; 19265 19283 } 19266 19284 19267 19285 sym_name = btf_name_by_offset(btf, t->name_off); ··· 19287 19269 if (!addr) { 19288 19270 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n", 19289 19271 sym_name); 19290 - err = -ENOENT; 19291 - goto err_put; 19272 + return -ENOENT; 19292 19273 } 19293 19274 insn[0].imm = (u32)addr; 19294 19275 insn[1].imm = addr >> 32; ··· 19295 19278 if (btf_type_is_func(t)) { 19296 19279 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; 19297 19280 aux->btf_var.mem_size = 0; 19298 - goto check_btf; 19281 + return 0; 19299 19282 } 19300 19283 19301 19284 datasec_id = find_btf_percpu_datasec(btf); ··· 19326 19309 tname = btf_name_by_offset(btf, t->name_off); 19327 19310 verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n", 19328 19311 tname, PTR_ERR(ret)); 19329 - err = -EINVAL; 19330 - goto err_put; 19312 + return -EINVAL; 19331 19313 } 19332 19314 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; 19333 19315 aux->btf_var.mem_size = tsize; ··· 19335 19319 
aux->btf_var.btf = btf; 19336 19320 aux->btf_var.btf_id = type; 19337 19321 } 19338 - check_btf: 19339 - /* check whether we recorded this BTF (and maybe module) already */ 19340 - for (i = 0; i < env->used_btf_cnt; i++) { 19341 - if (env->used_btfs[i].btf == btf) { 19342 - btf_put(btf); 19343 - return 0; 19344 - } 19345 - } 19346 - 19347 - if (env->used_btf_cnt >= MAX_USED_BTFS) { 19348 - err = -E2BIG; 19349 - goto err_put; 19350 - } 19351 - 19352 - btf_mod = &env->used_btfs[env->used_btf_cnt]; 19353 - btf_mod->btf = btf; 19354 - btf_mod->module = NULL; 19355 - 19356 - /* if we reference variables from kernel module, bump its refcount */ 19357 - if (btf_is_module(btf)) { 19358 - btf_mod->module = btf_try_get_module(btf); 19359 - if (!btf_mod->module) { 19360 - err = -ENXIO; 19361 - goto err_put; 19362 - } 19363 - } 19364 - 19365 - env->used_btf_cnt++; 19366 19322 19367 19323 return 0; 19368 - err_put: 19369 - btf_put(btf); 19370 - return err; 19324 + } 19325 + 19326 + static int check_pseudo_btf_id(struct bpf_verifier_env *env, 19327 + struct bpf_insn *insn, 19328 + struct bpf_insn_aux_data *aux) 19329 + { 19330 + struct btf *btf; 19331 + int btf_fd; 19332 + int err; 19333 + 19334 + btf_fd = insn[1].imm; 19335 + if (btf_fd) { 19336 + CLASS(fd, f)(btf_fd); 19337 + 19338 + btf = __btf_get_by_fd(f); 19339 + if (IS_ERR(btf)) { 19340 + verbose(env, "invalid module BTF object FD specified.\n"); 19341 + return -EINVAL; 19342 + } 19343 + } else { 19344 + if (!btf_vmlinux) { 19345 + verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n"); 19346 + return -EINVAL; 19347 + } 19348 + btf = btf_vmlinux; 19349 + } 19350 + 19351 + err = __check_pseudo_btf_id(env, insn, aux, btf); 19352 + if (err) 19353 + return err; 19354 + 19355 + err = __add_used_btf(env, btf); 19356 + if (err < 0) 19357 + return err; 19358 + return 0; 19371 19359 } 19372 19360 19373 19361 static bool is_tracing_prog_type(enum bpf_prog_type type) ··· 19386 19366 
default: 19387 19367 return false; 19388 19368 } 19369 + } 19370 + 19371 + static bool bpf_map_is_cgroup_storage(struct bpf_map *map) 19372 + { 19373 + return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || 19374 + map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); 19389 19375 } 19390 19376 19391 19377 static int check_map_prog_compatibility(struct bpf_verifier_env *env, ··· 19472 19446 return -EINVAL; 19473 19447 } 19474 19448 19449 + if (bpf_map_is_cgroup_storage(map) && 19450 + bpf_cgroup_storage_assign(env->prog->aux, map)) { 19451 + verbose(env, "only one cgroup storage of each type is allowed\n"); 19452 + return -EBUSY; 19453 + } 19454 + 19455 + if (map->map_type == BPF_MAP_TYPE_ARENA) { 19456 + if (env->prog->aux->arena) { 19457 + verbose(env, "Only one arena per program\n"); 19458 + return -EBUSY; 19459 + } 19460 + if (!env->allow_ptr_leaks || !env->bpf_capable) { 19461 + verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n"); 19462 + return -EPERM; 19463 + } 19464 + if (!env->prog->jit_requested) { 19465 + verbose(env, "JIT is required to use arena\n"); 19466 + return -EOPNOTSUPP; 19467 + } 19468 + if (!bpf_jit_supports_arena()) { 19469 + verbose(env, "JIT doesn't support arena\n"); 19470 + return -EOPNOTSUPP; 19471 + } 19472 + env->prog->aux->arena = (void *)map; 19473 + if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) { 19474 + verbose(env, "arena's user address must be set via map_extra or mmap()\n"); 19475 + return -EINVAL; 19476 + } 19477 + } 19478 + 19475 19479 return 0; 19476 19480 } 19477 19481 19478 - static bool bpf_map_is_cgroup_storage(struct bpf_map *map) 19482 + static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map) 19479 19483 { 19480 - return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || 19481 - map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); 19482 - } 19483 - 19484 - /* Add map behind fd to used maps list, if it's not already there, and return 19485 - * its index. 
Also set *reused to true if this map was already in the list of 19486 - * used maps. 19487 - * Returns <0 on error, or >= 0 index, on success. 19488 - */ 19489 - static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd, bool *reused) 19490 - { 19491 - CLASS(fd, f)(fd); 19492 - struct bpf_map *map; 19493 - int i; 19494 - 19495 - map = __bpf_map_get(f); 19496 - if (IS_ERR(map)) { 19497 - verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); 19498 - return PTR_ERR(map); 19499 - } 19484 + int i, err; 19500 19485 19501 19486 /* check whether we recorded this map already */ 19502 - for (i = 0; i < env->used_map_cnt; i++) { 19503 - if (env->used_maps[i] == map) { 19504 - *reused = true; 19487 + for (i = 0; i < env->used_map_cnt; i++) 19488 + if (env->used_maps[i] == map) 19505 19489 return i; 19506 - } 19507 - } 19508 19490 19509 19491 if (env->used_map_cnt >= MAX_USED_MAPS) { 19510 19492 verbose(env, "The total number of maps per program has reached the limit of %u\n", 19511 19493 MAX_USED_MAPS); 19512 19494 return -E2BIG; 19513 19495 } 19496 + 19497 + err = check_map_prog_compatibility(env, map, env->prog); 19498 + if (err) 19499 + return err; 19514 19500 19515 19501 if (env->prog->sleepable) 19516 19502 atomic64_inc(&map->sleepable_refcnt); ··· 19534 19496 */ 19535 19497 bpf_map_inc(map); 19536 19498 19537 - *reused = false; 19538 19499 env->used_maps[env->used_map_cnt++] = map; 19539 19500 19540 19501 return env->used_map_cnt - 1; 19502 + } 19503 + 19504 + /* Add map behind fd to used maps list, if it's not already there, and return 19505 + * its index. 19506 + * Returns <0 on error, or >= 0 index, on success. 
19507 + */ 19508 + static int add_used_map(struct bpf_verifier_env *env, int fd) 19509 + { 19510 + struct bpf_map *map; 19511 + CLASS(fd, f)(fd); 19512 + 19513 + map = __bpf_map_get(f); 19514 + if (IS_ERR(map)) { 19515 + verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); 19516 + return PTR_ERR(map); 19517 + } 19518 + 19519 + return __add_used_map(env, map); 19541 19520 } 19542 19521 19543 19522 /* find and rewrite pseudo imm in ld_imm64 instructions: ··· 19588 19533 int map_idx; 19589 19534 u64 addr; 19590 19535 u32 fd; 19591 - bool reused; 19592 19536 19593 19537 if (i == insn_cnt - 1 || insn[1].code != 0 || 19594 19538 insn[1].dst_reg != 0 || insn[1].src_reg != 0 || ··· 19648 19594 break; 19649 19595 } 19650 19596 19651 - map_idx = add_used_map_from_fd(env, fd, &reused); 19597 + map_idx = add_used_map(env, fd); 19652 19598 if (map_idx < 0) 19653 19599 return map_idx; 19654 19600 map = env->used_maps[map_idx]; 19655 19601 19656 19602 aux = &env->insn_aux_data[i]; 19657 19603 aux->map_index = map_idx; 19658 - 19659 - err = check_map_prog_compatibility(env, map, env->prog); 19660 - if (err) 19661 - return err; 19662 19604 19663 19605 if (insn[0].src_reg == BPF_PSEUDO_MAP_FD || 19664 19606 insn[0].src_reg == BPF_PSEUDO_MAP_IDX) { ··· 19685 19635 19686 19636 insn[0].imm = (u32)addr; 19687 19637 insn[1].imm = addr >> 32; 19688 - 19689 - /* proceed with extra checks only if its newly added used map */ 19690 - if (reused) 19691 - goto next_insn; 19692 - 19693 - if (bpf_map_is_cgroup_storage(map) && 19694 - bpf_cgroup_storage_assign(env->prog->aux, map)) { 19695 - verbose(env, "only one cgroup storage of each type is allowed\n"); 19696 - return -EBUSY; 19697 - } 19698 - if (map->map_type == BPF_MAP_TYPE_ARENA) { 19699 - if (env->prog->aux->arena) { 19700 - verbose(env, "Only one arena per program\n"); 19701 - return -EBUSY; 19702 - } 19703 - if (!env->allow_ptr_leaks || !env->bpf_capable) { 19704 - verbose(env, "CAP_BPF and CAP_PERFMON are required to use 
arena\n"); 19705 - return -EPERM; 19706 - } 19707 - if (!env->prog->jit_requested) { 19708 - verbose(env, "JIT is required to use arena\n"); 19709 - return -EOPNOTSUPP; 19710 - } 19711 - if (!bpf_jit_supports_arena()) { 19712 - verbose(env, "JIT doesn't support arena\n"); 19713 - return -EOPNOTSUPP; 19714 - } 19715 - env->prog->aux->arena = (void *)map; 19716 - if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) { 19717 - verbose(env, "arena's user address must be set via map_extra or mmap()\n"); 19718 - return -EINVAL; 19719 - } 19720 - } 19721 19638 19722 19639 next_insn: 19723 19640 insn++; ··· 22856 22839 return btf_vmlinux; 22857 22840 } 22858 22841 22842 + /* 22843 + * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In 22844 + * this case expect that every file descriptor in the array is either a map or 22845 + * a BTF. Everything else is considered to be trash. 22846 + */ 22847 + static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd) 22848 + { 22849 + struct bpf_map *map; 22850 + struct btf *btf; 22851 + CLASS(fd, f)(fd); 22852 + int err; 22853 + 22854 + map = __bpf_map_get(f); 22855 + if (!IS_ERR(map)) { 22856 + err = __add_used_map(env, map); 22857 + if (err < 0) 22858 + return err; 22859 + return 0; 22860 + } 22861 + 22862 + btf = __btf_get_by_fd(f); 22863 + if (!IS_ERR(btf)) { 22864 + err = __add_used_btf(env, btf); 22865 + if (err < 0) 22866 + return err; 22867 + return 0; 22868 + } 22869 + 22870 + verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd); 22871 + return PTR_ERR(map); 22872 + } 22873 + 22874 + static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr) 22875 + { 22876 + size_t size = sizeof(int); 22877 + int ret; 22878 + int fd; 22879 + u32 i; 22880 + 22881 + env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); 22882 + 22883 + /* 22884 + * The only difference between old (no fd_array_cnt is given) and new 22885 + * APIs is that in the 
latter case the fd_array is expected to be 22886 + * continuous and is scanned for map fds right away 22887 + */ 22888 + if (!attr->fd_array_cnt) 22889 + return 0; 22890 + 22891 + /* Check for integer overflow */ 22892 + if (attr->fd_array_cnt >= (U32_MAX / size)) { 22893 + verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt); 22894 + return -EINVAL; 22895 + } 22896 + 22897 + for (i = 0; i < attr->fd_array_cnt; i++) { 22898 + if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size)) 22899 + return -EFAULT; 22900 + 22901 + ret = add_fd_from_fd_array(env, fd); 22902 + if (ret) 22903 + return ret; 22904 + } 22905 + 22906 + return 0; 22907 + } 22908 + 22859 22909 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) 22860 22910 { 22861 22911 u64 start_time = ktime_get_ns(); ··· 22954 22870 env->insn_aux_data[i].orig_idx = i; 22955 22871 env->prog = *prog; 22956 22872 env->ops = bpf_verifier_ops[env->prog->type]; 22957 - env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); 22958 22873 22959 22874 env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token); 22960 22875 env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token); ··· 22975 22892 attr->log_size); 22976 22893 if (ret) 22977 22894 goto err_unlock; 22895 + 22896 + ret = process_fd_array(env, attr, uattr); 22897 + if (ret) 22898 + goto skip_full_check; 22978 22899 22979 22900 mark_verifier_state_clean(env); 22980 22901
+10
tools/include/uapi/linux/bpf.h
··· 1573 1573 * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. 1574 1574 */ 1575 1575 __s32 prog_token_fd; 1576 + /* The fd_array_cnt can be used to pass the length of the 1577 + * fd_array array. In this case all the [map] file descriptors 1578 + * passed in this array will be bound to the program, even if 1579 + * the maps are not referenced directly. The functionality is 1580 + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be 1581 + * used by the verifier during the program load. If provided, 1582 + * then the fd_array[0,...,fd_array_cnt-1] is expected to be 1583 + * continuous. 1584 + */ 1585 + __u32 fd_array_cnt; 1576 1586 }; 1577 1587 1578 1588 struct { /* anonymous struct used by BPF_OBJ_* commands */
+2 -1
tools/lib/bpf/bpf.c
··· 238 238 const struct bpf_insn *insns, size_t insn_cnt, 239 239 struct bpf_prog_load_opts *opts) 240 240 { 241 - const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); 241 + const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt); 242 242 void *finfo = NULL, *linfo = NULL; 243 243 const char *func_info, *line_info; 244 244 __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; ··· 311 311 attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0); 312 312 313 313 attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL)); 314 + attr.fd_array_cnt = OPTS_GET(opts, fd_array_cnt, 0); 314 315 315 316 if (log_level) { 316 317 attr.log_buf = ptr_to_u64(log_buf);
+4 -1
tools/lib/bpf/bpf.h
··· 107 107 */ 108 108 __u32 log_true_size; 109 109 __u32 token_fd; 110 + 111 + /* if set, provides the length of fd_array */ 112 + __u32 fd_array_cnt; 110 113 size_t :0; 111 114 }; 112 - #define bpf_prog_load_opts__last_field token_fd 115 + #define bpf_prog_load_opts__last_field fd_array_cnt 113 116 114 117 LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, 115 118 const char *prog_name, const char *license,
+441
tools/testing/selftests/bpf/prog_tests/fd_array.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <test_progs.h> 4 + 5 + #include <linux/btf.h> 6 + #include <bpf/bpf.h> 7 + 8 + #include "../test_btf.h" 9 + 10 + static inline int new_map(void) 11 + { 12 + const char *name = NULL; 13 + __u32 max_entries = 1; 14 + __u32 value_size = 8; 15 + __u32 key_size = 4; 16 + 17 + return bpf_map_create(BPF_MAP_TYPE_ARRAY, name, 18 + key_size, value_size, 19 + max_entries, NULL); 20 + } 21 + 22 + static int new_btf(void) 23 + { 24 + struct btf_blob { 25 + struct btf_header btf_hdr; 26 + __u32 types[8]; 27 + __u32 str; 28 + } raw_btf = { 29 + .btf_hdr = { 30 + .magic = BTF_MAGIC, 31 + .version = BTF_VERSION, 32 + .hdr_len = sizeof(struct btf_header), 33 + .type_len = sizeof(raw_btf.types), 34 + .str_off = offsetof(struct btf_blob, str) - offsetof(struct btf_blob, types), 35 + .str_len = sizeof(raw_btf.str), 36 + }, 37 + .types = { 38 + /* long */ 39 + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [1] */ 40 + /* unsigned long */ 41 + BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */ 42 + }, 43 + }; 44 + 45 + return bpf_btf_load(&raw_btf, sizeof(raw_btf), NULL); 46 + } 47 + 48 + #define Close(FD) do { \ 49 + if ((FD) >= 0) { \ 50 + close(FD); \ 51 + FD = -1; \ 52 + } \ 53 + } while(0) 54 + 55 + static bool map_exists(__u32 id) 56 + { 57 + int fd; 58 + 59 + fd = bpf_map_get_fd_by_id(id); 60 + if (fd >= 0) { 61 + close(fd); 62 + return true; 63 + } 64 + return false; 65 + } 66 + 67 + static bool btf_exists(__u32 id) 68 + { 69 + int fd; 70 + 71 + fd = bpf_btf_get_fd_by_id(id); 72 + if (fd >= 0) { 73 + close(fd); 74 + return true; 75 + } 76 + return false; 77 + } 78 + 79 + static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *map_ids) 80 + { 81 + __u32 len = sizeof(struct bpf_prog_info); 82 + struct bpf_prog_info info; 83 + int err; 84 + 85 + memset(&info, 0, len); 86 + info.nr_map_ids = *nr_map_ids, 87 + info.map_ids = ptr_to_u64(map_ids), 88 + 89 + err = bpf_prog_get_info_by_fd(prog_fd, &info, 
&len); 90 + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) 91 + return -1; 92 + 93 + *nr_map_ids = info.nr_map_ids; 94 + 95 + return 0; 96 + } 97 + 98 + static int __load_test_prog(int map_fd, const int *fd_array, int fd_array_cnt) 99 + { 100 + /* A trivial program which uses one map */ 101 + struct bpf_insn insns[] = { 102 + BPF_LD_MAP_FD(BPF_REG_1, map_fd), 103 + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), 104 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), 105 + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), 106 + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), 107 + BPF_MOV64_IMM(BPF_REG_0, 0), 108 + BPF_EXIT_INSN(), 109 + }; 110 + LIBBPF_OPTS(bpf_prog_load_opts, opts); 111 + 112 + opts.fd_array = fd_array; 113 + opts.fd_array_cnt = fd_array_cnt; 114 + 115 + return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, ARRAY_SIZE(insns), &opts); 116 + } 117 + 118 + static int load_test_prog(const int *fd_array, int fd_array_cnt) 119 + { 120 + int map_fd; 121 + int ret; 122 + 123 + map_fd = new_map(); 124 + if (!ASSERT_GE(map_fd, 0, "new_map")) 125 + return map_fd; 126 + 127 + ret = __load_test_prog(map_fd, fd_array, fd_array_cnt); 128 + close(map_fd); 129 + return ret; 130 + } 131 + 132 + static bool check_expected_map_ids(int prog_fd, int expected, __u32 *map_ids, __u32 *nr_map_ids) 133 + { 134 + int err; 135 + 136 + err = bpf_prog_get_map_ids(prog_fd, nr_map_ids, map_ids); 137 + if (!ASSERT_OK(err, "bpf_prog_get_map_ids")) 138 + return false; 139 + if (!ASSERT_EQ(*nr_map_ids, expected, "unexpected nr_map_ids")) 140 + return false; 141 + 142 + return true; 143 + } 144 + 145 + /* 146 + * Load a program, which uses one map. No fd_array maps are present. 147 + * On return only one map is expected to be bound to prog. 
148 + */ 149 + static void check_fd_array_cnt__no_fd_array(void) 150 + { 151 + __u32 map_ids[16]; 152 + __u32 nr_map_ids; 153 + int prog_fd = -1; 154 + 155 + prog_fd = load_test_prog(NULL, 0); 156 + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) 157 + return; 158 + nr_map_ids = ARRAY_SIZE(map_ids); 159 + check_expected_map_ids(prog_fd, 1, map_ids, &nr_map_ids); 160 + close(prog_fd); 161 + } 162 + 163 + /* 164 + * Load a program, which uses one map, and pass two extra, non-equal, maps in 165 + * fd_array with fd_array_cnt=2. On return three maps are expected to be bound 166 + * to the program. 167 + */ 168 + static void check_fd_array_cnt__fd_array_ok(void) 169 + { 170 + int extra_fds[2] = { -1, -1 }; 171 + __u32 map_ids[16]; 172 + __u32 nr_map_ids; 173 + int prog_fd = -1; 174 + 175 + extra_fds[0] = new_map(); 176 + if (!ASSERT_GE(extra_fds[0], 0, "new_map")) 177 + goto cleanup; 178 + extra_fds[1] = new_map(); 179 + if (!ASSERT_GE(extra_fds[1], 0, "new_map")) 180 + goto cleanup; 181 + prog_fd = load_test_prog(extra_fds, 2); 182 + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) 183 + goto cleanup; 184 + nr_map_ids = ARRAY_SIZE(map_ids); 185 + if (!check_expected_map_ids(prog_fd, 3, map_ids, &nr_map_ids)) 186 + goto cleanup; 187 + 188 + /* maps should still exist when original file descriptors are closed */ 189 + Close(extra_fds[0]); 190 + Close(extra_fds[1]); 191 + if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map_ids[0] should exist")) 192 + goto cleanup; 193 + if (!ASSERT_EQ(map_exists(map_ids[1]), true, "map_ids[1] should exist")) 194 + goto cleanup; 195 + 196 + /* some fds might be invalid, so ignore return codes */ 197 + cleanup: 198 + Close(extra_fds[1]); 199 + Close(extra_fds[0]); 200 + Close(prog_fd); 201 + } 202 + 203 + /* 204 + * Load a program with a few extra maps duplicated in the fd_array. 205 + * After the load maps should only be referenced once. 
206 + */ 207 + static void check_fd_array_cnt__duplicated_maps(void) 208 + { 209 + int extra_fds[4] = { -1, -1, -1, -1 }; 210 + __u32 map_ids[16]; 211 + __u32 nr_map_ids; 212 + int prog_fd = -1; 213 + 214 + extra_fds[0] = extra_fds[2] = new_map(); 215 + if (!ASSERT_GE(extra_fds[0], 0, "new_map")) 216 + goto cleanup; 217 + extra_fds[1] = extra_fds[3] = new_map(); 218 + if (!ASSERT_GE(extra_fds[1], 0, "new_map")) 219 + goto cleanup; 220 + prog_fd = load_test_prog(extra_fds, 4); 221 + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) 222 + goto cleanup; 223 + nr_map_ids = ARRAY_SIZE(map_ids); 224 + if (!check_expected_map_ids(prog_fd, 3, map_ids, &nr_map_ids)) 225 + goto cleanup; 226 + 227 + /* maps should still exist when original file descriptors are closed */ 228 + Close(extra_fds[0]); 229 + Close(extra_fds[1]); 230 + if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map should exist")) 231 + goto cleanup; 232 + if (!ASSERT_EQ(map_exists(map_ids[1]), true, "map should exist")) 233 + goto cleanup; 234 + 235 + /* some fds might be invalid, so ignore return codes */ 236 + cleanup: 237 + Close(extra_fds[1]); 238 + Close(extra_fds[0]); 239 + Close(prog_fd); 240 + } 241 + 242 + /* 243 + * Check that if maps which are referenced by a program are 244 + * passed in fd_array, then they will be referenced only once 245 + */ 246 + static void check_fd_array_cnt__referenced_maps_in_fd_array(void) 247 + { 248 + int extra_fds[1] = { -1 }; 249 + __u32 map_ids[16]; 250 + __u32 nr_map_ids; 251 + int prog_fd = -1; 252 + 253 + extra_fds[0] = new_map(); 254 + if (!ASSERT_GE(extra_fds[0], 0, "new_map")) 255 + goto cleanup; 256 + prog_fd = __load_test_prog(extra_fds[0], extra_fds, 1); 257 + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) 258 + goto cleanup; 259 + nr_map_ids = ARRAY_SIZE(map_ids); 260 + if (!check_expected_map_ids(prog_fd, 1, map_ids, &nr_map_ids)) 261 + goto cleanup; 262 + 263 + /* map should still exist when original file descriptor is closed */ 264 + Close(extra_fds[0]); 265 + if 
(!ASSERT_EQ(map_exists(map_ids[0]), true, "map should exist")) 266 + goto cleanup; 267 + 268 + /* some fds might be invalid, so ignore return codes */ 269 + cleanup: 270 + Close(extra_fds[0]); 271 + Close(prog_fd); 272 + } 273 + 274 + static int get_btf_id_by_fd(int btf_fd, __u32 *id) 275 + { 276 + struct bpf_btf_info info; 277 + __u32 info_len = sizeof(info); 278 + int err; 279 + 280 + memset(&info, 0, info_len); 281 + err = bpf_btf_get_info_by_fd(btf_fd, &info, &info_len); 282 + if (err) 283 + return err; 284 + if (id) 285 + *id = info.id; 286 + return 0; 287 + } 288 + 289 + /* 290 + * Check that fd_array operates properly for btfs. Namely, to check that 291 + * passing a btf fd in fd_array increases its reference count, do the 292 + * following: 293 + * 1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1 294 + * 2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2 295 + * 3) Close the btf_fd, now refcnt=1 296 + * Wait and check that BTF stil exists. 297 + */ 298 + static void check_fd_array_cnt__referenced_btfs(void) 299 + { 300 + int extra_fds[1] = { -1 }; 301 + int prog_fd = -1; 302 + __u32 btf_id; 303 + int tries; 304 + int err; 305 + 306 + extra_fds[0] = new_btf(); 307 + if (!ASSERT_GE(extra_fds[0], 0, "new_btf")) 308 + goto cleanup; 309 + prog_fd = load_test_prog(extra_fds, 1); 310 + if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD")) 311 + goto cleanup; 312 + 313 + /* btf should still exist when original file descriptor is closed */ 314 + err = get_btf_id_by_fd(extra_fds[0], &btf_id); 315 + if (!ASSERT_GE(err, 0, "get_btf_id_by_fd")) 316 + goto cleanup; 317 + 318 + Close(extra_fds[0]); 319 + 320 + if (!ASSERT_GE(kern_sync_rcu(), 0, "kern_sync_rcu 1")) 321 + goto cleanup; 322 + 323 + if (!ASSERT_EQ(btf_exists(btf_id), true, "btf should exist")) 324 + goto cleanup; 325 + 326 + Close(prog_fd); 327 + 328 + /* The program is freed by a workqueue, so no reliable 329 + * way to sync, so just wait a bit (max ~1 second). 
*/ 330 + for (tries = 100; tries >= 0; tries--) { 331 + usleep(1000); 332 + 333 + if (!btf_exists(btf_id)) 334 + break; 335 + 336 + if (tries) 337 + continue; 338 + 339 + PRINT_FAIL("btf should have been freed"); 340 + } 341 + 342 + /* some fds might be invalid, so ignore return codes */ 343 + cleanup: 344 + Close(extra_fds[0]); 345 + Close(prog_fd); 346 + } 347 + 348 + /* 349 + * Test that a program with trash in fd_array can't be loaded: 350 + * only map and BTF file descriptors should be accepted. 351 + */ 352 + static void check_fd_array_cnt__fd_array_with_trash(void) 353 + { 354 + int extra_fds[3] = { -1, -1, -1 }; 355 + int prog_fd = -1; 356 + 357 + extra_fds[0] = new_map(); 358 + if (!ASSERT_GE(extra_fds[0], 0, "new_map")) 359 + goto cleanup; 360 + extra_fds[1] = new_btf(); 361 + if (!ASSERT_GE(extra_fds[1], 0, "new_btf")) 362 + goto cleanup; 363 + 364 + /* trash 1: not a file descriptor */ 365 + extra_fds[2] = 0xbeef; 366 + prog_fd = load_test_prog(extra_fds, 3); 367 + if (!ASSERT_EQ(prog_fd, -EBADF, "prog should have been rejected with -EBADF")) 368 + goto cleanup; 369 + 370 + /* trash 2: not a map or btf */ 371 + extra_fds[2] = socket(AF_INET, SOCK_STREAM, 0); 372 + if (!ASSERT_GE(extra_fds[2], 0, "socket")) 373 + goto cleanup; 374 + 375 + prog_fd = load_test_prog(extra_fds, 3); 376 + if (!ASSERT_EQ(prog_fd, -EINVAL, "prog should have been rejected with -EINVAL")) 377 + goto cleanup; 378 + 379 + /* Validate that the prog is ok if trash is removed */ 380 + Close(extra_fds[2]); 381 + extra_fds[2] = new_btf(); 382 + if (!ASSERT_GE(extra_fds[2], 0, "new_btf")) 383 + goto cleanup; 384 + 385 + prog_fd = load_test_prog(extra_fds, 3); 386 + if (!ASSERT_GE(prog_fd, 0, "prog should have been loaded")) 387 + goto cleanup; 388 + 389 + /* some fds might be invalid, so ignore return codes */ 390 + cleanup: 391 + Close(extra_fds[2]); 392 + Close(extra_fds[1]); 393 + Close(extra_fds[0]); 394 + } 395 + 396 + /* 397 + * Test that a program with too big fd_array can't be 
loaded. 398 + */ 399 + static void check_fd_array_cnt__fd_array_too_big(void) 400 + { 401 + int extra_fds[65]; 402 + int prog_fd = -1; 403 + int i; 404 + 405 + for (i = 0; i < 65; i++) { 406 + extra_fds[i] = new_map(); 407 + if (!ASSERT_GE(extra_fds[i], 0, "new_map")) 408 + goto cleanup_fds; 409 + } 410 + 411 + prog_fd = load_test_prog(extra_fds, 65); 412 + ASSERT_EQ(prog_fd, -E2BIG, "prog should have been rejected with -E2BIG"); 413 + 414 + cleanup_fds: 415 + while (i > 0) 416 + Close(extra_fds[--i]); 417 + } 418 + 419 + void test_fd_array_cnt(void) 420 + { 421 + if (test__start_subtest("no-fd-array")) 422 + check_fd_array_cnt__no_fd_array(); 423 + 424 + if (test__start_subtest("fd-array-ok")) 425 + check_fd_array_cnt__fd_array_ok(); 426 + 427 + if (test__start_subtest("fd-array-dup-input")) 428 + check_fd_array_cnt__duplicated_maps(); 429 + 430 + if (test__start_subtest("fd-array-ref-maps-in-array")) 431 + check_fd_array_cnt__referenced_maps_in_fd_array(); 432 + 433 + if (test__start_subtest("fd-array-ref-btfs")) 434 + check_fd_array_cnt__referenced_btfs(); 435 + 436 + if (test__start_subtest("fd-array-trash-input")) 437 + check_fd_array_cnt__fd_array_with_trash(); 438 + 439 + if (test__start_subtest("fd-array-2big")) 440 + check_fd_array_cnt__fd_array_too_big(); 441 + }
+3 -3
tools/testing/selftests/bpf/progs/syscall.c
··· 76 76 .magic = BTF_MAGIC, 77 77 .version = BTF_VERSION, 78 78 .hdr_len = sizeof(struct btf_header), 79 - .type_len = sizeof(__u32) * 8, 80 - .str_off = sizeof(__u32) * 8, 81 - .str_len = sizeof(__u32), 79 + .type_len = sizeof(raw_btf.types), 80 + .str_off = offsetof(struct btf_blob, str) - offsetof(struct btf_blob, types), 81 + .str_len = sizeof(raw_btf.str), 82 82 }, 83 83 .types = { 84 84 /* long */