Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'bpf-fix-the-release-of-inner-map'

Hou Tao says:

====================
bpf: Fix the release of inner map

From: Hou Tao <houtao1@huawei.com>

Hi,

The patchset aims to fix the release of inner map in map array or map
htab. The release of an inner map is different from that of a normal
map. A normal map is released after the bpf program which uses the map
is destroyed, because the bpf program tracks the maps it uses. However,
a bpf program can not track the used inner maps, because these inner
maps may be updated or deleted dynamically, and for now the ref-counter
of an inner map is decreased after the inner map is removed from the
outer map, so the inner map may be freed before the bpf program, which
is accessing the inner map, exits, and there will be a use-after-free
problem as demonstrated by patch #6.

The patchset fixes the problem by deferring the release of the inner
map. The freeing of the inner map is deferred according to the sleepable
attributes of the bpf programs which own the outer map. Patch #1 fixes
the warning when running the newly-added selftest under interpreter
mode. Patch #2 adds more parameters to .map_fd_put_ptr() to prepare for
the fix. Patch #3 fixes the incorrect value of need_defer when freeing
the fd array. Patch #4 fixes the potential use-after-free problem by
using call_rcu_tasks_trace() and call_rcu() to wait for one tasks trace
RCU GP and one RCU GP unconditionally. Patch #5 optimizes the freeing of
the inner map by removing the unnecessary RCU GP waiting. Patch #6 adds
a selftest to demonstrate the potential use-after-free problem. Patch #7
updates a selftest to update the outer map in a syscall bpf program.

Please see individual patches for more details. And comments are always
welcome.

Change Log:
v5:
* patch #3: rename fd_array_map_delete_elem_with_deferred_free() to
__fd_array_map_delete_elem() (Alexei)
* patch #5: use atomic64_t instead of atomic_t to prevent potential
overflow (Alexei)
* patch #7: use ptr_to_u64() helper instead of force casting to initialize
pointers in bpf_attr (Alexei)

v4: https://lore.kernel.org/bpf/20231130140120.1736235-1-houtao@huaweicloud.com
* patch #2: don't use "deferred", use "need_defer" uniformly
* patch #3: newly-added, fix the incorrect value of need_defer during
fd array free.
* patch #4: doesn't consider the case in which bpf map is not used by
any bpf program and only use sleepable_refcnt to remove
unnecessary tasks trace RCU GP (Alexei)
* patch #4: remove memory barriers added due to cautiousness (Alexei)

v3: https://lore.kernel.org/bpf/20231124113033.503338-1-houtao@huaweicloud.com
* multiple variable renamings (Martin)
* define BPF_MAP_RCU_GP/BPF_MAP_RCU_TT_GP as bit (Martin)
* use call_rcu() and its variants instead of synchronize_rcu() (Martin)
* remove unnecessary mask in bpf_map_free_deferred() (Martin)
* place atomic_or() and the related smp_mb() together (Martin)
* add patch #6 to demonstrate that updating outer map in syscall
program is dead-lock free (Alexei)
* update comments about the memory barrier in bpf_map_fd_put_ptr()
* update commit message for patch #3 and #4 to describe more details

v2: https://lore.kernel.org/bpf/20231113123324.3914612-1-houtao@huaweicloud.com
* defer the invocation of ops->map_free() instead of bpf_map_put() (Martin)
* update selftest to make it being reproducible under JIT mode (Martin)
* remove unnecessary preparatory patches

v1: https://lore.kernel.org/bpf/20231107140702.1891778-1-houtao@huaweicloud.com
====================

Link: https://lore.kernel.org/r/20231204140425.1480317-1-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+453 -41
+13 -2
include/linux/bpf.h
··· 106 106 /* funcs called by prog_array and perf_event_array map */ 107 107 void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, 108 108 int fd); 109 - void (*map_fd_put_ptr)(void *ptr); 109 + /* If need_defer is true, the implementation should guarantee that 110 + * the to-be-put element is still alive before the bpf program, which 111 + * may manipulate it, exists. 112 + */ 113 + void (*map_fd_put_ptr)(struct bpf_map *map, void *ptr, bool need_defer); 110 114 int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); 111 115 u32 (*map_fd_sys_lookup_elem)(void *ptr); 112 116 void (*map_seq_show_elem)(struct bpf_map *map, void *key, ··· 276 272 */ 277 273 atomic64_t refcnt ____cacheline_aligned; 278 274 atomic64_t usercnt; 279 - struct work_struct work; 275 + /* rcu is used before freeing and work is only used during freeing */ 276 + union { 277 + struct work_struct work; 278 + struct rcu_head rcu; 279 + }; 280 280 struct mutex freeze_mutex; 281 281 atomic64_t writecnt; 282 282 /* 'Ownership' of program-containing map is claimed by the first program ··· 296 288 } owner; 297 289 bool bypass_spec_v1; 298 290 bool frozen; /* write-once; write-protected by freeze_mutex */ 291 + bool free_after_mult_rcu_gp; 292 + bool free_after_rcu_gp; 293 + atomic64_t sleepable_refcnt; 299 294 s64 __percpu *elem_count; 300 295 }; 301 296
+20 -13
kernel/bpf/arraymap.c
··· 867 867 } 868 868 869 869 if (old_ptr) 870 - map->ops->map_fd_put_ptr(old_ptr); 870 + map->ops->map_fd_put_ptr(map, old_ptr, true); 871 871 return 0; 872 872 } 873 873 874 - static long fd_array_map_delete_elem(struct bpf_map *map, void *key) 874 + static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer) 875 875 { 876 876 struct bpf_array *array = container_of(map, struct bpf_array, map); 877 877 void *old_ptr; ··· 890 890 } 891 891 892 892 if (old_ptr) { 893 - map->ops->map_fd_put_ptr(old_ptr); 893 + map->ops->map_fd_put_ptr(map, old_ptr, need_defer); 894 894 return 0; 895 895 } else { 896 896 return -ENOENT; 897 897 } 898 + } 899 + 900 + static long fd_array_map_delete_elem(struct bpf_map *map, void *key) 901 + { 902 + return __fd_array_map_delete_elem(map, key, true); 898 903 } 899 904 900 905 static void *prog_fd_array_get_ptr(struct bpf_map *map, ··· 918 913 return prog; 919 914 } 920 915 921 - static void prog_fd_array_put_ptr(void *ptr) 916 + static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) 922 917 { 918 + /* bpf_prog is freed after one RCU or tasks trace grace period */ 923 919 bpf_prog_put(ptr); 924 920 } 925 921 ··· 930 924 } 931 925 932 926 /* decrement refcnt of all bpf_progs that are stored in this map */ 933 - static void bpf_fd_array_map_clear(struct bpf_map *map) 927 + static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer) 934 928 { 935 929 struct bpf_array *array = container_of(map, struct bpf_array, map); 936 930 int i; 937 931 938 932 for (i = 0; i < array->map.max_entries; i++) 939 - fd_array_map_delete_elem(map, &i); 933 + __fd_array_map_delete_elem(map, &i, need_defer); 940 934 } 941 935 942 936 static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key, ··· 1115 1109 { 1116 1110 struct bpf_map *map = container_of(work, struct bpf_array_aux, 1117 1111 work)->map; 1118 - bpf_fd_array_map_clear(map); 1112 + bpf_fd_array_map_clear(map, true); 1119 
1113 bpf_map_put(map); 1120 1114 } 1121 1115 ··· 1245 1239 return ee; 1246 1240 } 1247 1241 1248 - static void perf_event_fd_array_put_ptr(void *ptr) 1242 + static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) 1249 1243 { 1244 + /* bpf_perf_event is freed after one RCU grace period */ 1250 1245 bpf_event_entry_free_rcu(ptr); 1251 1246 } 1252 1247 ··· 1265 1258 for (i = 0; i < array->map.max_entries; i++) { 1266 1259 ee = READ_ONCE(array->ptrs[i]); 1267 1260 if (ee && ee->map_file == map_file) 1268 - fd_array_map_delete_elem(map, &i); 1261 + __fd_array_map_delete_elem(map, &i, true); 1269 1262 } 1270 1263 rcu_read_unlock(); 1271 1264 } ··· 1273 1266 static void perf_event_fd_array_map_free(struct bpf_map *map) 1274 1267 { 1275 1268 if (map->map_flags & BPF_F_PRESERVE_ELEMS) 1276 - bpf_fd_array_map_clear(map); 1269 + bpf_fd_array_map_clear(map, false); 1277 1270 fd_array_map_free(map); 1278 1271 } 1279 1272 ··· 1301 1294 return cgroup_get_from_fd(fd); 1302 1295 } 1303 1296 1304 - static void cgroup_fd_array_put_ptr(void *ptr) 1297 + static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) 1305 1298 { 1306 1299 /* cgroup_put free cgrp after a rcu grace period */ 1307 1300 cgroup_put(ptr); ··· 1309 1302 1310 1303 static void cgroup_fd_array_free(struct bpf_map *map) 1311 1304 { 1312 - bpf_fd_array_map_clear(map); 1305 + bpf_fd_array_map_clear(map, false); 1313 1306 fd_array_map_free(map); 1314 1307 } 1315 1308 ··· 1354 1347 * is protected by fdget/fdput. 1355 1348 */ 1356 1349 bpf_map_meta_free(map->inner_map_meta); 1357 - bpf_fd_array_map_clear(map); 1350 + bpf_fd_array_map_clear(map, false); 1358 1351 fd_array_map_free(map); 1359 1352 } 1360 1353
+4
kernel/bpf/core.c
··· 2664 2664 struct bpf_map **used_maps, u32 len) 2665 2665 { 2666 2666 struct bpf_map *map; 2667 + bool sleepable; 2667 2668 u32 i; 2668 2669 2670 + sleepable = aux->sleepable; 2669 2671 for (i = 0; i < len; i++) { 2670 2672 map = used_maps[i]; 2671 2673 if (map->ops->map_poke_untrack) 2672 2674 map->ops->map_poke_untrack(map, aux); 2675 + if (sleepable) 2676 + atomic64_dec(&map->sleepable_refcnt); 2673 2677 bpf_map_put(map); 2674 2678 } 2675 2679 }
+3 -3
kernel/bpf/hashtab.c
··· 897 897 898 898 if (map->ops->map_fd_put_ptr) { 899 899 ptr = fd_htab_map_get_ptr(map, l); 900 - map->ops->map_fd_put_ptr(ptr); 900 + map->ops->map_fd_put_ptr(map, ptr, true); 901 901 } 902 902 } 903 903 ··· 2484 2484 hlist_nulls_for_each_entry_safe(l, n, head, hash_node) { 2485 2485 void *ptr = fd_htab_map_get_ptr(map, l); 2486 2486 2487 - map->ops->map_fd_put_ptr(ptr); 2487 + map->ops->map_fd_put_ptr(map, ptr, false); 2488 2488 } 2489 2489 } 2490 2490 ··· 2525 2525 2526 2526 ret = htab_map_update_elem(map, key, &ptr, map_flags); 2527 2527 if (ret) 2528 - map->ops->map_fd_put_ptr(ptr); 2528 + map->ops->map_fd_put_ptr(map, ptr, false); 2529 2529 2530 2530 return ret; 2531 2531 }
+8 -5
kernel/bpf/helpers.c
··· 32 32 * 33 33 * Different map implementations will rely on rcu in map methods 34 34 * lookup/update/delete, therefore eBPF programs must run under rcu lock 35 - * if program is allowed to access maps, so check rcu_read_lock_held in 36 - * all three functions. 35 + * if program is allowed to access maps, so check rcu_read_lock_held() or 36 + * rcu_read_lock_trace_held() in all three functions. 37 37 */ 38 38 BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) 39 39 { 40 - WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); 40 + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 41 + !rcu_read_lock_bh_held()); 41 42 return (unsigned long) map->ops->map_lookup_elem(map, key); 42 43 } 43 44 ··· 54 53 BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, 55 54 void *, value, u64, flags) 56 55 { 57 - WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); 56 + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 57 + !rcu_read_lock_bh_held()); 58 58 return map->ops->map_update_elem(map, key, value, flags); 59 59 } 60 60 ··· 72 70 73 71 BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) 74 72 { 75 - WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); 73 + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() && 74 + !rcu_read_lock_bh_held()); 76 75 return map->ops->map_delete_elem(map, key); 77 76 } 78 77
+13 -4
kernel/bpf/map_in_map.c
··· 127 127 return inner_map; 128 128 } 129 129 130 - void bpf_map_fd_put_ptr(void *ptr) 130 + void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) 131 131 { 132 - /* ptr->ops->map_free() has to go through one 133 - * rcu grace period by itself. 132 + struct bpf_map *inner_map = ptr; 133 + 134 + /* Defer the freeing of inner map according to the sleepable attribute 135 + * of bpf program which owns the outer map, so unnecessary waiting for 136 + * RCU tasks trace grace period can be avoided. 134 137 */ 135 - bpf_map_put(ptr); 138 + if (need_defer) { 139 + if (atomic64_read(&map->sleepable_refcnt)) 140 + WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true); 141 + else 142 + WRITE_ONCE(inner_map->free_after_rcu_gp, true); 143 + } 144 + bpf_map_put(inner_map); 136 145 } 137 146 138 147 u32 bpf_map_fd_sys_lookup_elem(void *ptr)
+1 -1
kernel/bpf/map_in_map.h
··· 13 13 void bpf_map_meta_free(struct bpf_map *map_meta); 14 14 void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file, 15 15 int ufd); 16 - void bpf_map_fd_put_ptr(void *ptr); 16 + void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer); 17 17 u32 bpf_map_fd_sys_lookup_elem(void *ptr); 18 18 19 19 #endif
+35 -5
kernel/bpf/syscall.c
··· 719 719 } 720 720 } 721 721 722 + static void bpf_map_free_in_work(struct bpf_map *map) 723 + { 724 + INIT_WORK(&map->work, bpf_map_free_deferred); 725 + /* Avoid spawning kworkers, since they all might contend 726 + * for the same mutex like slab_mutex. 727 + */ 728 + queue_work(system_unbound_wq, &map->work); 729 + } 730 + 731 + static void bpf_map_free_rcu_gp(struct rcu_head *rcu) 732 + { 733 + bpf_map_free_in_work(container_of(rcu, struct bpf_map, rcu)); 734 + } 735 + 736 + static void bpf_map_free_mult_rcu_gp(struct rcu_head *rcu) 737 + { 738 + if (rcu_trace_implies_rcu_gp()) 739 + bpf_map_free_rcu_gp(rcu); 740 + else 741 + call_rcu(rcu, bpf_map_free_rcu_gp); 742 + } 743 + 722 744 /* decrement map refcnt and schedule it for freeing via workqueue 723 745 * (underlying map implementation ops->map_free() might sleep) 724 746 */ ··· 750 728 /* bpf_map_free_id() must be called first */ 751 729 bpf_map_free_id(map); 752 730 btf_put(map->btf); 753 - INIT_WORK(&map->work, bpf_map_free_deferred); 754 - /* Avoid spawning kworkers, since they all might contend 755 - * for the same mutex like slab_mutex. 756 - */ 757 - queue_work(system_unbound_wq, &map->work); 731 + 732 + WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt)); 733 + if (READ_ONCE(map->free_after_mult_rcu_gp)) 734 + call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp); 735 + else if (READ_ONCE(map->free_after_rcu_gp)) 736 + call_rcu(&map->rcu, bpf_map_free_rcu_gp); 737 + else 738 + bpf_map_free_in_work(map); 758 739 } 759 740 } 760 741 EXPORT_SYMBOL_GPL(bpf_map_put); ··· 5348 5323 goto out_unlock; 5349 5324 } 5350 5325 5326 + /* The bpf program will not access the bpf map, but for the sake of 5327 + * simplicity, increase sleepable_refcnt for sleepable program as well. 
5328 + */ 5329 + if (prog->aux->sleepable) 5330 + atomic64_inc(&map->sleepable_refcnt); 5351 5331 memcpy(used_maps_new, used_maps_old, 5352 5332 sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); 5353 5333 used_maps_new[prog->aux->used_map_cnt] = map;
+3 -1
kernel/bpf/verifier.c
··· 17889 17889 return -E2BIG; 17890 17890 } 17891 17891 17892 + if (env->prog->aux->sleepable) 17893 + atomic64_inc(&map->sleepable_refcnt); 17892 17894 /* hold the map. If the program is rejected by verifier, 17893 17895 * the map will be released by release_maps() or it 17894 17896 * will be used by the valid program until it's unloaded 17895 - * and all maps are released in free_used_maps() 17897 + * and all maps are released in bpf_free_used_maps() 17896 17898 */ 17897 17899 bpf_map_inc(map); 17898 17900
+141
tools/testing/selftests/bpf/prog_tests/map_in_map.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (C) 2023. Huawei Technologies Co., Ltd */ 3 + #define _GNU_SOURCE 4 + #include <unistd.h> 5 + #include <sys/syscall.h> 6 + #include <test_progs.h> 7 + #include <bpf/btf.h> 8 + #include "access_map_in_map.skel.h" 9 + 10 + struct thread_ctx { 11 + pthread_barrier_t barrier; 12 + int outer_map_fd; 13 + int start, abort; 14 + int loop, err; 15 + }; 16 + 17 + static int wait_for_start_or_abort(struct thread_ctx *ctx) 18 + { 19 + while (!ctx->start && !ctx->abort) 20 + usleep(1); 21 + return ctx->abort ? -1 : 0; 22 + } 23 + 24 + static void *update_map_fn(void *data) 25 + { 26 + struct thread_ctx *ctx = data; 27 + int loop = ctx->loop, err = 0; 28 + 29 + if (wait_for_start_or_abort(ctx) < 0) 30 + return NULL; 31 + pthread_barrier_wait(&ctx->barrier); 32 + 33 + while (loop-- > 0) { 34 + int fd, zero = 0; 35 + 36 + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); 37 + if (fd < 0) { 38 + err |= 1; 39 + pthread_barrier_wait(&ctx->barrier); 40 + continue; 41 + } 42 + 43 + /* Remove the old inner map */ 44 + if (bpf_map_update_elem(ctx->outer_map_fd, &zero, &fd, 0) < 0) 45 + err |= 2; 46 + close(fd); 47 + pthread_barrier_wait(&ctx->barrier); 48 + } 49 + 50 + ctx->err = err; 51 + 52 + return NULL; 53 + } 54 + 55 + static void *access_map_fn(void *data) 56 + { 57 + struct thread_ctx *ctx = data; 58 + int loop = ctx->loop; 59 + 60 + if (wait_for_start_or_abort(ctx) < 0) 61 + return NULL; 62 + pthread_barrier_wait(&ctx->barrier); 63 + 64 + while (loop-- > 0) { 65 + /* Access the old inner map */ 66 + syscall(SYS_getpgid); 67 + pthread_barrier_wait(&ctx->barrier); 68 + } 69 + 70 + return NULL; 71 + } 72 + 73 + static void test_map_in_map_access(const char *prog_name, const char *map_name) 74 + { 75 + struct access_map_in_map *skel; 76 + struct bpf_map *outer_map; 77 + struct bpf_program *prog; 78 + struct thread_ctx ctx; 79 + pthread_t tid[2]; 80 + int err; 81 + 82 + skel = access_map_in_map__open(); 83 + if 
(!ASSERT_OK_PTR(skel, "access_map_in_map open")) 84 + return; 85 + 86 + prog = bpf_object__find_program_by_name(skel->obj, prog_name); 87 + if (!ASSERT_OK_PTR(prog, "find program")) 88 + goto out; 89 + bpf_program__set_autoload(prog, true); 90 + 91 + outer_map = bpf_object__find_map_by_name(skel->obj, map_name); 92 + if (!ASSERT_OK_PTR(outer_map, "find map")) 93 + goto out; 94 + 95 + err = access_map_in_map__load(skel); 96 + if (!ASSERT_OK(err, "access_map_in_map load")) 97 + goto out; 98 + 99 + err = access_map_in_map__attach(skel); 100 + if (!ASSERT_OK(err, "access_map_in_map attach")) 101 + goto out; 102 + 103 + skel->bss->tgid = getpid(); 104 + 105 + memset(&ctx, 0, sizeof(ctx)); 106 + pthread_barrier_init(&ctx.barrier, NULL, 2); 107 + ctx.outer_map_fd = bpf_map__fd(outer_map); 108 + ctx.loop = 4; 109 + 110 + err = pthread_create(&tid[0], NULL, update_map_fn, &ctx); 111 + if (!ASSERT_OK(err, "close_thread")) 112 + goto out; 113 + 114 + err = pthread_create(&tid[1], NULL, access_map_fn, &ctx); 115 + if (!ASSERT_OK(err, "read_thread")) { 116 + ctx.abort = 1; 117 + pthread_join(tid[0], NULL); 118 + goto out; 119 + } 120 + 121 + ctx.start = 1; 122 + pthread_join(tid[0], NULL); 123 + pthread_join(tid[1], NULL); 124 + 125 + ASSERT_OK(ctx.err, "err"); 126 + out: 127 + access_map_in_map__destroy(skel); 128 + } 129 + 130 + void test_map_in_map(void) 131 + { 132 + if (test__start_subtest("acc_map_in_array")) 133 + test_map_in_map_access("access_map_in_array", "outer_array_map"); 134 + if (test__start_subtest("sleepable_acc_map_in_array")) 135 + test_map_in_map_access("sleepable_access_map_in_array", "outer_array_map"); 136 + if (test__start_subtest("acc_map_in_htab")) 137 + test_map_in_map_access("access_map_in_htab", "outer_htab_map"); 138 + if (test__start_subtest("sleepable_acc_map_in_htab")) 139 + test_map_in_map_access("sleepable_access_map_in_htab", "outer_htab_map"); 140 + } 141 +
+28 -2
tools/testing/selftests/bpf/prog_tests/syscall.c
··· 12 12 int btf_fd; 13 13 }; 14 14 15 - void test_syscall(void) 15 + static void test_syscall_load_prog(void) 16 16 { 17 17 static char verifier_log[8192]; 18 18 struct args ctx = { ··· 32 32 if (!ASSERT_OK_PTR(skel, "skel_load")) 33 33 goto cleanup; 34 34 35 - prog_fd = bpf_program__fd(skel->progs.bpf_prog); 35 + prog_fd = bpf_program__fd(skel->progs.load_prog); 36 36 err = bpf_prog_test_run_opts(prog_fd, &tattr); 37 37 ASSERT_EQ(err, 0, "err"); 38 38 ASSERT_EQ(tattr.retval, 1, "retval"); ··· 52 52 close(ctx.map_fd); 53 53 if (ctx.btf_fd > 0) 54 54 close(ctx.btf_fd); 55 + } 56 + 57 + static void test_syscall_update_outer_map(void) 58 + { 59 + LIBBPF_OPTS(bpf_test_run_opts, opts); 60 + struct syscall *skel; 61 + int err, prog_fd; 62 + 63 + skel = syscall__open_and_load(); 64 + if (!ASSERT_OK_PTR(skel, "skel_load")) 65 + goto cleanup; 66 + 67 + prog_fd = bpf_program__fd(skel->progs.update_outer_map); 68 + err = bpf_prog_test_run_opts(prog_fd, &opts); 69 + ASSERT_EQ(err, 0, "err"); 70 + ASSERT_EQ(opts.retval, 1, "retval"); 71 + cleanup: 72 + syscall__destroy(skel); 73 + } 74 + 75 + void test_syscall(void) 76 + { 77 + if (test__start_subtest("load_prog")) 78 + test_syscall_load_prog(); 79 + if (test__start_subtest("update_outer_map")) 80 + test_syscall_update_outer_map(); 55 81 }
+93
tools/testing/selftests/bpf/progs/access_map_in_map.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (C) 2023. Huawei Technologies Co., Ltd */ 3 + #include <linux/bpf.h> 4 + #include <time.h> 5 + #include <bpf/bpf_helpers.h> 6 + 7 + #include "bpf_misc.h" 8 + 9 + struct inner_map_type { 10 + __uint(type, BPF_MAP_TYPE_ARRAY); 11 + __uint(key_size, 4); 12 + __uint(value_size, 4); 13 + __uint(max_entries, 1); 14 + } inner_map SEC(".maps"); 15 + 16 + struct { 17 + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); 18 + __type(key, int); 19 + __type(value, int); 20 + __uint(max_entries, 1); 21 + __array(values, struct inner_map_type); 22 + } outer_array_map SEC(".maps") = { 23 + .values = { 24 + [0] = &inner_map, 25 + }, 26 + }; 27 + 28 + struct { 29 + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); 30 + __type(key, int); 31 + __type(value, int); 32 + __uint(max_entries, 1); 33 + __array(values, struct inner_map_type); 34 + } outer_htab_map SEC(".maps") = { 35 + .values = { 36 + [0] = &inner_map, 37 + }, 38 + }; 39 + 40 + char _license[] SEC("license") = "GPL"; 41 + 42 + int tgid = 0; 43 + 44 + static int acc_map_in_map(void *outer_map) 45 + { 46 + int i, key, value = 0xdeadbeef; 47 + void *inner_map; 48 + 49 + if ((bpf_get_current_pid_tgid() >> 32) != tgid) 50 + return 0; 51 + 52 + /* Find nonexistent inner map */ 53 + key = 1; 54 + inner_map = bpf_map_lookup_elem(outer_map, &key); 55 + if (inner_map) 56 + return 0; 57 + 58 + /* Find the old inner map */ 59 + key = 0; 60 + inner_map = bpf_map_lookup_elem(outer_map, &key); 61 + if (!inner_map) 62 + return 0; 63 + 64 + /* Wait for the old inner map to be replaced */ 65 + for (i = 0; i < 2048; i++) 66 + bpf_map_update_elem(inner_map, &key, &value, 0); 67 + 68 + return 0; 69 + } 70 + 71 + SEC("?kprobe/" SYS_PREFIX "sys_getpgid") 72 + int access_map_in_array(void *ctx) 73 + { 74 + return acc_map_in_map(&outer_array_map); 75 + } 76 + 77 + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") 78 + int sleepable_access_map_in_array(void *ctx) 79 + { 80 + return 
acc_map_in_map(&outer_array_map); 81 + } 82 + 83 + SEC("?kprobe/" SYS_PREFIX "sys_getpgid") 84 + int access_map_in_htab(void *ctx) 85 + { 86 + return acc_map_in_map(&outer_htab_map); 87 + } 88 + 89 + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") 90 + int sleepable_access_map_in_htab(void *ctx) 91 + { 92 + return acc_map_in_map(&outer_htab_map); 93 + }
+91 -5
tools/testing/selftests/bpf/progs/syscall.c
··· 6 6 #include <bpf/bpf_tracing.h> 7 7 #include <../../../tools/include/linux/filter.h> 8 8 #include <linux/btf.h> 9 + #include <string.h> 10 + #include <errno.h> 9 11 10 12 char _license[] SEC("license") = "GPL"; 13 + 14 + struct bpf_map { 15 + int id; 16 + } __attribute__((preserve_access_index)); 11 17 12 18 struct args { 13 19 __u64 log_buf; ··· 32 26 #define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \ 33 27 BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ 34 28 BTF_INT_ENC(encoding, bits_offset, bits) 29 + 30 + struct { 31 + __uint(type, BPF_MAP_TYPE_ARRAY); 32 + __type(key, int); 33 + __type(value, union bpf_attr); 34 + __uint(max_entries, 1); 35 + } bpf_attr_array SEC(".maps"); 36 + 37 + struct inner_map_type { 38 + __uint(type, BPF_MAP_TYPE_ARRAY); 39 + __uint(key_size, 4); 40 + __uint(value_size, 4); 41 + __uint(max_entries, 1); 42 + } inner_map SEC(".maps"); 43 + 44 + struct { 45 + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); 46 + __type(key, int); 47 + __type(value, int); 48 + __uint(max_entries, 1); 49 + __array(values, struct inner_map_type); 50 + } outer_array_map SEC(".maps") = { 51 + .values = { 52 + [0] = &inner_map, 53 + }, 54 + }; 55 + 56 + static inline __u64 ptr_to_u64(const void *ptr) 57 + { 58 + return (__u64) (unsigned long) ptr; 59 + } 35 60 36 61 static int btf_load(void) 37 62 { ··· 95 58 } 96 59 97 60 SEC("syscall") 98 - int bpf_prog(struct args *ctx) 61 + int load_prog(struct args *ctx) 99 62 { 100 63 static char license[] = "GPL"; 101 64 static struct bpf_insn insns[] = { ··· 131 94 map_create_attr.max_entries = ctx->max_entries; 132 95 map_create_attr.btf_fd = ret; 133 96 134 - prog_load_attr.license = (long) license; 135 - prog_load_attr.insns = (long) insns; 97 + prog_load_attr.license = ptr_to_u64(license); 98 + prog_load_attr.insns = ptr_to_u64(insns); 136 99 prog_load_attr.log_buf = ctx->log_buf; 137 100 prog_load_attr.log_size = ctx->log_size; 138 101 prog_load_attr.log_level = 1; ··· 144 107 insns[3].imm 
= ret; 145 108 146 109 map_update_attr.map_fd = ret; 147 - map_update_attr.key = (long) &key; 148 - map_update_attr.value = (long) &value; 110 + map_update_attr.key = ptr_to_u64(&key); 111 + map_update_attr.value = ptr_to_u64(&value); 149 112 ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr)); 150 113 if (ret < 0) 151 114 return ret; ··· 155 118 return ret; 156 119 ctx->prog_fd = ret; 157 120 return 1; 121 + } 122 + 123 + SEC("syscall") 124 + int update_outer_map(void *ctx) 125 + { 126 + int zero = 0, ret = 0, outer_fd = -1, inner_fd = -1, err; 127 + const int attr_sz = sizeof(union bpf_attr); 128 + union bpf_attr *attr; 129 + 130 + attr = bpf_map_lookup_elem((struct bpf_map *)&bpf_attr_array, &zero); 131 + if (!attr) 132 + goto out; 133 + 134 + memset(attr, 0, attr_sz); 135 + attr->map_id = ((struct bpf_map *)&outer_array_map)->id; 136 + outer_fd = bpf_sys_bpf(BPF_MAP_GET_FD_BY_ID, attr, attr_sz); 137 + if (outer_fd < 0) 138 + goto out; 139 + 140 + memset(attr, 0, attr_sz); 141 + attr->map_type = BPF_MAP_TYPE_ARRAY; 142 + attr->key_size = 4; 143 + attr->value_size = 4; 144 + attr->max_entries = 1; 145 + inner_fd = bpf_sys_bpf(BPF_MAP_CREATE, attr, attr_sz); 146 + if (inner_fd < 0) 147 + goto out; 148 + 149 + memset(attr, 0, attr_sz); 150 + attr->map_fd = outer_fd; 151 + attr->key = ptr_to_u64(&zero); 152 + attr->value = ptr_to_u64(&inner_fd); 153 + err = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, attr, attr_sz); 154 + if (err) 155 + goto out; 156 + 157 + memset(attr, 0, attr_sz); 158 + attr->map_fd = outer_fd; 159 + attr->key = ptr_to_u64(&zero); 160 + err = bpf_sys_bpf(BPF_MAP_DELETE_ELEM, attr, attr_sz); 161 + if (err) 162 + goto out; 163 + ret = 1; 164 + out: 165 + if (inner_fd >= 0) 166 + bpf_sys_close(inner_fd); 167 + if (outer_fd >= 0) 168 + bpf_sys_close(outer_fd); 169 + return ret; 158 170 }