Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/bpf: Add test for race in btf_try_get_module

This adds a complete test case to ensure we never take references to
modules not in MODULE_STATE_LIVE, which can lead to UAF, and it also
ensures we never access btf->kfunc_set_tab in an inconsistent state.

The test uses userfaultfd to artificially widen the race.

When run on an unpatched kernel, it leads to the following splat:

[root@(none) bpf]# ./test_progs -t bpf_mod_race/ksym
[ 55.498171] BUG: unable to handle page fault for address: fffffbfff802548b
[ 55.499206] #PF: supervisor read access in kernel mode
[ 55.499855] #PF: error_code(0x0000) - not-present page
[ 55.500555] PGD a4fa9067 P4D a4fa9067 PUD a4fa5067 PMD 1b44067 PTE 0
[ 55.501499] Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI
[ 55.502195] CPU: 0 PID: 83 Comm: kworker/0:2 Tainted: G OE 5.16.0-rc4+ #151
[ 55.503388] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.15.0-1 04/01/2014
[ 55.504777] Workqueue: events bpf_prog_free_deferred
[ 55.505563] RIP: 0010:kasan_check_range+0x184/0x1d0
[ 55.509140] RSP: 0018:ffff88800560fcf0 EFLAGS: 00010282
[ 55.509977] RAX: fffffbfff802548b RBX: fffffbfff802548c RCX: ffffffff9337b6ba
[ 55.511096] RDX: fffffbfff802548c RSI: 0000000000000004 RDI: ffffffffc012a458
[ 55.512143] RBP: fffffbfff802548b R08: 0000000000000001 R09: ffffffffc012a45b
[ 55.513228] R10: fffffbfff802548b R11: 0000000000000001 R12: ffff888001b5f598
[ 55.514332] R13: ffff888004f49ac8 R14: 0000000000000000 R15: ffff888092449400
[ 55.515418] FS: 0000000000000000(0000) GS:ffff888092400000(0000) knlGS:0000000000000000
[ 55.516705] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 55.517560] CR2: fffffbfff802548b CR3: 0000000007c10006 CR4: 0000000000770ef0
[ 55.518672] PKRU: 55555554
[ 55.519022] Call Trace:
[ 55.519483] <TASK>
[ 55.519884] module_put.part.0+0x2a/0x180
[ 55.520642] bpf_prog_free_deferred+0x129/0x2e0
[ 55.521478] process_one_work+0x4fa/0x9e0
[ 55.522122] ? pwq_dec_nr_in_flight+0x100/0x100
[ 55.522878] ? rwlock_bug.part.0+0x60/0x60
[ 55.523551] worker_thread+0x2eb/0x700
[ 55.524176] ? __kthread_parkme+0xd8/0xf0
[ 55.524853] ? process_one_work+0x9e0/0x9e0
[ 55.525544] kthread+0x23a/0x270
[ 55.526088] ? set_kthread_struct+0x80/0x80
[ 55.526798] ret_from_fork+0x1f/0x30
[ 55.527413] </TASK>
[ 55.527813] Modules linked in: bpf_testmod(OE) [last unloaded: bpf_testmod]
[ 55.530846] CR2: fffffbfff802548b
[ 55.531341] ---[ end trace 1af41803c054ad6d ]---
[ 55.532136] RIP: 0010:kasan_check_range+0x184/0x1d0
[ 55.535887] RSP: 0018:ffff88800560fcf0 EFLAGS: 00010282
[ 55.536711] RAX: fffffbfff802548b RBX: fffffbfff802548c RCX: ffffffff9337b6ba
[ 55.537821] RDX: fffffbfff802548c RSI: 0000000000000004 RDI: ffffffffc012a458
[ 55.538899] RBP: fffffbfff802548b R08: 0000000000000001 R09: ffffffffc012a45b
[ 55.539928] R10: fffffbfff802548b R11: 0000000000000001 R12: ffff888001b5f598
[ 55.541021] R13: ffff888004f49ac8 R14: 0000000000000000 R15: ffff888092449400
[ 55.542108] FS: 0000000000000000(0000) GS:ffff888092400000(0000) knlGS:0000000000000000
[ 55.543260] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 55.544136] CR2: fffffbfff802548b CR3: 0000000007c10006 CR4: 0000000000770ef0
[ 55.545317] PKRU: 55555554
[ 55.545671] note: kworker/0:2[83] exited with preempt_count 1

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-11-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Kumar Kartikeya Dwivedi and committed by
Alexei Starovoitov
46565696 c1ff181f

+364
+2
net/bpf/test_run.c
··· 172 172 { 173 173 return a + 1; 174 174 } 175 + EXPORT_SYMBOL_GPL(bpf_fentry_test1); 176 + ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO); 175 177 176 178 int noinline bpf_fentry_test2(int a, u64 b) 177 179 {
+4
tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
··· 118 118 .check_set = &bpf_testmod_check_kfunc_ids, 119 119 }; 120 120 121 + extern int bpf_fentry_test1(int a); 122 + 121 123 static int bpf_testmod_init(void) 122 124 { 123 125 int ret; ··· 127 125 ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); 128 126 if (ret < 0) 129 127 return ret; 128 + if (bpf_fentry_test1(0) < 0) 129 + return -EINVAL; 130 130 return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); 131 131 } 132 132
+1
tools/testing/selftests/bpf/config
··· 52 52 CONFIG_NF_DEFRAG_IPV4=y 53 53 CONFIG_NF_DEFRAG_IPV6=y 54 54 CONFIG_NF_CONNTRACK=y 55 + CONFIG_USERFAULTFD=y
+230
tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <unistd.h> 3 + #include <pthread.h> 4 + #include <sys/mman.h> 5 + #include <stdatomic.h> 6 + #include <test_progs.h> 7 + #include <sys/syscall.h> 8 + #include <linux/module.h> 9 + #include <linux/userfaultfd.h> 10 + 11 + #include "ksym_race.skel.h" 12 + #include "bpf_mod_race.skel.h" 13 + #include "kfunc_call_race.skel.h" 14 + 15 + /* This test crafts a race between btf_try_get_module and do_init_module, and 16 + * checks whether btf_try_get_module handles the invocation for a well-formed 17 + * but uninitialized module correctly. Unless the module has completed its 18 + * initcalls, the verifier should fail the program load and return ENXIO. 19 + * 20 + * userfaultfd is used to trigger a fault in an fmod_ret program, and make it 21 + * sleep, then the BPF program is loaded and the return value from verifier is 22 + * inspected. After this, the userfaultfd is closed so that the module loading 23 + * thread makes forward progress, and fmod_ret injects an error so that the 24 + * module load fails and it is freed. 25 + * 26 + * If the verifier succeeded in loading the supplied program, it will end up 27 + * taking reference to freed module, and trigger a crash when the program fd 28 + * is closed later. This is true for both kfuncs and ksyms. In both cases, 29 + * the crash is triggered inside bpf_prog_free_deferred, when module reference 30 + * is finally released. 
31 + */ 32 + 33 + struct test_config { 34 + const char *str_open; 35 + void *(*bpf_open_and_load)(); 36 + void (*bpf_destroy)(void *); 37 + }; 38 + 39 + enum test_state { 40 + _TS_INVALID, 41 + TS_MODULE_LOAD, 42 + TS_MODULE_LOAD_FAIL, 43 + }; 44 + 45 + static _Atomic enum test_state state = _TS_INVALID; 46 + 47 + static int sys_finit_module(int fd, const char *param_values, int flags) 48 + { 49 + return syscall(__NR_finit_module, fd, param_values, flags); 50 + } 51 + 52 + static int sys_delete_module(const char *name, unsigned int flags) 53 + { 54 + return syscall(__NR_delete_module, name, flags); 55 + } 56 + 57 + static int load_module(const char *mod) 58 + { 59 + int ret, fd; 60 + 61 + fd = open("bpf_testmod.ko", O_RDONLY); 62 + if (fd < 0) 63 + return fd; 64 + 65 + ret = sys_finit_module(fd, "", 0); 66 + close(fd); 67 + if (ret < 0) 68 + return ret; 69 + return 0; 70 + } 71 + 72 + static void *load_module_thread(void *p) 73 + { 74 + 75 + if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail")) 76 + atomic_store(&state, TS_MODULE_LOAD); 77 + else 78 + atomic_store(&state, TS_MODULE_LOAD_FAIL); 79 + return p; 80 + } 81 + 82 + static int sys_userfaultfd(int flags) 83 + { 84 + return syscall(__NR_userfaultfd, flags); 85 + } 86 + 87 + static int test_setup_uffd(void *fault_addr) 88 + { 89 + struct uffdio_register uffd_register = {}; 90 + struct uffdio_api uffd_api = {}; 91 + int uffd; 92 + 93 + uffd = sys_userfaultfd(O_CLOEXEC); 94 + if (uffd < 0) 95 + return -errno; 96 + 97 + uffd_api.api = UFFD_API; 98 + uffd_api.features = 0; 99 + if (ioctl(uffd, UFFDIO_API, &uffd_api)) { 100 + close(uffd); 101 + return -1; 102 + } 103 + 104 + uffd_register.range.start = (unsigned long)fault_addr; 105 + uffd_register.range.len = 4096; 106 + uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; 107 + if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) { 108 + close(uffd); 109 + return -1; 110 + } 111 + return uffd; 112 + } 113 + 114 + static void 
test_bpf_mod_race_config(const struct test_config *config) 115 + { 116 + void *fault_addr, *skel_fail; 117 + struct bpf_mod_race *skel; 118 + struct uffd_msg uffd_msg; 119 + pthread_t load_mod_thrd; 120 + _Atomic int *blockingp; 121 + int uffd, ret; 122 + 123 + fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 124 + if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration")) 125 + return; 126 + 127 + if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod")) 128 + goto end_mmap; 129 + 130 + skel = bpf_mod_race__open(); 131 + if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open")) 132 + goto end_module; 133 + 134 + skel->rodata->bpf_mod_race_config.tgid = getpid(); 135 + skel->rodata->bpf_mod_race_config.inject_error = -4242; 136 + skel->rodata->bpf_mod_race_config.fault_addr = fault_addr; 137 + if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load")) 138 + goto end_destroy; 139 + blockingp = (_Atomic int *)&skel->bss->bpf_blocking; 140 + 141 + if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach")) 142 + goto end_destroy; 143 + 144 + uffd = test_setup_uffd(fault_addr); 145 + if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address")) 146 + goto end_destroy; 147 + 148 + if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL), 149 + "load module thread")) 150 + goto end_uffd; 151 + 152 + /* Now, we either fail loading module, or block in bpf prog, spin to find out */ 153 + while (!atomic_load(&state) && !atomic_load(blockingp)) 154 + ; 155 + if (!ASSERT_EQ(state, _TS_INVALID, "module load should block")) 156 + goto end_join; 157 + if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) { 158 + pthread_kill(load_mod_thrd, SIGKILL); 159 + goto end_uffd; 160 + } 161 + 162 + /* We might have set bpf_blocking to 1, but may have not blocked in 163 + * bpf_copy_from_user. Read userfaultfd descriptor to verify that. 
164 + */ 165 + if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg), 166 + "read uffd block event")) 167 + goto end_join; 168 + if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault")) 169 + goto end_join; 170 + 171 + /* We know that load_mod_thrd is blocked in the fmod_ret program, the 172 + * module state is still MODULE_STATE_COMING because mod->init hasn't 173 + * returned. This is the time we try to load a program calling kfunc and 174 + * check if we get ENXIO from verifier. 175 + */ 176 + skel_fail = config->bpf_open_and_load(); 177 + ret = errno; 178 + if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) { 179 + /* Close uffd to unblock load_mod_thrd */ 180 + close(uffd); 181 + uffd = -1; 182 + while (atomic_load(blockingp) != 2) 183 + ; 184 + ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); 185 + config->bpf_destroy(skel_fail); 186 + goto end_join; 187 + 188 + } 189 + ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO"); 190 + ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false"); 191 + 192 + close(uffd); 193 + uffd = -1; 194 + end_join: 195 + pthread_join(load_mod_thrd, NULL); 196 + if (uffd < 0) 197 + ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success"); 198 + end_uffd: 199 + if (uffd >= 0) 200 + close(uffd); 201 + end_destroy: 202 + bpf_mod_race__destroy(skel); 203 + ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); 204 + end_module: 205 + sys_delete_module("bpf_testmod", 0); 206 + ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod"); 207 + end_mmap: 208 + munmap(fault_addr, 4096); 209 + atomic_store(&state, _TS_INVALID); 210 + } 211 + 212 + static const struct test_config ksym_config = { 213 + .str_open = "ksym_race__open_and_load", 214 + .bpf_open_and_load = (void *)ksym_race__open_and_load, 215 + .bpf_destroy = (void *)ksym_race__destroy, 216 + }; 217 + 218 + static const struct test_config kfunc_config = { 219 + .str_open = "kfunc_call_race__open_and_load", 
220 + .bpf_open_and_load = (void *)kfunc_call_race__open_and_load, 221 + .bpf_destroy = (void *)kfunc_call_race__destroy, 222 + }; 223 + 224 + void serial_test_bpf_mod_race(void) 225 + { 226 + if (test__start_subtest("ksym (used_btfs UAF)")) 227 + test_bpf_mod_race_config(&ksym_config); 228 + if (test__start_subtest("kfunc (kfunc_btf_tab UAF)")) 229 + test_bpf_mod_race_config(&kfunc_config); 230 + }
+100
tools/testing/selftests/bpf/progs/bpf_mod_race.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <vmlinux.h> 3 + #include <bpf/bpf_helpers.h> 4 + #include <bpf/bpf_tracing.h> 5 + 6 + const volatile struct { 7 + /* thread to activate trace programs for */ 8 + pid_t tgid; 9 + /* return error from __init function */ 10 + int inject_error; 11 + /* uffd monitored range start address */ 12 + void *fault_addr; 13 + } bpf_mod_race_config = { -1 }; 14 + 15 + int bpf_blocking = 0; 16 + int res_try_get_module = -1; 17 + 18 + static __always_inline bool check_thread_id(void) 19 + { 20 + struct task_struct *task = bpf_get_current_task_btf(); 21 + 22 + return task->tgid == bpf_mod_race_config.tgid; 23 + } 24 + 25 + /* The trace of execution is something like this: 26 + * 27 + * finit_module() 28 + * load_module() 29 + * prepare_coming_module() 30 + * notifier_call(MODULE_STATE_COMING) 31 + * btf_parse_module() 32 + * btf_alloc_id() // Visible to userspace at this point 33 + * list_add(btf_mod->list, &btf_modules) 34 + * do_init_module() 35 + * freeinit = kmalloc() 36 + * ret = mod->init() 37 + * bpf_prog_widen_race() 38 + * bpf_copy_from_user() 39 + * ...<sleep>... 40 + * if (ret < 0) 41 + * ... 42 + * free_module() 43 + * return ret 44 + * 45 + * At this point, module loading thread is blocked, we now load the program: 46 + * 47 + * bpf_check 48 + * add_kfunc_call/check_pseudo_btf_id 49 + * btf_try_get_module 50 + * try_get_module_live == false 51 + * return -ENXIO 52 + * 53 + * Without the fix (try_get_module_live in btf_try_get_module): 54 + * 55 + * bpf_check 56 + * add_kfunc_call/check_pseudo_btf_id 57 + * btf_try_get_module 58 + * try_get_module == true 59 + * <store module reference in btf_kfunc_tab or used_btf array> 60 + * ... 61 + * return fd 62 + * 63 + * Now, if we inject an error in the blocked program, our module will be freed 64 + * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING). 65 + * Later, when bpf program is freed, it will try to module_put already freed 66 + * module. 
This is why try_get_module_live returns false if mod->state is not 67 + * MODULE_STATE_LIVE. 68 + */ 69 + 70 + SEC("fmod_ret.s/bpf_fentry_test1") 71 + int BPF_PROG(widen_race, int a, int ret) 72 + { 73 + char dst; 74 + 75 + if (!check_thread_id()) 76 + return 0; 77 + /* Indicate that we will attempt to block */ 78 + bpf_blocking = 1; 79 + bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr); 80 + return bpf_mod_race_config.inject_error; 81 + } 82 + 83 + SEC("fexit/do_init_module") 84 + int BPF_PROG(fexit_init_module, struct module *mod, int ret) 85 + { 86 + if (!check_thread_id()) 87 + return 0; 88 + /* Indicate that we finished blocking */ 89 + bpf_blocking = 2; 90 + return 0; 91 + } 92 + 93 + SEC("fexit/btf_try_get_module") 94 + int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod) 95 + { 96 + res_try_get_module = !!mod; 97 + return 0; 98 + } 99 + 100 + char _license[] SEC("license") = "GPL";
+14
tools/testing/selftests/bpf/progs/kfunc_call_race.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <vmlinux.h> 3 + #include <bpf/bpf_helpers.h> 4 + 5 + extern void bpf_testmod_test_mod_kfunc(int i) __ksym; 6 + 7 + SEC("tc") 8 + int kfunc_call_fail(struct __sk_buff *ctx) 9 + { 10 + bpf_testmod_test_mod_kfunc(0); 11 + return 0; 12 + } 13 + 14 + char _license[] SEC("license") = "GPL";
+13
tools/testing/selftests/bpf/progs/ksym_race.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <vmlinux.h> 3 + #include <bpf/bpf_helpers.h> 4 + 5 + extern int bpf_testmod_ksym_percpu __ksym; 6 + 7 + SEC("tc") 8 + int ksym_fail(struct __sk_buff *ctx) 9 + { 10 + return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu); 11 + } 12 + 13 + char _license[] SEC("license") = "GPL";