Merge tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Pull bpf fixes from Alexei Starovoitov:

- Replace bpf_map_kmalloc_node() with kmalloc_nolock() to fix kmemleak
imbalance in tracking of bpf_async_cb structures (Alexei Starovoitov)

- Make selftests/bpf arg_parsing.c more robust to errors (Andrii
Nakryiko)

- Fix redefinition of 'off' as different kind of symbol when I40E
driver is builtin (Brahmajit Das)

- Do not disable preemption in bpf_test_run (Sahil Chandna)

- Fix memory leak in __lookup_instance error path (Shardul Bankar)

- Ensure test data is flushed to disk before reading it (Xing Guo)

* tag 'bpf-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
selftests/bpf: Fix redefinition of 'off' as different kind of symbol
bpf: Do not disable preemption in bpf_test_run().
bpf: Fix memory leak in __lookup_instance error path
selftests: arg_parsing: Ensure data is flushed to disk before reading.
bpf: Replace bpf_map_kmalloc_node() with kmalloc_nolock() to allocate bpf_async_cb structures.
selftests/bpf: make arg_parsing.c more robust to crashes
bpf: test_run: Fix ctx leak in bpf_prog_test_run_xdp error path

7 files changed, 59 insertions(+), 40 deletions(-)
include/linux/bpf.h (+4)
···
 #ifdef CONFIG_MEMCG
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
                            int node);
+void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
+                             int node);
 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags);
 void *bpf_map_kvcalloc(struct bpf_map *map, size_t n, size_t size,
                        gfp_t flags);
···
  */
 #define bpf_map_kmalloc_node(_map, _size, _flags, _node)        \
                 kmalloc_node(_size, _flags, _node)
+#define bpf_map_kmalloc_nolock(_map, _size, _flags, _node)        \
+                kmalloc_nolock(_size, _flags, _node)
 #define bpf_map_kzalloc(_map, _size, _flags)        \
                 kzalloc(_size, _flags)
 #define bpf_map_kvcalloc(_map, _n, _size, _flags)        \
kernel/bpf/helpers.c (+14 -11)
···
         rcu_read_unlock_trace();
 }

+static void bpf_async_cb_rcu_free(struct rcu_head *rcu)
+{
+        struct bpf_async_cb *cb = container_of(rcu, struct bpf_async_cb, rcu);
+
+        kfree_nolock(cb);
+}
+
 static void bpf_wq_delete_work(struct work_struct *work)
 {
         struct bpf_work *w = container_of(work, struct bpf_work, delete_work);

         cancel_work_sync(&w->work);

-        kfree_rcu(w, cb.rcu);
+        call_rcu(&w->cb.rcu, bpf_async_cb_rcu_free);
 }

 static void bpf_timer_delete_work(struct work_struct *work)
···

         /* Cancel the timer and wait for callback to complete if it was running.
          * If hrtimer_cancel() can be safely called it's safe to call
-         * kfree_rcu(t) right after for both preallocated and non-preallocated
+         * call_rcu() right after for both preallocated and non-preallocated
          * maps. The async->cb = NULL was already done and no code path can see
          * address 't' anymore. Timer if armed for existing bpf_hrtimer before
          * bpf_timer_cancel_and_free will have been cancelled.
          */
         hrtimer_cancel(&t->timer);
-        kfree_rcu(t, cb.rcu);
+        call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free);
 }

 static int __bpf_async_init(struct bpf_async_kern *async, struct bpf_map *map, u64 flags,
···
                 goto out;
         }

-        /* Allocate via bpf_map_kmalloc_node() for memcg accounting. Until
-         * kmalloc_nolock() is available, avoid locking issues by using
-         * __GFP_HIGH (GFP_ATOMIC & ~__GFP_RECLAIM).
-         */
-        cb = bpf_map_kmalloc_node(map, size, __GFP_HIGH, map->numa_node);
+        cb = bpf_map_kmalloc_nolock(map, size, 0, map->numa_node);
         if (!cb) {
                 ret = -ENOMEM;
                 goto out;
···
                  * or pinned in bpffs.
                  */
                 WRITE_ONCE(async->cb, NULL);
-                kfree(cb);
+                kfree_nolock(cb);
                 ret = -EPERM;
         }
 out:
···
          * timer _before_ calling us, such that failing to cancel it here will
          * cause it to possibly use struct hrtimer after freeing bpf_hrtimer.
          * Therefore, we _need_ to cancel any outstanding timers before we do
-         * kfree_rcu, even though no more timers can be armed.
+         * call_rcu, even though no more timers can be armed.
          *
          * Moreover, we need to schedule work even if timer does not belong to
          * the calling callback_fn, as on two different CPUs, we can end up in a
···
          * completion.
          */
         if (hrtimer_try_to_cancel(&t->timer) >= 0)
-                kfree_rcu(t, cb.rcu);
+                call_rcu(&t->cb.rcu, bpf_async_cb_rcu_free);
         else
                 queue_work(system_dfl_wq, &t->cb.delete_work);
         } else {
kernel/bpf/liveness.c (+3 -1)
···
                 return ERR_PTR(-ENOMEM);
         result->must_write_set = kvcalloc(subprog_sz, sizeof(*result->must_write_set),
                                           GFP_KERNEL_ACCOUNT);
-        if (!result->must_write_set)
+        if (!result->must_write_set) {
+                kvfree(result);
                 return ERR_PTR(-ENOMEM);
+        }
         memcpy(&result->callchain, callchain, sizeof(*callchain));
         result->insn_cnt = subprog_sz;
         hash_add(liveness->func_instances, &result->hl_node, key);
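As an aside, the leak here is the standard partial-initialization case: the second allocation fails after the first succeeded, so the first must be released before bailing out. A minimal userspace sketch of the same pattern (illustrative names, plain calloc()/free() standing in for kvcalloc()/kvfree()):

#include <stdlib.h>

struct instance {
        int *must_write_set;
        size_t insn_cnt;
};

/* Allocate an instance plus its per-insn array. If the second allocation
 * fails, the first one must be released before returning, otherwise it
 * leaks; the one-line kvfree() in the fix above restores exactly that. */
static struct instance *instance_alloc(size_t insn_cnt)
{
        struct instance *result = calloc(1, sizeof(*result));

        if (!result)
                return NULL;
        result->must_write_set = calloc(insn_cnt, sizeof(*result->must_write_set));
        if (!result->must_write_set) {
                free(result);        /* drop the partially built object */
                return NULL;
        }
        result->insn_cnt = insn_cnt;
        return result;
}

int main(void)
{
        struct instance *inst = instance_alloc(16);

        if (!inst)
                return 1;
        free(inst->must_write_set);
        free(inst);
        return 0;
}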
kernel/bpf/syscall.c (+15)
···
         return ptr;
 }

+void *bpf_map_kmalloc_nolock(const struct bpf_map *map, size_t size, gfp_t flags,
+                             int node)
+{
+        struct mem_cgroup *memcg, *old_memcg;
+        void *ptr;
+
+        memcg = bpf_map_get_memcg(map);
+        old_memcg = set_active_memcg(memcg);
+        ptr = kmalloc_nolock(size, flags | __GFP_ACCOUNT, node);
+        set_active_memcg(old_memcg);
+        mem_cgroup_put(memcg);
+
+        return ptr;
+}
+
 void *bpf_map_kzalloc(const struct bpf_map *map, size_t size, gfp_t flags)
 {
         struct mem_cgroup *memcg, *old_memcg;
net/bpf/test_run.c (+7 -18)
···
 #include <trace/events/bpf_test_run.h>

 struct bpf_test_timer {
-        enum { NO_PREEMPT, NO_MIGRATE } mode;
         u32 i;
         u64 time_start, time_spent;
 };
···
 static void bpf_test_timer_enter(struct bpf_test_timer *t)
         __acquires(rcu)
 {
-        rcu_read_lock();
-        if (t->mode == NO_PREEMPT)
-                preempt_disable();
-        else
-                migrate_disable();
-
+        rcu_read_lock_dont_migrate();
         t->time_start = ktime_get_ns();
 }
···
         __releases(rcu)
 {
         t->time_start = 0;
-
-        if (t->mode == NO_PREEMPT)
-                preempt_enable();
-        else
-                migrate_enable();
-        rcu_read_unlock();
+        rcu_read_unlock_migrate();
 }

 static bool bpf_test_timer_continue(struct bpf_test_timer *t, int iterations,
···

 {
         struct xdp_test_data xdp = { .batch_size = batch_size };
-        struct bpf_test_timer t = { .mode = NO_MIGRATE };
+        struct bpf_test_timer t = {};
         int ret;

         if (!repeat)
···
         struct bpf_prog_array_item item = {.prog = prog};
         struct bpf_run_ctx *old_ctx;
         struct bpf_cg_run_ctx run_ctx;
-        struct bpf_test_timer t = { NO_MIGRATE };
+        struct bpf_test_timer t = {};
         enum bpf_cgroup_storage_type stype;
         int ret;
···
                 goto free_ctx;

         if (kattr->test.data_size_in - meta_sz < ETH_HLEN)
-                return -EINVAL;
+                goto free_ctx;

         data = bpf_test_init(kattr, linear_sz, max_linear_sz, headroom, tailroom);
         if (IS_ERR(data)) {
···
                                  const union bpf_attr *kattr,
                                  union bpf_attr __user *uattr)
 {
-        struct bpf_test_timer t = { NO_PREEMPT };
+        struct bpf_test_timer t = {};
         u32 size = kattr->test.data_size_in;
         struct bpf_flow_dissector ctx = {};
         u32 repeat = kattr->test.repeat;
···
 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
                                 union bpf_attr __user *uattr)
 {
-        struct bpf_test_timer t = { NO_PREEMPT };
+        struct bpf_test_timer t = {};
         struct bpf_prog_array *progs = NULL;
         struct bpf_sk_lookup_kern ctx = {};
         u32 repeat = kattr->test.repeat;
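The ctx-leak part of this change is the usual goto-based cleanup rule: once ctx has been allocated, every error path must jump to the label that frees it rather than returning directly. A minimal userspace sketch of the pattern (parse_frame() and MIN_LEN are made-up names, not kernel code):

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#define MIN_LEN 14        /* stand-in for ETH_HLEN */

static int parse_frame(const char *buf, size_t len)
{
        char *ctx = malloc(len + 1);
        int ret = 0;

        if (!ctx)
                return -ENOMEM;
        if (len < MIN_LEN) {
                /* a bare "return -EINVAL;" here would leak ctx */
                ret = -EINVAL;
                goto free_ctx;
        }
        memcpy(ctx, buf, len);
        ctx[len] = '\0';
        /* ... use ctx ... */
free_ctx:
        free(ctx);
        return ret;
}

int main(void)
{
        return parse_frame("short", 5) == -EINVAL ? 0 : 1;
}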
tools/testing/selftests/bpf/prog_tests/arg_parsing.c (+9 -3)
···
         if (!ASSERT_OK(ferror(fp), "prepare tmp"))
                 goto out_fclose;

+        if (!ASSERT_OK(fsync(fileno(fp)), "fsync tmp"))
+                goto out_fclose;
+
         init_test_filter_set(&set);

-        ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file");
+        if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"))
+                goto out_fclose;

-        ASSERT_EQ(set.cnt, 4, "test count");
+        if (!ASSERT_EQ(set.cnt, 4, "test count"))
+                goto out_free_set;
+
         ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name");
         ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count");
         ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name");
···
         ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name");
         ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name");

+out_free_set:
         free_test_filter_set(&set);
-
 out_fclose:
         fclose(fp);
 out_remove:
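The added fsync() is the usual write-then-read-back discipline for temp files: get the buffered data out to the file before another code path opens it by name. A standalone userspace sketch of that sequence (path and contents are illustrative, not taken from the selftest):

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
        char path[] = "/tmp/arg_parsing_XXXXXX";
        char buf[64] = {0};
        int fd = mkstemp(path);
        FILE *fp, *rd = NULL;

        if (fd < 0)
                return 1;
        fp = fdopen(fd, "w");
        if (!fp) {
                close(fd);
                return 1;
        }

        fprintf(fp, "testA/subtest1\n");
        fflush(fp);             /* push stdio's buffer to the kernel */
        fsync(fileno(fp));      /* and make sure it reaches the file itself */

        rd = fopen(path, "r");  /* reopen by name, as the parser does */
        if (!rd || !fgets(buf, sizeof(buf), rd))
                goto fail;
        printf("read back: %s", buf);
        fclose(rd);
        fclose(fp);
        unlink(path);
        return 0;
fail:
        if (rd)
                fclose(rd);
        fclose(fp);
        unlink(path);
        return 1;
}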
tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c (+7 -7)
···
 }

 char mem[16];
-u32 off;
+u32 offset;

 SEC("tp_btf/sys_enter")
 __success
···
         /* scalar to untrusted */
         subprog_untrusted(0);
         /* variable offset to untrusted (map) */
-        subprog_untrusted((void *)mem + off);
+        subprog_untrusted((void *)mem + offset);
         /* variable offset to untrusted (trusted) */
-        subprog_untrusted((void *)bpf_get_current_task_btf() + off);
+        subprog_untrusted((void *)bpf_get_current_task_btf() + offset);
         return 0;
 }
···
         /* scalar to untrusted mem */
         subprog_void_untrusted(0);
         /* variable offset to untrusted mem (map) */
-        subprog_void_untrusted((void *)mem + off);
+        subprog_void_untrusted((void *)mem + offset);
         /* variable offset to untrusted mem (trusted) */
-        subprog_void_untrusted(bpf_get_current_task_btf() + off);
+        subprog_void_untrusted(bpf_get_current_task_btf() + offset);
         /* variable offset to untrusted char/enum (map) */
-        subprog_char_untrusted(mem + off);
-        subprog_enum_untrusted((void *)mem + off);
+        subprog_char_untrusted(mem + offset);
+        subprog_enum_untrusted((void *)mem + offset);
         return 0;
 }