Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

bpf: af_unix: Use batching algorithm in bpf unix iter.

The commit 04c7820b776f ("bpf: tcp: Bpf iter batching and lock_sock")
introduces the batching algorithm to iterate TCP sockets with more
consistency.

This patch uses the same algorithm to iterate AF_UNIX sockets.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
Link: https://lore.kernel.org/r/20220113002849.4384-3-kuniyu@amazon.co.jp
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
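
For context, the consumer of this kernel-side machinery is a BPF iterator program attached to the unix iterator target; its context type (struct bpf_iter__unix) appears in the diff below. The following is a minimal sketch of such a program, not part of this patch: it assumes a BTF-generated vmlinux.h plus libbpf's bpf_helpers.h/bpf_tracing.h, and the name dump_unix is illustrative.

/* Illustrative BPF iterator program for the "unix" target (not from this
 * patch). Invoked once per socket from bpf_iter_unix_seq_show() below,
 * i.e. under lock_sock_fast() after this change, so the fields it reads
 * are stable.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

SEC("iter/unix")
int dump_unix(struct bpf_iter__unix *ctx)
{
	struct unix_sock *unix_sk = ctx->unix_sk;
	struct seq_file *seq = ctx->meta->seq;

	if (!unix_sk)	/* the final call of a pass carries no socket */
		return 0;

	/* One output line per AF_UNIX socket: kernel address and state
	 * (skc_state is the BTF name behind the sk_state macro). */
	BPF_SEQ_PRINTF(seq, "%pK %u\n", unix_sk,
		       unix_sk->sk.__sk_common.skc_state);
	return 0;
}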

Authored by Kuniyuki Iwashima and committed by Alexei Starovoitov
855d8e77 4408d55a

+177 -7
net/unix/af_unix.c
···
 };
 
 #if IS_BUILTIN(CONFIG_UNIX) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_unix_iter_state {
+	struct seq_net_private p;
+	unsigned int cur_sk;
+	unsigned int end_sk;
+	unsigned int max_sk;
+	struct sock **batch;
+	bool st_bucket_done;
+};
+
 struct bpf_iter__unix {
 	__bpf_md_ptr(struct bpf_iter_meta *, meta);
 	__bpf_md_ptr(struct unix_sock *, unix_sk);
···
 	return bpf_iter_run_prog(prog, &ctx);
 }
 
+static int bpf_iter_unix_hold_batch(struct seq_file *seq, struct sock *start_sk)
+
+{
+	struct bpf_unix_iter_state *iter = seq->private;
+	unsigned int expected = 1;
+	struct sock *sk;
+
+	sock_hold(start_sk);
+	iter->batch[iter->end_sk++] = start_sk;
+
+	for (sk = sk_next(start_sk); sk; sk = sk_next(sk)) {
+		if (sock_net(sk) != seq_file_net(seq))
+			continue;
+
+		if (iter->end_sk < iter->max_sk) {
+			sock_hold(sk);
+			iter->batch[iter->end_sk++] = sk;
+		}
+
+		expected++;
+	}
+
+	spin_unlock(&unix_table_locks[start_sk->sk_hash]);
+
+	return expected;
+}
+
+static void bpf_iter_unix_put_batch(struct bpf_unix_iter_state *iter)
+{
+	while (iter->cur_sk < iter->end_sk)
+		sock_put(iter->batch[iter->cur_sk++]);
+}
+
+static int bpf_iter_unix_realloc_batch(struct bpf_unix_iter_state *iter,
+				       unsigned int new_batch_sz)
+{
+	struct sock **new_batch;
+
+	new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz,
+			     GFP_USER | __GFP_NOWARN);
+	if (!new_batch)
+		return -ENOMEM;
+
+	bpf_iter_unix_put_batch(iter);
+	kvfree(iter->batch);
+	iter->batch = new_batch;
+	iter->max_sk = new_batch_sz;
+
+	return 0;
+}
+
+static struct sock *bpf_iter_unix_batch(struct seq_file *seq,
+					loff_t *pos)
+{
+	struct bpf_unix_iter_state *iter = seq->private;
+	unsigned int expected;
+	bool resized = false;
+	struct sock *sk;
+
+	if (iter->st_bucket_done)
+		*pos = set_bucket_offset(get_bucket(*pos) + 1, 1);
+
+again:
+	/* Get a new batch */
+	iter->cur_sk = 0;
+	iter->end_sk = 0;
+
+	sk = unix_get_first(seq, pos);
+	if (!sk)
+		return NULL; /* Done */
+
+	expected = bpf_iter_unix_hold_batch(seq, sk);
+
+	if (iter->end_sk == expected) {
+		iter->st_bucket_done = true;
+		return sk;
+	}
+
+	if (!resized && !bpf_iter_unix_realloc_batch(iter, expected * 3 / 2)) {
+		resized = true;
+		goto again;
+	}
+
+	return sk;
+}
+
+static void *bpf_iter_unix_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	if (!*pos)
+		return SEQ_START_TOKEN;
+
+	/* bpf iter does not support lseek, so it always
+	 * continue from where it was stop()-ped.
+	 */
+	return bpf_iter_unix_batch(seq, pos);
+}
+
+static void *bpf_iter_unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bpf_unix_iter_state *iter = seq->private;
+	struct sock *sk;
+
+	/* Whenever seq_next() is called, the iter->cur_sk is
+	 * done with seq_show(), so advance to the next sk in
+	 * the batch.
+	 */
+	if (iter->cur_sk < iter->end_sk)
+		sock_put(iter->batch[iter->cur_sk++]);
+
+	++*pos;
+
+	if (iter->cur_sk < iter->end_sk)
+		sk = iter->batch[iter->cur_sk];
+	else
+		sk = bpf_iter_unix_batch(seq, pos);
+
+	return sk;
+}
+
 static int bpf_iter_unix_seq_show(struct seq_file *seq, void *v)
 {
 	struct bpf_iter_meta meta;
 	struct bpf_prog *prog;
 	struct sock *sk = v;
 	uid_t uid;
+	bool slow;
+	int ret;
 
 	if (v == SEQ_START_TOKEN)
 		return 0;
 
+	slow = lock_sock_fast(sk);
+
+	if (unlikely(sk_unhashed(sk))) {
+		ret = SEQ_SKIP;
+		goto unlock;
+	}
+
 	uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
 	meta.seq = seq;
 	prog = bpf_iter_get_info(&meta, false);
-	return unix_prog_seq_show(prog, &meta, v, uid);
+	ret = unix_prog_seq_show(prog, &meta, v, uid);
+unlock:
+	unlock_sock_fast(sk, slow);
+	return ret;
 }
 
 static void bpf_iter_unix_seq_stop(struct seq_file *seq, void *v)
 {
+	struct bpf_unix_iter_state *iter = seq->private;
 	struct bpf_iter_meta meta;
 	struct bpf_prog *prog;
···
 		(void)unix_prog_seq_show(prog, &meta, v, 0);
 	}
 
-	unix_seq_stop(seq, v);
+	if (iter->cur_sk < iter->end_sk)
+		bpf_iter_unix_put_batch(iter);
 }
 
 static const struct seq_operations bpf_iter_unix_seq_ops = {
-	.start	= unix_seq_start,
-	.next	= unix_seq_next,
+	.start	= bpf_iter_unix_seq_start,
+	.next	= bpf_iter_unix_seq_next,
 	.stop	= bpf_iter_unix_seq_stop,
 	.show	= bpf_iter_unix_seq_show,
 };
···
 DEFINE_BPF_ITER_FUNC(unix, struct bpf_iter_meta *meta,
 		     struct unix_sock *unix_sk, uid_t uid)
 
+#define INIT_BATCH_SZ 16
+
+static int bpf_iter_init_unix(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+	struct bpf_unix_iter_state *iter = priv_data;
+	int err;
+
+	err = bpf_iter_init_seq_net(priv_data, aux);
+	if (err)
+		return err;
+
+	err = bpf_iter_unix_realloc_batch(iter, INIT_BATCH_SZ);
+	if (err) {
+		bpf_iter_fini_seq_net(priv_data);
+		return err;
+	}
+
+	return 0;
+}
+
+static void bpf_iter_fini_unix(void *priv_data)
+{
+	struct bpf_unix_iter_state *iter = priv_data;
+
+	bpf_iter_fini_seq_net(priv_data);
+	kvfree(iter->batch);
+}
+
 static const struct bpf_iter_seq_info unix_seq_info = {
 	.seq_ops		= &bpf_iter_unix_seq_ops,
-	.init_seq_private	= bpf_iter_init_seq_net,
-	.fini_seq_private	= bpf_iter_fini_seq_net,
-	.seq_priv_size		= sizeof(struct seq_net_private),
+	.init_seq_private	= bpf_iter_init_unix,
+	.fini_seq_private	= bpf_iter_fini_unix,
+	.seq_priv_size		= sizeof(struct bpf_unix_iter_state),
 };
 
 static struct bpf_iter_reg unix_reg_info = {
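
On the user-space side, reads on an iterator FD are what drive the seq_ops above and hence the batching: bpf_iter_unix_batch() pins up to max_sk sockets of one hash bucket while holding the per-bucket lock in unix_table_locks, drops the lock, and then seq_show() visits each pinned socket under lock_sock_fast(); if the bucket held more sockets than the batch array (end_sk != expected), the array is resized once to expected * 3 / 2 and the bucket is re-walked. A rough libbpf reader follows, illustrative only; the dump_unix skeleton is assumed from the sketch earlier on this page, not from this patch.

/* Illustrative user-space reader for the unix iterator (not from this
 * patch). Build against libbpf; dump_unix.skel.h is the hypothetical
 * skeleton generated from the BPF program sketched above. */
#include <stdio.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "dump_unix.skel.h"

int main(void)
{
	struct bpf_link *link = NULL;
	struct dump_unix *skel;
	char buf[4096];
	ssize_t n;
	int iter_fd = -1;

	skel = dump_unix__open_and_load();
	if (!skel)
		return 1;

	link = bpf_program__attach_iter(skel->progs.dump_unix, NULL);
	if (link)
		iter_fd = bpf_iter_create(bpf_link__fd(link));

	/* Each read() drives bpf_iter_unix_seq_start()/_next()/_stop()
	 * in the kernel; the socket batching above happens on these
	 * start/stop boundaries. */
	while (iter_fd >= 0 && (n = read(iter_fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);

	if (iter_fd >= 0)
		close(iter_fd);
	bpf_link__destroy(link);	/* NULL-safe */
	dump_unix__destroy(skel);
	return 0;
}

Holding explicit references per batch is what makes it safe for seq_show() to sleep on the socket lock between bucket traversals, which a plain rcu_read_lock()-protected walk (as in the non-BPF unix_seq_ops) would not allow.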