Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'fix-sockmap'

John Fastabend says:

====================
Fix a splat introduced by recent changes to avoid skipping ingress policy
when kTLS is enabled. The RCU splat was introduced because in the non-TLS
case the caller is wrapped in an rcu_read_lock/unlock. But, in the TLS
case we have a reference to the psock and the caller did not wrap its
call in rcu_read_lock/unlock.

To fix this, extend the RCU section to include the redirect case, which was
missed. From v1->v2 I changed the location a bit to simplify the code
somewhat. See patch 1.

But then Martin asked why it was not needed in the non-TLS case. The
answer for patch 1 was, as stated above, that the caller holds the
RCU read lock. However, there was still a missing case where a BPF
user could, in theory, line up a set of parameters to hit a case
where the code was entered from the strparser side from a different context
than the initial caller. To hit this, a user would need a parser program
to return a value greater than skb->len; then an ENOMEM error could happen
in the strparser codepath, triggering strparser to retry from a workqueue
without the rcu_read_lock the original caller used. See patch 2 for details.

Finally, we don't actually have any selftests for a parser returning a
value greater than skb->len, so add one in patch 3. This is especially
needed because I, at least, don't have any code that uses the parser
to return a value greater than skb->len, so I wouldn't have caught any
errors here in my own testing.

Thanks, John

v1->v2: simplify code in patch 1 some and add patches 2 and 3.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+40 -9
+15 -8
net/core/skmsg.c
··· 683 683 return container_of(parser, struct sk_psock, parser); 684 684 } 685 685 686 - static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb) 686 + static void sk_psock_skb_redirect(struct sk_buff *skb) 687 687 { 688 688 struct sk_psock *psock_other; 689 689 struct sock *sk_other; ··· 715 715 } 716 716 } 717 717 718 - static void sk_psock_tls_verdict_apply(struct sk_psock *psock, 719 - struct sk_buff *skb, int verdict) 718 + static void sk_psock_tls_verdict_apply(struct sk_buff *skb, int verdict) 720 719 { 721 720 switch (verdict) { 722 721 case __SK_REDIRECT: 723 - sk_psock_skb_redirect(psock, skb); 722 + sk_psock_skb_redirect(skb); 724 723 break; 725 724 case __SK_PASS: 726 725 case __SK_DROP: ··· 740 741 ret = sk_psock_bpf_run(psock, prog, skb); 741 742 ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); 742 743 } 744 + sk_psock_tls_verdict_apply(skb, ret); 743 745 rcu_read_unlock(); 744 - sk_psock_tls_verdict_apply(psock, skb, ret); 745 746 return ret; 746 747 } 747 748 EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read); ··· 769 770 } 770 771 goto out_free; 771 772 case __SK_REDIRECT: 772 - sk_psock_skb_redirect(psock, skb); 773 + sk_psock_skb_redirect(skb); 773 774 break; 774 775 case __SK_DROP: 775 776 /* fall-through */ ··· 781 782 782 783 static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb) 783 784 { 784 - struct sk_psock *psock = sk_psock_from_strp(strp); 785 + struct sk_psock *psock; 785 786 struct bpf_prog *prog; 786 787 int ret = __SK_DROP; 788 + struct sock *sk; 787 789 788 790 rcu_read_lock(); 791 + sk = strp->sk; 792 + psock = sk_psock(sk); 793 + if (unlikely(!psock)) { 794 + kfree_skb(skb); 795 + goto out; 796 + } 789 797 prog = READ_ONCE(psock->progs.skb_verdict); 790 798 if (likely(prog)) { 791 799 skb_orphan(skb); ··· 800 794 ret = sk_psock_bpf_run(psock, prog, skb); 801 795 ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb)); 802 796 } 803 - rcu_read_unlock(); 804 797 
sk_psock_verdict_apply(psock, skb, ret); 798 + out: 799 + rcu_read_unlock(); 805 800 } 806 801 807 802 static int sk_psock_strp_read_done(struct strparser *strp, int err)
+7 -1
tools/testing/selftests/bpf/progs/test_sockmap_kern.h
··· 79 79 80 80 struct { 81 81 __uint(type, BPF_MAP_TYPE_ARRAY); 82 - __uint(max_entries, 2); 82 + __uint(max_entries, 3); 83 83 __type(key, int); 84 84 __type(value, int); 85 85 } sock_skb_opts SEC(".maps"); ··· 94 94 SEC("sk_skb1") 95 95 int bpf_prog1(struct __sk_buff *skb) 96 96 { 97 + int *f, two = 2; 98 + 99 + f = bpf_map_lookup_elem(&sock_skb_opts, &two); 100 + if (f && *f) { 101 + return *f; 102 + } 97 103 return skb->len; 98 104 } 99 105
+18
tools/testing/selftests/bpf/test_sockmap.c
··· 85 85 int txmsg_ktls_skb_redir; 86 86 int ktls; 87 87 int peek_flag; 88 + int skb_use_parser; 88 89 89 90 static const struct option long_options[] = { 90 91 {"help", no_argument, NULL, 'h' }, ··· 175 174 txmsg_apply = txmsg_cork = 0; 176 175 txmsg_ingress = txmsg_redir_skb = 0; 177 176 txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0; 177 + skb_use_parser = 0; 178 178 } 179 179 180 180 static int test_start_subtest(const struct _test *t, struct sockmap_options *o) ··· 1213 1211 } 1214 1212 } 1215 1213 1214 + if (skb_use_parser) { 1215 + i = 2; 1216 + err = bpf_map_update_elem(map_fd[7], &i, &skb_use_parser, BPF_ANY); 1217 + } 1218 + 1216 1219 if (txmsg_drop) 1217 1220 options->drop_expected = true; 1218 1221 ··· 1657 1650 test_send(opt, cgrp); 1658 1651 } 1659 1652 1653 + static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt) 1654 + { 1655 + txmsg_pass = 1; 1656 + skb_use_parser = 512; 1657 + opt->iov_length = 256; 1658 + opt->iov_count = 1; 1659 + opt->rate = 2; 1660 + test_exec(cgrp, opt); 1661 + } 1662 + 1660 1663 char *map_names[] = { 1661 1664 "sock_map", 1662 1665 "sock_map_txmsg", ··· 1765 1748 {"txmsg test pull-data", test_txmsg_pull}, 1766 1749 {"txmsg test pop-data", test_txmsg_pop}, 1767 1750 {"txmsg test push/pop data", test_txmsg_push_pop}, 1751 + {"txmsg text ingress parser", test_txmsg_ingress_parser}, 1768 1752 }; 1769 1753 1770 1754 static int check_whitelist(struct _test *t, struct sockmap_options *opt)