Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sock_map: Introduce BPF_SK_SKB_VERDICT

Reusing BPF_SK_SKB_STREAM_VERDICT is possible, but its name is
confusing and, more importantly, we still want to be able to tell
stream verdict and skb verdict programs apart from user-space. So we
can just reuse the stream verdict code but introduce a new eBPF attach
type, BPF_SK_SKB_VERDICT (skb_verdict). Users are not allowed to attach
stream_verdict and skb_verdict programs to the same map.

Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210331023237.41094-10-xiyou.wangcong@gmail.com

authored by

Cong Wang and committed by
Alexei Starovoitov
a7ba4558 b0170552

+38 -1
+2
include/linux/skmsg.h
··· 58 58 struct bpf_prog *msg_parser; 59 59 struct bpf_prog *stream_parser; 60 60 struct bpf_prog *stream_verdict; 61 + struct bpf_prog *skb_verdict; 61 62 }; 62 63 63 64 enum sk_psock_state_bits { ··· 488 487 psock_set_prog(&progs->msg_parser, NULL); 489 488 psock_set_prog(&progs->stream_parser, NULL); 490 489 psock_set_prog(&progs->stream_verdict, NULL); 490 + psock_set_prog(&progs->skb_verdict, NULL); 491 491 } 492 492 493 493 int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
+1
include/uapi/linux/bpf.h
··· 957 957 BPF_XDP_CPUMAP, 958 958 BPF_SK_LOOKUP, 959 959 BPF_XDP, 960 + BPF_SK_SKB_VERDICT, 960 961 __MAX_BPF_ATTACH_TYPE 961 962 }; 962 963
+1
kernel/bpf/syscall.c
··· 2948 2948 return BPF_PROG_TYPE_SK_MSG; 2949 2949 case BPF_SK_SKB_STREAM_PARSER: 2950 2950 case BPF_SK_SKB_STREAM_VERDICT: 2951 + case BPF_SK_SKB_VERDICT: 2951 2952 return BPF_PROG_TYPE_SK_SKB; 2952 2953 case BPF_LIRC_MODE2: 2953 2954 return BPF_PROG_TYPE_LIRC_MODE2;
+3 -1
net/core/skmsg.c
··· 697 697 rcu_assign_sk_user_data(sk, NULL); 698 698 if (psock->progs.stream_parser) 699 699 sk_psock_stop_strp(sk, psock); 700 - else if (psock->progs.stream_verdict) 700 + else if (psock->progs.stream_verdict || psock->progs.skb_verdict) 701 701 sk_psock_stop_verdict(sk, psock); 702 702 write_unlock_bh(&sk->sk_callback_lock); 703 703 ··· 1024 1024 } 1025 1025 skb_set_owner_r(skb, sk); 1026 1026 prog = READ_ONCE(psock->progs.stream_verdict); 1027 + if (!prog) 1028 + prog = READ_ONCE(psock->progs.skb_verdict); 1027 1029 if (likely(prog)) { 1028 1030 skb_dst_drop(skb); 1029 1031 skb_bpf_redirect_clear(skb);
+28
net/core/sock_map.c
··· 156 156 strp_stop = true; 157 157 if (psock->saved_data_ready && stab->progs.stream_verdict) 158 158 verdict_stop = true; 159 + if (psock->saved_data_ready && stab->progs.skb_verdict) 160 + verdict_stop = true; 159 161 list_del(&link->list); 160 162 sk_psock_free_link(link); 161 163 } ··· 234 232 struct sk_psock_progs *progs = sock_map_progs(map); 235 233 struct bpf_prog *stream_verdict = NULL; 236 234 struct bpf_prog *stream_parser = NULL; 235 + struct bpf_prog *skb_verdict = NULL; 237 236 struct bpf_prog *msg_parser = NULL; 238 237 struct sk_psock *psock; 239 238 int ret; ··· 271 268 } 272 269 } 273 270 271 + skb_verdict = READ_ONCE(progs->skb_verdict); 272 + if (skb_verdict) { 273 + skb_verdict = bpf_prog_inc_not_zero(skb_verdict); 274 + if (IS_ERR(skb_verdict)) { 275 + ret = PTR_ERR(skb_verdict); 276 + goto out_put_msg_parser; 277 + } 278 + } 279 + 274 280 no_progs: 275 281 psock = sock_map_psock_get_checked(sk); 276 282 if (IS_ERR(psock)) { ··· 290 278 if (psock) { 291 279 if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) || 292 280 (stream_parser && READ_ONCE(psock->progs.stream_parser)) || 281 + (skb_verdict && READ_ONCE(psock->progs.skb_verdict)) || 282 + (skb_verdict && READ_ONCE(psock->progs.stream_verdict)) || 283 + (stream_verdict && READ_ONCE(psock->progs.skb_verdict)) || 293 284 (stream_verdict && READ_ONCE(psock->progs.stream_verdict))) { 294 285 sk_psock_put(sk, psock); 295 286 ret = -EBUSY; ··· 324 309 } else if (!stream_parser && stream_verdict && !psock->saved_data_ready) { 325 310 psock_set_prog(&psock->progs.stream_verdict, stream_verdict); 326 311 sk_psock_start_verdict(sk,psock); 312 + } else if (!stream_verdict && skb_verdict && !psock->saved_data_ready) { 313 + psock_set_prog(&psock->progs.skb_verdict, skb_verdict); 314 + sk_psock_start_verdict(sk, psock); 327 315 } 328 316 write_unlock_bh(&sk->sk_callback_lock); 329 317 return 0; ··· 335 317 out_drop: 336 318 sk_psock_put(sk, psock); 337 319 out_progs: 320 + if (skb_verdict) 321 + 
bpf_prog_put(skb_verdict); 322 + out_put_msg_parser: 338 323 if (msg_parser) 339 324 bpf_prog_put(msg_parser); 340 325 out_put_stream_parser: ··· 1463 1442 break; 1464 1443 #endif 1465 1444 case BPF_SK_SKB_STREAM_VERDICT: 1445 + if (progs->skb_verdict) 1446 + return -EBUSY; 1466 1447 pprog = &progs->stream_verdict; 1448 + break; 1449 + case BPF_SK_SKB_VERDICT: 1450 + if (progs->stream_verdict) 1451 + return -EBUSY; 1452 + pprog = &progs->skb_verdict; 1467 1453 break; 1468 1454 default: 1469 1455 return -EOPNOTSUPP;
+1
tools/bpf/bpftool/common.c
··· 57 57 58 58 [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", 59 59 [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", 60 + [BPF_SK_SKB_VERDICT] = "sk_skb_verdict", 60 61 [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", 61 62 [BPF_LIRC_MODE2] = "lirc_mode2", 62 63 [BPF_FLOW_DISSECTOR] = "flow_dissector",
+1
tools/bpf/bpftool/prog.c
··· 76 76 static const char * const attach_type_strings[] = { 77 77 [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", 78 78 [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", 79 + [BPF_SK_SKB_VERDICT] = "skb_verdict", 79 80 [BPF_SK_MSG_VERDICT] = "msg_verdict", 80 81 [BPF_FLOW_DISSECTOR] = "flow_dissector", 81 82 [__MAX_BPF_ATTACH_TYPE] = NULL,
+1
tools/include/uapi/linux/bpf.h
··· 957 957 BPF_XDP_CPUMAP, 958 958 BPF_SK_LOOKUP, 959 959 BPF_XDP, 960 + BPF_SK_SKB_VERDICT, 960 961 __MAX_BPF_ATTACH_TYPE 961 962 }; 962 963