Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf, net: Rework cookie generator as per-cpu one

With its use in BPF, the cookie generator can be called very frequently,
in particular when used out of cgroup v2 hooks (e.g. connect / sendmsg)
and attached to the root cgroup, for example, when used in v1/v2 mixed
environments. When there is high churn on sockets in the system, there
can be many parallel requests to the bpf_get_socket_cookie() and
bpf_get_netns_cookie() helpers, which then cause contention on the
atomic counter.

As similarly done in f991bd2e1421 ("fs: introduce a per-cpu last_ino
allocator"), add a small helper library that both the socket and netns
cookie generators can use for their 64 bit counters. Given this can be
called from different contexts, we also need to deal with potential
nested calls even though in practice they are considered extremely rare.
One idea as suggested by Eric Dumazet was to use a reverse counter for
this situation since we don't expect 64 bit overflows anyway; that way,
we can avoid bigger gaps in the 64 bit counter space compared to just a
batch-wise increase. Even on machines with a small number of cores
(e.g. 4) the cookie generation shrinks from min/max/med/avg (ns) of
22/50/40/38.9 down to 10/35/14/17.3 when run in parallel from multiple
CPUs.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Link: https://lore.kernel.org/bpf/8a80b8d27d3c49f9a14e1d5213c19d8be87d1dc8.1601477936.git.daniel@iogearbox.net

authored by

Daniel Borkmann and committed by
Alexei Starovoitov
92acdc58 b426ce83

+86 -18
+51
include/linux/cookie.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef __LINUX_COOKIE_H 3 + #define __LINUX_COOKIE_H 4 + 5 + #include <linux/atomic.h> 6 + #include <linux/percpu.h> 7 + #include <asm/local.h> 8 + 9 + struct pcpu_gen_cookie { 10 + local_t nesting; 11 + u64 last; 12 + } __aligned(16); 13 + 14 + struct gen_cookie { 15 + struct pcpu_gen_cookie __percpu *local; 16 + atomic64_t forward_last ____cacheline_aligned_in_smp; 17 + atomic64_t reverse_last; 18 + }; 19 + 20 + #define COOKIE_LOCAL_BATCH 4096 21 + 22 + #define DEFINE_COOKIE(name) \ 23 + static DEFINE_PER_CPU(struct pcpu_gen_cookie, __##name); \ 24 + static struct gen_cookie name = { \ 25 + .local = &__##name, \ 26 + .forward_last = ATOMIC64_INIT(0), \ 27 + .reverse_last = ATOMIC64_INIT(0), \ 28 + } 29 + 30 + static __always_inline u64 gen_cookie_next(struct gen_cookie *gc) 31 + { 32 + struct pcpu_gen_cookie *local = this_cpu_ptr(gc->local); 33 + u64 val; 34 + 35 + if (likely(local_inc_return(&local->nesting) == 1)) { 36 + val = local->last; 37 + if (__is_defined(CONFIG_SMP) && 38 + unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) { 39 + s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH, 40 + &gc->forward_last); 41 + val = next - COOKIE_LOCAL_BATCH; 42 + } 43 + local->last = ++val; 44 + } else { 45 + val = atomic64_dec_return(&gc->reverse_last); 46 + } 47 + local_dec(&local->nesting); 48 + return val; 49 + } 50 + 51 + #endif /* __LINUX_COOKIE_H */
+13 -1
include/linux/sock_diag.h
··· 25 25 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); 26 26 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); 27 27 28 - u64 sock_gen_cookie(struct sock *sk); 28 + u64 __sock_gen_cookie(struct sock *sk); 29 + 30 + static inline u64 sock_gen_cookie(struct sock *sk) 31 + { 32 + u64 cookie; 33 + 34 + preempt_disable(); 35 + cookie = __sock_gen_cookie(sk); 36 + preempt_enable(); 37 + 38 + return cookie; 39 + } 40 + 29 41 int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie); 30 42 void sock_diag_save_cookie(struct sock *sk, __u32 *cookie); 31 43
+1 -1
include/net/net_namespace.h
··· 230 230 struct net *get_net_ns_by_pid(pid_t pid); 231 231 struct net *get_net_ns_by_fd(int fd); 232 232 233 - u64 net_gen_cookie(struct net *net); 233 + u64 __net_gen_cookie(struct net *net); 234 234 235 235 #ifdef CONFIG_SYSCTL 236 236 void ipx_register_sysctl(void);
+1 -1
kernel/bpf/reuseport_array.c
··· 191 191 rcu_read_lock(); 192 192 sk = reuseport_array_lookup_elem(map, key); 193 193 if (sk) { 194 - *(u64 *)value = sock_gen_cookie(sk); 194 + *(u64 *)value = __sock_gen_cookie(sk); 195 195 err = 0; 196 196 } else { 197 197 err = -ENOENT;
+5 -5
net/core/filter.c
··· 4232 4232 4233 4233 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) 4234 4234 { 4235 - return skb->sk ? sock_gen_cookie(skb->sk) : 0; 4235 + return skb->sk ? __sock_gen_cookie(skb->sk) : 0; 4236 4236 } 4237 4237 4238 4238 static const struct bpf_func_proto bpf_get_socket_cookie_proto = { ··· 4244 4244 4245 4245 BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx) 4246 4246 { 4247 - return sock_gen_cookie(ctx->sk); 4247 + return __sock_gen_cookie(ctx->sk); 4248 4248 } 4249 4249 4250 4250 static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = { ··· 4256 4256 4257 4257 BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx) 4258 4258 { 4259 - return sock_gen_cookie(ctx); 4259 + return __sock_gen_cookie(ctx); 4260 4260 } 4261 4261 4262 4262 static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = { ··· 4268 4268 4269 4269 BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx) 4270 4270 { 4271 - return sock_gen_cookie(ctx->sk); 4271 + return __sock_gen_cookie(ctx->sk); 4272 4272 } 4273 4273 4274 4274 static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = { ··· 4281 4281 static u64 __bpf_get_netns_cookie(struct sock *sk) 4282 4282 { 4283 4283 #ifdef CONFIG_NET_NS 4284 - return net_gen_cookie(sk ? sk->sk_net.net : &init_net); 4284 + return __net_gen_cookie(sk ? sk->sk_net.net : &init_net); 4285 4285 #else 4286 4286 return 0; 4287 4287 #endif
+8 -4
net/core/net_namespace.c
··· 19 19 #include <linux/net_namespace.h> 20 20 #include <linux/sched/task.h> 21 21 #include <linux/uidgid.h> 22 + #include <linux/cookie.h> 22 23 23 24 #include <net/sock.h> 24 25 #include <net/netlink.h> ··· 70 69 71 70 static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; 72 71 73 - static atomic64_t cookie_gen; 72 + DEFINE_COOKIE(net_cookie); 74 73 75 - u64 net_gen_cookie(struct net *net) 74 + u64 __net_gen_cookie(struct net *net) 76 75 { 77 76 while (1) { 78 77 u64 res = atomic64_read(&net->net_cookie); 79 78 80 79 if (res) 81 80 return res; 82 - res = atomic64_inc_return(&cookie_gen); 81 + res = gen_cookie_next(&net_cookie); 83 82 atomic64_cmpxchg(&net->net_cookie, 0, res); 84 83 } 85 84 } ··· 1102 1101 panic("Could not allocate generic netns"); 1103 1102 1104 1103 rcu_assign_pointer(init_net.gen, ng); 1105 - net_gen_cookie(&init_net); 1104 + 1105 + preempt_disable(); 1106 + __net_gen_cookie(&init_net); 1107 + preempt_enable(); 1106 1108 1107 1109 down_write(&pernet_ops_rwsem); 1108 1110 if (setup_net(&init_net, &init_user_ns))
+5 -4
net/core/sock_diag.c
··· 11 11 #include <linux/tcp.h> 12 12 #include <linux/workqueue.h> 13 13 #include <linux/nospec.h> 14 - 14 + #include <linux/cookie.h> 15 15 #include <linux/inet_diag.h> 16 16 #include <linux/sock_diag.h> 17 17 ··· 19 19 static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); 20 20 static DEFINE_MUTEX(sock_diag_table_mutex); 21 21 static struct workqueue_struct *broadcast_wq; 22 - static atomic64_t cookie_gen; 23 22 24 - u64 sock_gen_cookie(struct sock *sk) 23 + DEFINE_COOKIE(sock_cookie); 24 + 25 + u64 __sock_gen_cookie(struct sock *sk) 25 26 { 26 27 while (1) { 27 28 u64 res = atomic64_read(&sk->sk_cookie); 28 29 29 30 if (res) 30 31 return res; 31 - res = atomic64_inc_return(&cookie_gen); 32 + res = gen_cookie_next(&sock_cookie); 32 33 atomic64_cmpxchg(&sk->sk_cookie, 0, res); 33 34 } 34 35 }
+2 -2
net/core/sock_map.c
··· 401 401 if (!sk) 402 402 return ERR_PTR(-ENOENT); 403 403 404 - sock_gen_cookie(sk); 404 + __sock_gen_cookie(sk); 405 405 return &sk->sk_cookie; 406 406 } 407 407 ··· 1209 1209 if (!sk) 1210 1210 return ERR_PTR(-ENOENT); 1211 1211 1212 - sock_gen_cookie(sk); 1212 + __sock_gen_cookie(sk); 1213 1213 return &sk->sk_cookie; 1214 1214 } 1215 1215