Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

udp: correct reuseport selection with connected sockets

UDP reuseport groups can hold a mix of unconnected and connected sockets.
Ensure that connected sockets receive all traffic to their 4-tuple, and only that traffic.

Fast reuseport returns on the first reuseport match on the assumption
that all matches are equal. Only if connections are present, return to
the previous behavior of scoring all sockets.

Record if connections are present and if so (1) treat such connected
sockets as an independent match from the group, (2) only return
2-tuple matches from reuseport and (3) do not return on the first
2-tuple reuseport match to allow for a higher scoring match later.

The new field has_conns is set without locks. No other fields in the
bitfield are modified at runtime, and has_conns is only ever set
unconditionally, so a read-modify-write (RMW) cannot miss a change.

Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
Link: http://lkml.kernel.org/r/CA+FuTSfRP09aJNYRt04SS6qj22ViiOEWaWmLAwX0psk8-PGNxw@mail.gmail.com
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Craig Gallek <kraig@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Willem de Bruijn and committed by
David S. Miller
acdcecc6 05a82481

+42 -7
+19 -1
include/net/sock_reuseport.h
··· 21 21 unsigned int synq_overflow_ts; 22 22 /* ID stays the same even after the size of socks[] grows. */ 23 23 unsigned int reuseport_id; 24 - bool bind_inany; 24 + unsigned int bind_inany:1; 25 + unsigned int has_conns:1; 25 26 struct bpf_prog __rcu *prog; /* optional BPF sock selector */ 26 27 struct sock *socks[0]; /* array of sock pointers */ 27 28 }; ··· 37 36 int hdr_len); 38 37 extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog); 39 38 extern int reuseport_detach_prog(struct sock *sk); 39 + 40 + static inline bool reuseport_has_conns(struct sock *sk, bool set) 41 + { 42 + struct sock_reuseport *reuse; 43 + bool ret = false; 44 + 45 + rcu_read_lock(); 46 + reuse = rcu_dereference(sk->sk_reuseport_cb); 47 + if (reuse) { 48 + if (set) 49 + reuse->has_conns = 1; 50 + ret = reuse->has_conns; 51 + } 52 + rcu_read_unlock(); 53 + 54 + return ret; 55 + } 40 56 41 57 int reuseport_get_id(struct sock_reuseport *reuse); 42 58
+13 -2
net/core/sock_reuseport.c
··· 295 295 296 296 select_by_hash: 297 297 /* no bpf or invalid bpf result: fall back to hash usage */ 298 - if (!sk2) 299 - sk2 = reuse->socks[reciprocal_scale(hash, socks)]; 298 + if (!sk2) { 299 + int i, j; 300 + 301 + i = j = reciprocal_scale(hash, socks); 302 + while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) { 303 + i++; 304 + if (i >= reuse->num_socks) 305 + i = 0; 306 + if (i == j) 307 + goto out; 308 + } 309 + sk2 = reuse->socks[i]; 310 + } 300 311 } 301 312 302 313 out:
+2
net/ipv4/datagram.c
··· 15 15 #include <net/sock.h> 16 16 #include <net/route.h> 17 17 #include <net/tcp_states.h> 18 + #include <net/sock_reuseport.h> 18 19 19 20 int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) 20 21 { ··· 70 69 } 71 70 inet->inet_daddr = fl4->daddr; 72 71 inet->inet_dport = usin->sin_port; 72 + reuseport_has_conns(sk, true); 73 73 sk->sk_state = TCP_ESTABLISHED; 74 74 sk_set_txhash(sk); 75 75 inet->inet_id = jiffies;
+3 -2
net/ipv4/udp.c
··· 423 423 score = compute_score(sk, net, saddr, sport, 424 424 daddr, hnum, dif, sdif); 425 425 if (score > badness) { 426 - if (sk->sk_reuseport) { 426 + if (sk->sk_reuseport && 427 + sk->sk_state != TCP_ESTABLISHED) { 427 428 hash = udp_ehashfn(net, daddr, hnum, 428 429 saddr, sport); 429 430 result = reuseport_select_sock(sk, hash, skb, 430 431 sizeof(struct udphdr)); 431 - if (result) 432 + if (result && !reuseport_has_conns(sk, false)) 432 433 return result; 433 434 } 434 435 badness = score;
+2
net/ipv6/datagram.c
··· 27 27 #include <net/ip6_route.h> 28 28 #include <net/tcp_states.h> 29 29 #include <net/dsfield.h> 30 + #include <net/sock_reuseport.h> 30 31 31 32 #include <linux/errqueue.h> 32 33 #include <linux/uaccess.h> ··· 255 254 goto out; 256 255 } 257 256 257 + reuseport_has_conns(sk, true); 258 258 sk->sk_state = TCP_ESTABLISHED; 259 259 sk_set_txhash(sk); 260 260 out:
+3 -2
net/ipv6/udp.c
··· 158 158 score = compute_score(sk, net, saddr, sport, 159 159 daddr, hnum, dif, sdif); 160 160 if (score > badness) { 161 - if (sk->sk_reuseport) { 161 + if (sk->sk_reuseport && 162 + sk->sk_state != TCP_ESTABLISHED) { 162 163 hash = udp6_ehashfn(net, daddr, hnum, 163 164 saddr, sport); 164 165 165 166 result = reuseport_select_sock(sk, hash, skb, 166 167 sizeof(struct udphdr)); 167 - if (result) 168 + if (result && !reuseport_has_conns(sk, false)) 168 169 return result; 169 170 } 170 171 result = sk;