Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

udp: add rehash on connect()

commit 30fff923 introduced in linux-2.6.33 (udp: bind() optimisation)
added a secondary hash on UDP, hashed on (local addr, local port).

The problem is that the following sequence:

fd = socket(...)
connect(fd, &remote, ...)

not only selects the remote endpoint (address and port), but also sets
the local address, whereas the UDP stack inserted the socket into the
secondary hash table while its local address was still INADDR_ANY (or
the IPv6 equivalent).

The sequence is:
- autobind() : choose a random local port, insert socket in hash tables
[while local address is INADDR_ANY]
- connect() : set remote address and port, change local address to IP
given by a route lookup.

When an incoming UDP frame arrives, if more than 10 sockets are found in
the primary hash table, we switch to the secondary table, and fail to
find the socket because its local address has changed.

One solution to this problem is to rehash the datagram socket if needed.

We add a new rehash(struct sock *) method in "struct proto", and
implement this method for UDP v4 & v6, using a common helper.

This rehashing only takes care of the secondary hash table, since the
primary hash (based on local port only) is not changed.

Reported-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by Eric Dumazet and committed by David S. Miller
719f8358 ae2688d5

+66 -2
+1
include/net/sock.h
··· 752 /* Keeping track of sk's, looking them up, and port selection methods. */ 753 void (*hash)(struct sock *sk); 754 void (*unhash)(struct sock *sk); 755 int (*get_port)(struct sock *sk, unsigned short snum); 756 757 /* Keeping track of sockets in use */
··· 752 /* Keeping track of sk's, looking them up, and port selection methods. */ 753 void (*hash)(struct sock *sk); 754 void (*unhash)(struct sock *sk); 755 + void (*rehash)(struct sock *sk); 756 int (*get_port)(struct sock *sk, unsigned short snum); 757 758 /* Keeping track of sockets in use */
+1
include/net/udp.h
··· 151 } 152 153 extern void udp_lib_unhash(struct sock *sk); 154 155 static inline void udp_lib_close(struct sock *sk, long timeout) 156 {
··· 151 } 152 153 extern void udp_lib_unhash(struct sock *sk); 154 + extern void udp_lib_rehash(struct sock *sk, u16 new_hash); 155 156 static inline void udp_lib_close(struct sock *sk, long timeout) 157 {
+4 -1
net/ipv4/datagram.c
··· 62 } 63 if (!inet->inet_saddr) 64 inet->inet_saddr = rt->rt_src; /* Update source address */ 65 - if (!inet->inet_rcv_saddr) 66 inet->inet_rcv_saddr = rt->rt_src; 67 inet->inet_daddr = rt->rt_dst; 68 inet->inet_dport = usin->sin_port; 69 sk->sk_state = TCP_ESTABLISHED;
··· 62 } 63 if (!inet->inet_saddr) 64 inet->inet_saddr = rt->rt_src; /* Update source address */ 65 + if (!inet->inet_rcv_saddr) { 66 inet->inet_rcv_saddr = rt->rt_src; 67 + if (sk->sk_prot->rehash) 68 + sk->sk_prot->rehash(sk); 69 + } 70 inet->inet_daddr = rt->rt_dst; 71 inet->inet_dport = usin->sin_port; 72 sk->sk_state = TCP_ESTABLISHED;
+44
net/ipv4/udp.c
··· 1260 } 1261 EXPORT_SYMBOL(udp_lib_unhash); 1262 1263 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 1264 { 1265 int rc; ··· 1886 .backlog_rcv = __udp_queue_rcv_skb, 1887 .hash = udp_lib_hash, 1888 .unhash = udp_lib_unhash, 1889 .get_port = udp_v4_get_port, 1890 .memory_allocated = &udp_memory_allocated, 1891 .sysctl_mem = sysctl_udp_mem,
··· 1260 } 1261 EXPORT_SYMBOL(udp_lib_unhash); 1262 1263 + /* 1264 + * inet_rcv_saddr was changed, we must rehash secondary hash 1265 + */ 1266 + void udp_lib_rehash(struct sock *sk, u16 newhash) 1267 + { 1268 + if (sk_hashed(sk)) { 1269 + struct udp_table *udptable = sk->sk_prot->h.udp_table; 1270 + struct udp_hslot *hslot, *hslot2, *nhslot2; 1271 + 1272 + hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); 1273 + nhslot2 = udp_hashslot2(udptable, newhash); 1274 + udp_sk(sk)->udp_portaddr_hash = newhash; 1275 + if (hslot2 != nhslot2) { 1276 + hslot = udp_hashslot(udptable, sock_net(sk), 1277 + udp_sk(sk)->udp_port_hash); 1278 + /* we must lock primary chain too */ 1279 + spin_lock_bh(&hslot->lock); 1280 + 1281 + spin_lock(&hslot2->lock); 1282 + hlist_nulls_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); 1283 + hslot2->count--; 1284 + spin_unlock(&hslot2->lock); 1285 + 1286 + spin_lock(&nhslot2->lock); 1287 + hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node, 1288 + &nhslot2->head); 1289 + nhslot2->count++; 1290 + spin_unlock(&nhslot2->lock); 1291 + 1292 + spin_unlock_bh(&hslot->lock); 1293 + } 1294 + } 1295 + } 1296 + EXPORT_SYMBOL(udp_lib_rehash); 1297 + 1298 + static void udp_v4_rehash(struct sock *sk) 1299 + { 1300 + u16 new_hash = udp4_portaddr_hash(sock_net(sk), 1301 + inet_sk(sk)->inet_rcv_saddr, 1302 + inet_sk(sk)->inet_num); 1303 + udp_lib_rehash(sk, new_hash); 1304 + } 1305 + 1306 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) 1307 { 1308 int rc; ··· 1843 .backlog_rcv = __udp_queue_rcv_skb, 1844 .hash = udp_lib_hash, 1845 .unhash = udp_lib_unhash, 1846 + .rehash = udp_v4_rehash, 1847 .get_port = udp_v4_get_port, 1848 .memory_allocated = &udp_memory_allocated, 1849 .sysctl_mem = sysctl_udp_mem,
+6 -1
net/ipv6/datagram.c
··· 105 if (ipv6_addr_any(&np->saddr)) 106 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); 107 108 - if (ipv6_addr_any(&np->rcv_saddr)) 109 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, 110 &np->rcv_saddr); 111 112 goto out; 113 } ··· 184 if (ipv6_addr_any(&np->rcv_saddr)) { 185 ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); 186 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 187 } 188 189 ip6_dst_store(sk, dst,
··· 105 if (ipv6_addr_any(&np->saddr)) 106 ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); 107 108 + if (ipv6_addr_any(&np->rcv_saddr)) { 109 ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, 110 &np->rcv_saddr); 111 + if (sk->sk_prot->rehash) 112 + sk->sk_prot->rehash(sk); 113 + } 114 115 goto out; 116 } ··· 181 if (ipv6_addr_any(&np->rcv_saddr)) { 182 ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src); 183 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 184 + if (sk->sk_prot->rehash) 185 + sk->sk_prot->rehash(sk); 186 } 187 188 ip6_dst_store(sk, dst,
+10
net/ipv6/udp.c
··· 111 return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); 112 } 113 114 static inline int compute_score(struct sock *sk, struct net *net, 115 unsigned short hnum, 116 struct in6_addr *saddr, __be16 sport, ··· 1456 .backlog_rcv = udpv6_queue_rcv_skb, 1457 .hash = udp_lib_hash, 1458 .unhash = udp_lib_unhash, 1459 .get_port = udp_v6_get_port, 1460 .memory_allocated = &udp_memory_allocated, 1461 .sysctl_mem = sysctl_udp_mem,
··· 111 return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); 112 } 113 114 + static void udp_v6_rehash(struct sock *sk) 115 + { 116 + u16 new_hash = udp6_portaddr_hash(sock_net(sk), 117 + &inet6_sk(sk)->rcv_saddr, 118 + inet_sk(sk)->inet_num); 119 + 120 + udp_lib_rehash(sk, new_hash); 121 + } 122 + 123 static inline int compute_score(struct sock *sk, struct net *net, 124 unsigned short hnum, 125 struct in6_addr *saddr, __be16 sport, ··· 1447 .backlog_rcv = udpv6_queue_rcv_skb, 1448 .hash = udp_lib_hash, 1449 .unhash = udp_lib_unhash, 1450 + .rehash = udp_v6_rehash, 1451 .get_port = udp_v6_get_port, 1452 .memory_allocated = &udp_memory_allocated, 1453 .sysctl_mem = sysctl_udp_mem,