Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

udp: bind() optimisation

UDP bind() can be O(N^2) in some pathological cases.

Thanks to secondary hash tables, we can make it O(N).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Eric Dumazet and committed by David S. Miller
30fff923 0ab365f4

+80 -16
+6
include/linux/udp.h
··· 88 88 return (struct udp_sock *)sk; 89 89 } 90 90 91 + #define udp_portaddr_for_each_entry(__sk, node, list) \ 92 + hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) 93 + 94 + #define udp_portaddr_for_each_entry_rcu(__sk, node, list) \ 95 + hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node) 96 + 91 97 #define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) 92 98 93 99 #endif
+2 -1
include/net/udp.h
··· 158 158 } 159 159 160 160 extern int udp_lib_get_port(struct sock *sk, unsigned short snum, 161 - int (*)(const struct sock*,const struct sock*)); 161 + int (*)(const struct sock *,const struct sock *), 162 + unsigned int hash2_nulladdr); 162 163 163 164 /* net/ipv4/udp.c */ 164 165 extern int udp_get_port(struct sock *sk, unsigned short snum,
+65 -8
net/ipv4/udp.c
··· 152 152 return 0; 153 153 } 154 154 155 + /* 156 + * Note: we still hold spinlock of primary hash chain, so no other writer 157 + * can insert/delete a socket with local_port == num 158 + */ 159 + static int udp_lib_lport_inuse2(struct net *net, __u16 num, 160 + struct udp_hslot *hslot2, 161 + struct sock *sk, 162 + int (*saddr_comp)(const struct sock *sk1, 163 + const struct sock *sk2)) 164 + { 165 + struct sock *sk2; 166 + struct hlist_nulls_node *node; 167 + int res = 0; 168 + 169 + spin_lock(&hslot2->lock); 170 + udp_portaddr_for_each_entry(sk2, node, &hslot2->head) 171 + if (net_eq(sock_net(sk2), net) && 172 + sk2 != sk && 173 + (udp_sk(sk2)->udp_port_hash == num) && 174 + (!sk2->sk_reuse || !sk->sk_reuse) && 175 + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if 176 + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 177 + (*saddr_comp)(sk, sk2)) { 178 + res = 1; 179 + break; 180 + } 181 + spin_unlock(&hslot2->lock); 182 + return res; 183 + } 184 + 155 185 /** 156 186 * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 157 187 * 158 188 * @sk: socket struct in question 159 189 * @snum: port number to look up 160 190 * @saddr_comp: AF-dependent comparison of bound local IP addresses 191 + * @hash2_nulladdr: AF-dependant hash value in secondary hash chains, 192 + * with NULL address 161 193 */ 162 194 int udp_lib_get_port(struct sock *sk, unsigned short snum, 163 195 int (*saddr_comp)(const struct sock *sk1, 164 - const struct sock *sk2)) 196 + const struct sock *sk2), 197 + unsigned int hash2_nulladdr) 165 198 { 166 199 struct udp_hslot *hslot, *hslot2; 167 200 struct udp_table *udptable = sk->sk_prot->h.udp_table; ··· 243 210 } else { 244 211 hslot = udp_hashslot(udptable, net, snum); 245 212 spin_lock_bh(&hslot->lock); 213 + if (hslot->count > 10) { 214 + int exist; 215 + unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum; 216 + 217 + slot2 &= udptable->mask; 218 + hash2_nulladdr &= udptable->mask; 219 + 220 + hslot2 = 
udp_hashslot2(udptable, slot2); 221 + if (hslot->count < hslot2->count) 222 + goto scan_primary_hash; 223 + 224 + exist = udp_lib_lport_inuse2(net, snum, hslot2, 225 + sk, saddr_comp); 226 + if (!exist && (hash2_nulladdr != slot2)) { 227 + hslot2 = udp_hashslot2(udptable, hash2_nulladdr); 228 + exist = udp_lib_lport_inuse2(net, snum, hslot2, 229 + sk, saddr_comp); 230 + } 231 + if (exist) 232 + goto fail_unlock; 233 + else 234 + goto found; 235 + } 236 + scan_primary_hash: 246 237 if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, 247 238 saddr_comp, 0)) 248 239 goto fail_unlock; ··· 312 255 313 256 int udp_v4_get_port(struct sock *sk, unsigned short snum) 314 257 { 258 + unsigned int hash2_nulladdr = 259 + udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum); 260 + unsigned int hash2_partial = 261 + udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); 262 + 315 263 /* precompute partial secondary hash */ 316 - udp_sk(sk)->udp_portaddr_hash = 317 - udp4_portaddr_hash(sock_net(sk), 318 - inet_sk(sk)->inet_rcv_saddr, 319 - 0); 320 - return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal); 264 + udp_sk(sk)->udp_portaddr_hash = hash2_partial; 265 + return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); 321 266 } 322 267 323 268 static inline int compute_score(struct sock *sk, struct net *net, __be32 saddr, ··· 395 336 return score; 396 337 } 397 338 398 - #define udp_portaddr_for_each_entry_rcu(__sk, node, list) \ 399 - hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node) 400 339 401 340 /* called with read_rcu_lock() */ 402 341 static struct sock *udp4_lib_lookup2(struct net *net,
+7 -7
net/ipv6/udp.c
··· 100 100 101 101 int udp_v6_get_port(struct sock *sk, unsigned short snum) 102 102 { 103 + unsigned int hash2_nulladdr = 104 + udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum); 105 + unsigned int hash2_partial = 106 + udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); 107 + 103 108 /* precompute partial secondary hash */ 104 - udp_sk(sk)->udp_portaddr_hash = 105 - udp6_portaddr_hash(sock_net(sk), 106 - &inet6_sk(sk)->rcv_saddr, 107 - 0); 108 - return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); 109 + udp_sk(sk)->udp_portaddr_hash = hash2_partial; 110 + return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal, hash2_nulladdr); 109 111 } 110 112 111 113 static inline int compute_score(struct sock *sk, struct net *net, ··· 183 181 return score; 184 182 } 185 183 186 - #define udp_portaddr_for_each_entry_rcu(__sk, node, list) \ 187 - hlist_nulls_for_each_entry_rcu(__sk, node, list, __sk_common.skc_portaddr_node) 188 184 189 185 /* called with read_rcu_lock() */ 190 186 static struct sock *udp6_lib_lookup2(struct net *net,