Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

udp: optimize bind(0) if many ports are in use

commit 9088c5609584684149f3fb5b065aa7f18dcb03ff
(udp: Improve port randomization) introduced a regression for UDP bind() syscall
to null port (getting a random port) in case lot of ports are already in use.

This is because we do about 28000 scans of very long chains (220 sockets per chain),
with many spin_lock_bh()/spin_unlock_bh() calls.

Fix this using a bitmap (64 bytes for current value of UDP_HTABLE_SIZE)
so that we scan chains at most once.

Instead of 250 ms per bind() call before the patch, a bind() call now takes 2.9 ms.

Based on a report from Vitaly Mayatskikh

Reported-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Tested-by: Vitaly Mayatskikh <v.mayatskih@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Eric Dumazet; committed by David S. Miller.
98322f22 8527bec5

+39 -16
net/ipv4/udp.c
@@ -120,6 +120,8 @@
 atomic_t udp_memory_allocated;
 EXPORT_SYMBOL(udp_memory_allocated);
 
+#define PORTS_PER_CHAIN (65536 / UDP_HTABLE_SIZE)
+
 static int udp_lib_lport_inuse(struct net *net, __u16 num,
 			       const struct udp_hslot *hslot,
+			       unsigned long *bitmap,
 			       struct sock *sk,
 			       int (*saddr_comp)(const struct sock *sk1,
 						 const struct sock *s2))
@@ -132,14 +135,18 @@
 	sk_nulls_for_each(sk2, node, &hslot->head)
 		if (net_eq(sock_net(sk2), net) &&
 		    sk2 != sk &&
-		    sk2->sk_hash == num &&
+		    (bitmap || sk2->sk_hash == num) &&
 		    (!sk2->sk_reuse || !sk->sk_reuse) &&
 		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
 		     || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
-		    (*saddr_comp)(sk, sk2))
-			return 1;
+		    (*saddr_comp)(sk, sk2)) {
+			if (bitmap)
+				__set_bit(sk2->sk_hash / UDP_HTABLE_SIZE,
+					  bitmap);
+			else
+				return 1;
+		}
 	return 0;
 }
@@ -160,34 +168,37 @@
 	if (!snum) {
 		int low, high, remaining;
 		unsigned rand;
-		unsigned short first;
+		unsigned short first, last;
+		DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
 
 		inet_get_local_port_range(&low, &high);
 		remaining = (high - low) + 1;
 
 		rand = net_random();
-		snum = first = rand % remaining + low;
-		rand |= 1;
-		for (;;) {
-			hslot = &udptable->hash[udp_hashfn(net, snum)];
+		first = (((u64)rand * remaining) >> 32) + low;
+		/*
+		 * force rand to be an odd multiple of UDP_HTABLE_SIZE
+		 */
+		rand = (rand | 1) * UDP_HTABLE_SIZE;
+		for (last = first + UDP_HTABLE_SIZE; first != last; first++) {
+			hslot = &udptable->hash[udp_hashfn(net, first)];
+			bitmap_zero(bitmap, PORTS_PER_CHAIN);
 			spin_lock_bh(&hslot->lock);
-			if (!udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp))
-				break;
-			spin_unlock_bh(&hslot->lock);
+			udp_lib_lport_inuse(net, snum, hslot, bitmap, sk,
+					    saddr_comp);
+
+			snum = first;
+			/*
+			 * Iterate on all possible values of snum for this hash.
+			 * Using steps of an odd multiple of UDP_HTABLE_SIZE
+			 * give us randomization and full range coverage.
+			 */
 			do {
-				snum = snum + rand;
-			} while (snum < low || snum > high);
-			if (snum == first)
-				goto fail;
+				if (low <= snum && snum <= high &&
+				    !test_bit(snum / UDP_HTABLE_SIZE, bitmap))
+					goto found;
+				snum += rand;
+			} while (snum != first);
+			spin_unlock_bh(&hslot->lock);
 		}
+		goto fail;
 	} else {
 		hslot = &udptable->hash[udp_hashfn(net, snum)];
 		spin_lock_bh(&hslot->lock);
-		if (udp_lib_lport_inuse(net, snum, hslot, sk, saddr_comp))
+		if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp))
 			goto fail_unlock;
 	}
+found:
 	inet_sk(sk)->num = snum;
 	sk->sk_hash = snum;
 	if (sk_unhashed(sk)) {