Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[UDP]: Revert 2-pass hashing changes.

This reverts changesets:

6aaf47fa48d3c44280810b1b470261d340e4ed87
b7b5f487ab39bc10ed0694af35651a03d9cb97ff
de34ed91c4ffa4727964a832c46e624dd1495cf5
fc038410b4b1643766f8033f4940bcdb1dace633

Some correctness issues were discovered recently, and they are
still unresolved: every known fix involves doing a full hash
table scan on port bind.

So revert for now.

Signed-off-by: David S. Miller <davem@davemloft.net>

+78 -212
+1 -8
include/net/udp.h
@@ -119,16 +119,9 @@
 }
 
 
-struct udp_get_port_ops {
-	int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2);
-	int (*saddr_any)(const struct sock *sk);
-	unsigned int (*hash_port_and_rcv_saddr)(__u16 port,
-						const struct sock *sk);
-};
-
 /* net/ipv4/udp.c */
 extern int udp_get_port(struct sock *sk, unsigned short snum,
-			const struct udp_get_port_ops *ops);
+			int (*saddr_cmp)(const struct sock *, const struct sock *));
 extern void udp_err(struct sk_buff *, u32);
 
 extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk,
+1 -1
include/net/udplite.h
@@ -120,5 +120,5 @@
 
 extern void udplite4_register(void);
 extern int udplite_get_port(struct sock *sk, unsigned short snum,
-			    const struct udp_get_port_ops *ops);
+			    int (*scmp)(const struct sock *, const struct sock *));
 #endif /* _UDPLITE_H */
+68 -173
net/ipv4/udp.c
··· 114 114 115 115 static int udp_port_rover; 116 116 117 - /* 118 - * Note about this hash function : 119 - * Typical use is probably daddr = 0, only dport is going to vary hash 120 - */ 121 - static inline unsigned int udp_hash_port(__u16 port) 122 - { 123 - return port; 124 - } 125 - 126 - static inline int __udp_lib_port_inuse(unsigned int hash, int port, 127 - const struct sock *this_sk, 128 - struct hlist_head udptable[], 129 - const struct udp_get_port_ops *ops) 117 + static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) 130 118 { 131 119 struct sock *sk; 132 120 struct hlist_node *node; 133 - struct inet_sock *inet; 134 121 135 - sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) { 136 - if (sk->sk_hash != hash) 137 - continue; 138 - inet = inet_sk(sk); 139 - if (inet->num != port) 140 - continue; 141 - if (this_sk) { 142 - if (ops->saddr_cmp(sk, this_sk)) 143 - return 1; 144 - } else if (ops->saddr_any(sk)) 122 + sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) 123 + if (sk->sk_hash == num) 145 124 return 1; 146 - } 147 125 return 0; 148 126 } 149 127 ··· 132 154 * @snum: port number to look up 133 155 * @udptable: hash list table, must be of UDP_HTABLE_SIZE 134 156 * @port_rover: pointer to record of last unallocated port 135 - * @ops: AF-dependent address operations 157 + * @saddr_comp: AF-dependent comparison of bound local IP addresses 136 158 */ 137 159 int __udp_lib_get_port(struct sock *sk, unsigned short snum, 138 160 struct hlist_head udptable[], int *port_rover, 139 - const struct udp_get_port_ops *ops) 161 + int (*saddr_comp)(const struct sock *sk1, 162 + const struct sock *sk2 ) ) 140 163 { 141 164 struct hlist_node *node; 142 165 struct hlist_head *head; 143 166 struct sock *sk2; 144 - unsigned int hash; 145 167 int error = 1; 146 168 147 169 write_lock_bh(&udp_hash_lock); ··· 156 178 for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { 157 179 int size; 158 180 159 - hash = 
ops->hash_port_and_rcv_saddr(result, sk); 160 - head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; 181 + head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; 161 182 if (hlist_empty(head)) { 162 183 if (result > sysctl_local_port_range[1]) 163 184 result = sysctl_local_port_range[0] + ··· 181 204 result = sysctl_local_port_range[0] 182 205 + ((result - sysctl_local_port_range[0]) & 183 206 (UDP_HTABLE_SIZE - 1)); 184 - hash = udp_hash_port(result); 185 - if (__udp_lib_port_inuse(hash, result, 186 - NULL, udptable, ops)) 187 - continue; 188 - if (ops->saddr_any(sk)) 189 - break; 190 - 191 - hash = ops->hash_port_and_rcv_saddr(result, sk); 192 - if (! __udp_lib_port_inuse(hash, result, 193 - sk, udptable, ops)) 207 + if (! __udp_lib_lport_inuse(result, udptable)) 194 208 break; 195 209 } 196 210 if (i >= (1 << 16) / UDP_HTABLE_SIZE) ··· 189 221 gotit: 190 222 *port_rover = snum = result; 191 223 } else { 192 - hash = udp_hash_port(snum); 193 - head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; 224 + head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; 194 225 195 226 sk_for_each(sk2, node, head) 196 - if (sk2->sk_hash == hash && 197 - sk2 != sk && 198 - inet_sk(sk2)->num == snum && 199 - (!sk2->sk_reuse || !sk->sk_reuse) && 200 - (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || 201 - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 202 - ops->saddr_cmp(sk, sk2)) 227 + if (sk2->sk_hash == snum && 228 + sk2 != sk && 229 + (!sk2->sk_reuse || !sk->sk_reuse) && 230 + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if 231 + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && 232 + (*saddr_comp)(sk, sk2) ) 203 233 goto fail; 204 - 205 - if (!ops->saddr_any(sk)) { 206 - hash = ops->hash_port_and_rcv_saddr(snum, sk); 207 - head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; 208 - 209 - sk_for_each(sk2, node, head) 210 - if (sk2->sk_hash == hash && 211 - sk2 != sk && 212 - inet_sk(sk2)->num == snum && 213 - (!sk2->sk_reuse || !sk->sk_reuse) && 214 - (!sk2->sk_bound_dev_if || 215 - 
!sk->sk_bound_dev_if || 216 - sk2->sk_bound_dev_if == 217 - sk->sk_bound_dev_if) && 218 - ops->saddr_cmp(sk, sk2)) 219 - goto fail; 220 - } 221 234 } 222 235 inet_sk(sk)->num = snum; 223 - sk->sk_hash = hash; 236 + sk->sk_hash = snum; 224 237 if (sk_unhashed(sk)) { 225 - head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; 238 + head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; 226 239 sk_add_node(sk, head); 227 240 sock_prot_inc_use(sk->sk_prot); 228 241 } ··· 214 265 } 215 266 216 267 int udp_get_port(struct sock *sk, unsigned short snum, 217 - const struct udp_get_port_ops *ops) 268 + int (*scmp)(const struct sock *, const struct sock *)) 218 269 { 219 - return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, ops); 270 + return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); 220 271 } 221 272 222 - static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 273 + int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 223 274 { 224 275 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 225 276 ··· 228 279 inet1->rcv_saddr == inet2->rcv_saddr )); 229 280 } 230 281 231 - static int ipv4_rcv_saddr_any(const struct sock *sk) 232 - { 233 - return !inet_sk(sk)->rcv_saddr; 234 - } 235 - 236 - static inline unsigned int ipv4_hash_port_and_addr(__u16 port, __be32 addr) 237 - { 238 - addr ^= addr >> 16; 239 - addr ^= addr >> 8; 240 - return port ^ addr; 241 - } 242 - 243 - static unsigned int ipv4_hash_port_and_rcv_saddr(__u16 port, 244 - const struct sock *sk) 245 - { 246 - return ipv4_hash_port_and_addr(port, inet_sk(sk)->rcv_saddr); 247 - } 248 - 249 - const struct udp_get_port_ops udp_ipv4_ops = { 250 - .saddr_cmp = ipv4_rcv_saddr_equal, 251 - .saddr_any = ipv4_rcv_saddr_any, 252 - .hash_port_and_rcv_saddr = ipv4_hash_port_and_rcv_saddr, 253 - }; 254 - 255 282 static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) 256 283 { 257 - return udp_get_port(sk, snum, &udp_ipv4_ops); 284 + return 
udp_get_port(sk, snum, ipv4_rcv_saddr_equal); 258 285 } 259 286 260 287 /* UDP is nearly always wildcards out the wazoo, it makes no sense to try ··· 242 317 { 243 318 struct sock *sk, *result = NULL; 244 319 struct hlist_node *node; 245 - unsigned int hash, hashwild; 246 - int score, best = -1, hport = ntohs(dport); 247 - 248 - hash = ipv4_hash_port_and_addr(hport, daddr); 249 - hashwild = udp_hash_port(hport); 320 + unsigned short hnum = ntohs(dport); 321 + int badness = -1; 250 322 251 323 read_lock(&udp_hash_lock); 252 - 253 - lookup: 254 - 255 - sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) { 324 + sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { 256 325 struct inet_sock *inet = inet_sk(sk); 257 326 258 - if (sk->sk_hash != hash || ipv6_only_sock(sk) || 259 - inet->num != hport) 260 - continue; 261 - 262 - score = (sk->sk_family == PF_INET ? 1 : 0); 263 - if (inet->rcv_saddr) { 264 - if (inet->rcv_saddr != daddr) 265 - continue; 266 - score+=2; 267 - } 268 - if (inet->daddr) { 269 - if (inet->daddr != saddr) 270 - continue; 271 - score+=2; 272 - } 273 - if (inet->dport) { 274 - if (inet->dport != sport) 275 - continue; 276 - score+=2; 277 - } 278 - if (sk->sk_bound_dev_if) { 279 - if (sk->sk_bound_dev_if != dif) 280 - continue; 281 - score+=2; 282 - } 283 - if (score == 9) { 284 - result = sk; 285 - goto found; 286 - } else if (score > best) { 287 - result = sk; 288 - best = score; 327 + if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { 328 + int score = (sk->sk_family == PF_INET ? 
1 : 0); 329 + if (inet->rcv_saddr) { 330 + if (inet->rcv_saddr != daddr) 331 + continue; 332 + score+=2; 333 + } 334 + if (inet->daddr) { 335 + if (inet->daddr != saddr) 336 + continue; 337 + score+=2; 338 + } 339 + if (inet->dport) { 340 + if (inet->dport != sport) 341 + continue; 342 + score+=2; 343 + } 344 + if (sk->sk_bound_dev_if) { 345 + if (sk->sk_bound_dev_if != dif) 346 + continue; 347 + score+=2; 348 + } 349 + if (score == 9) { 350 + result = sk; 351 + break; 352 + } else if (score > badness) { 353 + result = sk; 354 + badness = score; 355 + } 289 356 } 290 357 } 291 - 292 - if (hash != hashwild) { 293 - hash = hashwild; 294 - goto lookup; 295 - } 296 - found: 297 358 if (result) 298 359 sock_hold(result); 299 360 read_unlock(&udp_hash_lock); 300 361 return result; 301 362 } 302 363 303 - static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned int hnum, 304 - int hport, __be32 loc_addr, 364 + static inline struct sock *udp_v4_mcast_next(struct sock *sk, 365 + __be16 loc_port, __be32 loc_addr, 305 366 __be16 rmt_port, __be32 rmt_addr, 306 367 int dif) 307 368 { 308 369 struct hlist_node *node; 309 370 struct sock *s = sk; 371 + unsigned short hnum = ntohs(loc_port); 310 372 311 373 sk_for_each_from(s, node) { 312 374 struct inet_sock *inet = inet_sk(s); 313 375 314 376 if (s->sk_hash != hnum || 315 - inet->num != hport || 316 377 (inet->daddr && inet->daddr != rmt_addr) || 317 378 (inet->dport != rmt_port && inet->dport) || 318 379 (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || ··· 1132 1221 __be32 saddr, __be32 daddr, 1133 1222 struct hlist_head udptable[]) 1134 1223 { 1135 - struct sock *sk, *skw, *sknext; 1224 + struct sock *sk; 1136 1225 int dif; 1137 - int hport = ntohs(uh->dest); 1138 - unsigned int hash = ipv4_hash_port_and_addr(hport, daddr); 1139 - unsigned int hashwild = udp_hash_port(hport); 1140 - 1141 - dif = skb->dev->ifindex; 1142 1226 1143 1227 read_lock(&udp_hash_lock); 1144 - 1145 - sk = sk_head(&udptable[hash & 
(UDP_HTABLE_SIZE - 1)]); 1146 - skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]); 1147 - 1148 - sk = udp_v4_mcast_next(sk, hash, hport, daddr, uh->source, saddr, dif); 1149 - if (!sk) { 1150 - hash = hashwild; 1151 - sk = udp_v4_mcast_next(skw, hash, hport, daddr, uh->source, 1152 - saddr, dif); 1153 - } 1228 + sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); 1229 + dif = skb->dev->ifindex; 1230 + sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); 1154 1231 if (sk) { 1232 + struct sock *sknext = NULL; 1233 + 1155 1234 do { 1156 1235 struct sk_buff *skb1 = skb; 1157 - sknext = udp_v4_mcast_next(sk_next(sk), hash, hport, 1158 - daddr, uh->source, saddr, dif); 1159 - if (!sknext && hash != hashwild) { 1160 - hash = hashwild; 1161 - sknext = udp_v4_mcast_next(skw, hash, hport, 1162 - daddr, uh->source, saddr, dif); 1163 - } 1236 + 1237 + sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, 1238 + uh->source, saddr, dif); 1164 1239 if (sknext) 1165 1240 skb1 = skb_clone(skb, GFP_ATOMIC); 1166 1241 1167 1242 if (skb1) { 1168 1243 int ret = udp_queue_rcv_skb(sk, skb1); 1169 1244 if (ret > 0) 1170 - /* 1171 - * we should probably re-process 1172 - * instead of dropping packets here. 1173 - */ 1245 + /* we should probably re-process instead 1246 + * of dropping packets here. */ 1174 1247 kfree_skb(skb1); 1175 1248 } 1176 1249 sk = sknext; ··· 1241 1346 return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); 1242 1347 1243 1348 sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, 1244 - skb->dev->ifindex, udptable); 1349 + skb->dev->ifindex, udptable ); 1245 1350 1246 1351 if (sk != NULL) { 1247 1352 int ret = udp_queue_rcv_skb(sk, skb);
+3 -3
net/ipv4/udp_impl.h
@@ -5,14 +5,14 @@
 #include <net/protocol.h>
 #include <net/inet_common.h>
 
-extern const struct udp_get_port_ops udp_ipv4_ops;
-
 extern int __udp4_lib_rcv(struct sk_buff *, struct hlist_head [], int );
 extern void __udp4_lib_err(struct sk_buff *, u32, struct hlist_head []);
 
 extern int __udp_lib_get_port(struct sock *sk, unsigned short snum,
 			      struct hlist_head udptable[], int *port_rover,
-			      const struct udp_get_port_ops *ops);
+			      int (*)(const struct sock*,const struct sock*));
+extern int ipv4_rcv_saddr_equal(const struct sock *, const struct sock *);
+
 
 extern int udp_setsockopt(struct sock *sk, int level, int optname,
 			  char __user *optval, int optlen);
+3 -4
net/ipv4/udplite.c
@@ -19,15 +19,14 @@
 static int udplite_port_rover;
 
 int udplite_get_port(struct sock *sk, unsigned short p,
-		     const struct udp_get_port_ops *ops)
+		     int (*c)(const struct sock *, const struct sock *))
 {
-	return __udp_lib_get_port(sk, p, udplite_hash,
-				  &udplite_port_rover, ops);
+	return __udp_lib_get_port(sk, p, udplite_hash, &udplite_port_rover, c);
 }
 
 static int udplite_v4_get_port(struct sock *sk, unsigned short snum)
 {
-	return udplite_get_port(sk, snum, &udp_ipv4_ops);
+	return udplite_get_port(sk, snum, ipv4_rcv_saddr_equal);
 }
 
 static int udplite_rcv(struct sk_buff *skb)
+1 -20
net/ipv6/udp.c
@@ -52,28 +52,9 @@
 
 DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly;
 
-static int ipv6_rcv_saddr_any(const struct sock *sk)
-{
-	struct ipv6_pinfo *np = inet6_sk(sk);
-
-	return ipv6_addr_any(&np->rcv_saddr);
-}
-
-static unsigned int ipv6_hash_port_and_rcv_saddr(__u16 port,
-						 const struct sock *sk)
-{
-	return port;
-}
-
-const struct udp_get_port_ops udp_ipv6_ops = {
-	.saddr_cmp = ipv6_rcv_saddr_equal,
-	.saddr_any = ipv6_rcv_saddr_any,
-	.hash_port_and_rcv_saddr = ipv6_hash_port_and_rcv_saddr,
-};
-
 static inline int udp_v6_get_port(struct sock *sk, unsigned short snum)
 {
-	return udp_get_port(sk, snum, &udp_ipv6_ops);
+	return udp_get_port(sk, snum, ipv6_rcv_saddr_equal);
 }
 
 static struct sock *__udp6_lib_lookup(struct in6_addr *saddr, __be16 sport,
-2
net/ipv6/udp_impl.h
@@ -6,8 +6,6 @@
 #include <net/addrconf.h>
 #include <net/inet_common.h>
 
-extern const struct udp_get_port_ops udp_ipv6_ops;
-
 extern int __udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int );
 extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *,
 			   int , int , int , __be32 , struct hlist_head []);
+1 -1
net/ipv6/udplite.c
@@ -37,7 +37,7 @@
 
 static int udplite_v6_get_port(struct sock *sk, unsigned short snum)
 {
-	return udplite_get_port(sk, snum, &udp_ipv6_ops);
+	return udplite_get_port(sk, snum, ipv6_rcv_saddr_equal);
 }
 
 struct proto udplitev6_prot = {