Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tcp/dccp: remove twchain

TCP listener refactoring, part 3:

Our goal is to hash SYN_RECV sockets into main ehash for fast lookup,
and parallel SYN processing.

The current inet_ehash_bucket contains two chains: one for ESTABLISHED
(and related states) sockets, and another for TIME_WAIT sockets only.

As the hash table is sized to get at most one socket per bucket, it
makes little sense to have separate twchain, as it makes the lookup
slightly more complicated, and doubles hash table memory usage.

If we make sure all socket types have the lookup keys at the same
offsets, we can use a generic and faster lookup. It turns out TIME_WAIT
and ESTABLISHED sockets already have common lookup fields for IPv4.

[ INET_TW_MATCH() is no longer needed ]

I'll provide a follow-up to factorize IPv6 lookup as well, to remove
INET6_TW_MATCH()

This way, SYN_RECV pseudo sockets will be supported in the same manner.

A new sock_gen_put() helper is added, doing either a sock_put() or
inet_twsk_put() [ and will support SYN_RECV later ].

Note this helper should only be called on the real slow path, when an rcu
lookup found a socket that was moved to another identity (freed/reused
immediately), but it could eventually be used in other contexts, like
sock_edemux()

Before patch :

dmesg | grep "TCP established"

TCP established hash table entries: 524288 (order: 11, 8388608 bytes)

After patch :

TCP established hash table entries: 524288 (order: 10, 4194304 bytes)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eric Dumazet and committed by
David S. Miller
05dbc7b5 53af53ae

+133 -262
+2 -7
include/net/inet_hashtables.h
··· 37 37 #include <asm/byteorder.h> 38 38 39 39 /* This is for all connections with a full identity, no wildcards. 40 - * One chain is dedicated to TIME_WAIT sockets. 41 - * I'll experiment with dynamic table growth later. 40 + * The 'e' prefix stands for Establish, but we really put all sockets 41 + * but LISTEN ones. 42 42 */ 43 43 struct inet_ehash_bucket { 44 44 struct hlist_nulls_head chain; 45 - struct hlist_nulls_head twchain; 46 45 }; 47 46 48 47 /* There are a few simple rules, which allow for local port reuse by ··· 122 123 * 123 124 * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE 124 125 * 125 - * TIME_WAIT sockets use a separate chain (twchain). 126 126 */ 127 127 struct inet_ehash_bucket *ehash; 128 128 spinlock_t *ehash_locks; ··· 315 317 ((__sk)->sk_bound_dev_if == (__dif))) && \ 316 318 net_eq(sock_net(__sk), (__net))) 317 319 #endif /* 64-bit arch */ 318 - 319 - #define INET_TW_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif)\ 320 - INET_MATCH(__sk, __net, __cookie, __saddr, __daddr, __ports, __dif) 321 320 322 321 /* 323 322 * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
+1 -12
include/net/inet_timewait_sock.h
··· 141 141 }; 142 142 #define tw_tclass tw_tos 143 143 144 - static inline void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, 145 - struct hlist_nulls_head *list) 146 - { 147 - hlist_nulls_add_head_rcu(&tw->tw_node, list); 148 - } 149 - 150 - static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, 151 - struct hlist_head *list) 152 - { 153 - hlist_add_head(&tw->tw_bind_node, list); 154 - } 155 - 156 144 static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) 157 145 { 158 146 return !hlist_unhashed(&tw->tw_death_node); ··· 180 192 return (struct inet_timewait_sock *)sk; 181 193 } 182 194 195 + void inet_twsk_free(struct inet_timewait_sock *tw); 183 196 void inet_twsk_put(struct inet_timewait_sock *tw); 184 197 185 198 int inet_twsk_unhash(struct inet_timewait_sock *tw);
+7 -1
include/net/sock.h
··· 156 156 */ 157 157 struct sock_common { 158 158 /* skc_daddr and skc_rcv_saddr must be grouped on a 8 bytes aligned 159 - * address on 64bit arches : cf INET_MATCH() and INET_TW_MATCH() 159 + * address on 64bit arches : cf INET_MATCH() 160 160 */ 161 161 union { 162 162 __addrpair skc_addrpair; ··· 301 301 #define sk_dontcopy_end __sk_common.skc_dontcopy_end 302 302 #define sk_hash __sk_common.skc_hash 303 303 #define sk_portpair __sk_common.skc_portpair 304 + #define sk_num __sk_common.skc_num 305 + #define sk_dport __sk_common.skc_dport 304 306 #define sk_addrpair __sk_common.skc_addrpair 305 307 #define sk_daddr __sk_common.skc_daddr 306 308 #define sk_rcv_saddr __sk_common.skc_rcv_saddr ··· 1655 1653 if (atomic_dec_and_test(&sk->sk_refcnt)) 1656 1654 sk_free(sk); 1657 1655 } 1656 + /* Generic version of sock_put(), dealing with all sockets 1657 + * (TCP_TIMEWAIT, ESTABLISHED...) 1658 + */ 1659 + void sock_gen_put(struct sock *sk); 1658 1660 1659 1661 int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested); 1660 1662
-1
include/net/tcp.h
··· 1519 1519 TCP_SEQ_STATE_LISTENING, 1520 1520 TCP_SEQ_STATE_OPENREQ, 1521 1521 TCP_SEQ_STATE_ESTABLISHED, 1522 - TCP_SEQ_STATE_TIME_WAIT, 1523 1522 }; 1524 1523 1525 1524 int tcp_seq_open(struct inode *inode, struct file *file);
+1 -3
net/dccp/proto.c
··· 1158 1158 goto out_free_bind_bucket_cachep; 1159 1159 } 1160 1160 1161 - for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) { 1161 + for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) 1162 1162 INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); 1163 - INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].twchain, i); 1164 - } 1165 1163 1166 1164 if (inet_ehash_locks_alloc(&dccp_hashinfo)) 1167 1165 goto out_free_dccp_ehash;
+13 -35
net/ipv4/inet_diag.c
··· 635 635 cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); 636 636 } 637 637 638 - static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, 638 + static int inet_twsk_diag_dump(struct sock *sk, 639 639 struct sk_buff *skb, 640 640 struct netlink_callback *cb, 641 641 struct inet_diag_req_v2 *r, 642 642 const struct nlattr *bc) 643 643 { 644 + struct inet_timewait_sock *tw = inet_twsk(sk); 645 + 644 646 if (bc != NULL) { 645 647 struct inet_diag_entry entry; 646 648 ··· 913 911 914 912 num = 0; 915 913 916 - if (hlist_nulls_empty(&head->chain) && 917 - hlist_nulls_empty(&head->twchain)) 914 + if (hlist_nulls_empty(&head->chain)) 918 915 continue; 919 916 920 917 if (i > s_i) ··· 921 920 922 921 spin_lock_bh(lock); 923 922 sk_nulls_for_each(sk, node, &head->chain) { 924 - struct inet_sock *inet = inet_sk(sk); 923 + int res; 925 924 926 925 if (!net_eq(sock_net(sk), net)) 927 926 continue; ··· 930 929 if (!(r->idiag_states & (1 << sk->sk_state))) 931 930 goto next_normal; 932 931 if (r->sdiag_family != AF_UNSPEC && 933 - sk->sk_family != r->sdiag_family) 932 + sk->sk_family != r->sdiag_family) 934 933 goto next_normal; 935 - if (r->id.idiag_sport != inet->inet_sport && 934 + if (r->id.idiag_sport != htons(sk->sk_num) && 936 935 r->id.idiag_sport) 937 936 goto next_normal; 938 - if (r->id.idiag_dport != inet->inet_dport && 937 + if (r->id.idiag_dport != sk->sk_dport && 939 938 r->id.idiag_dport) 940 939 goto next_normal; 941 - if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { 940 + if (sk->sk_state == TCP_TIME_WAIT) 941 + res = inet_twsk_diag_dump(sk, skb, cb, r, bc); 942 + else 943 + res = inet_csk_diag_dump(sk, skb, cb, r, bc); 944 + if (res < 0) { 942 945 spin_unlock_bh(lock); 943 946 goto done; 944 947 } ··· 950 945 ++num; 951 946 } 952 947 953 - if (r->idiag_states & TCPF_TIME_WAIT) { 954 - struct inet_timewait_sock *tw; 955 - 956 - inet_twsk_for_each(tw, node, 957 - &head->twchain) { 958 - if (!net_eq(twsk_net(tw), net)) 959 - continue; 960 - 961 - if (num < 
s_num) 962 - goto next_dying; 963 - if (r->sdiag_family != AF_UNSPEC && 964 - tw->tw_family != r->sdiag_family) 965 - goto next_dying; 966 - if (r->id.idiag_sport != tw->tw_sport && 967 - r->id.idiag_sport) 968 - goto next_dying; 969 - if (r->id.idiag_dport != tw->tw_dport && 970 - r->id.idiag_dport) 971 - goto next_dying; 972 - if (inet_twsk_diag_dump(tw, skb, cb, r, bc) < 0) { 973 - spin_unlock_bh(lock); 974 - goto done; 975 - } 976 - next_dying: 977 - ++num; 978 - } 979 - } 980 948 spin_unlock_bh(lock); 981 949 } 982 950
+29 -54
net/ipv4/inet_hashtables.c
··· 230 230 } 231 231 EXPORT_SYMBOL_GPL(__inet_lookup_listener); 232 232 233 + /* All sockets share common refcount, but have different destructors */ 234 + void sock_gen_put(struct sock *sk) 235 + { 236 + if (!atomic_dec_and_test(&sk->sk_refcnt)) 237 + return; 238 + 239 + if (sk->sk_state == TCP_TIME_WAIT) 240 + inet_twsk_free(inet_twsk(sk)); 241 + else 242 + sk_free(sk); 243 + } 244 + EXPORT_SYMBOL_GPL(sock_gen_put); 245 + 233 246 struct sock *__inet_lookup_established(struct net *net, 234 247 struct inet_hashinfo *hashinfo, 235 248 const __be32 saddr, const __be16 sport, ··· 268 255 if (likely(INET_MATCH(sk, net, acookie, 269 256 saddr, daddr, ports, dif))) { 270 257 if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) 271 - goto begintw; 258 + goto out; 272 259 if (unlikely(!INET_MATCH(sk, net, acookie, 273 260 saddr, daddr, ports, dif))) { 274 - sock_put(sk); 261 + sock_gen_put(sk); 275 262 goto begin; 276 263 } 277 - goto out; 264 + goto found; 278 265 } 279 266 } 280 267 /* ··· 284 271 */ 285 272 if (get_nulls_value(node) != slot) 286 273 goto begin; 287 - 288 - begintw: 289 - /* Must check for a TIME_WAIT'er before going to listener hash. */ 290 - sk_nulls_for_each_rcu(sk, node, &head->twchain) { 291 - if (sk->sk_hash != hash) 292 - continue; 293 - if (likely(INET_TW_MATCH(sk, net, acookie, 294 - saddr, daddr, ports, 295 - dif))) { 296 - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { 297 - sk = NULL; 298 - goto out; 299 - } 300 - if (unlikely(!INET_TW_MATCH(sk, net, acookie, 301 - saddr, daddr, ports, 302 - dif))) { 303 - inet_twsk_put(inet_twsk(sk)); 304 - goto begintw; 305 - } 306 - goto out; 307 - } 308 - } 309 - /* 310 - * if the nulls value we got at the end of this lookup is 311 - * not the expected one, we must restart lookup. 312 - * We probably met an item that was moved to another chain. 
313 - */ 314 - if (get_nulls_value(node) != slot) 315 - goto begintw; 316 - sk = NULL; 317 274 out: 275 + sk = NULL; 276 + found: 318 277 rcu_read_unlock(); 319 278 return sk; 320 279 } ··· 311 326 spinlock_t *lock = inet_ehash_lockp(hinfo, hash); 312 327 struct sock *sk2; 313 328 const struct hlist_nulls_node *node; 314 - struct inet_timewait_sock *tw; 329 + struct inet_timewait_sock *tw = NULL; 315 330 int twrefcnt = 0; 316 331 317 332 spin_lock(lock); 318 333 319 - /* Check TIME-WAIT sockets first. */ 320 - sk_nulls_for_each(sk2, node, &head->twchain) { 321 - if (sk2->sk_hash != hash) 322 - continue; 323 - 324 - if (likely(INET_TW_MATCH(sk2, net, acookie, 325 - saddr, daddr, ports, dif))) { 326 - tw = inet_twsk(sk2); 327 - if (twsk_unique(sk, sk2, twp)) 328 - goto unique; 329 - else 330 - goto not_unique; 331 - } 332 - } 333 - tw = NULL; 334 - 335 - /* And established part... */ 336 334 sk_nulls_for_each(sk2, node, &head->chain) { 337 335 if (sk2->sk_hash != hash) 338 336 continue; 337 + 339 338 if (likely(INET_MATCH(sk2, net, acookie, 340 - saddr, daddr, ports, dif))) 339 + saddr, daddr, ports, dif))) { 340 + if (sk2->sk_state == TCP_TIME_WAIT) { 341 + tw = inet_twsk(sk2); 342 + if (twsk_unique(sk, sk2, twp)) 343 + break; 344 + } 341 345 goto not_unique; 346 + } 342 347 } 343 348 344 - unique: 345 349 /* Must record num and sport now. Otherwise we will see 346 - * in hash table socket with a funny identity. */ 350 + * in hash table socket with a funny identity. 351 + */ 347 352 inet->inet_num = lport; 348 353 inet->inet_sport = htons(lport); 349 354 sk->sk_hash = hash;
+28 -27
net/ipv4/inet_timewait_sock.c
··· 87 87 refcnt += inet_twsk_bind_unhash(tw, hashinfo); 88 88 spin_unlock(&bhead->lock); 89 89 90 - #ifdef SOCK_REFCNT_DEBUG 91 - if (atomic_read(&tw->tw_refcnt) != 1) { 92 - pr_debug("%s timewait_sock %p refcnt=%d\n", 93 - tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); 94 - } 95 - #endif 96 - while (refcnt) { 97 - inet_twsk_put(tw); 98 - refcnt--; 99 - } 90 + BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt)); 91 + atomic_sub(refcnt, &tw->tw_refcnt); 100 92 } 101 93 102 - static noinline void inet_twsk_free(struct inet_timewait_sock *tw) 94 + void inet_twsk_free(struct inet_timewait_sock *tw) 103 95 { 104 96 struct module *owner = tw->tw_prot->owner; 105 97 twsk_destructor((struct sock *)tw); ··· 109 117 inet_twsk_free(tw); 110 118 } 111 119 EXPORT_SYMBOL_GPL(inet_twsk_put); 120 + 121 + static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, 122 + struct hlist_nulls_head *list) 123 + { 124 + hlist_nulls_add_head_rcu(&tw->tw_node, list); 125 + } 126 + 127 + static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, 128 + struct hlist_head *list) 129 + { 130 + hlist_add_head(&tw->tw_bind_node, list); 131 + } 112 132 113 133 /* 114 134 * Enter the time wait state. This is called with locally disabled BH. ··· 150 146 spin_lock(lock); 151 147 152 148 /* 153 - * Step 2: Hash TW into TIMEWAIT chain. 154 - * Should be done before removing sk from established chain 155 - * because readers are lockless and search established first. 149 + * Step 2: Hash TW into tcp ehash chain. 150 + * Notes : 151 + * - tw_refcnt is set to 3 because : 152 + * - We have one reference from bhash chain. 153 + * - We have one reference from ehash chain. 154 + * We can use atomic_set() because prior spin_lock()/spin_unlock() 155 + * committed into memory all tw fields. 
156 156 */ 157 - inet_twsk_add_node_rcu(tw, &ehead->twchain); 157 + atomic_set(&tw->tw_refcnt, 1 + 1 + 1); 158 + inet_twsk_add_node_rcu(tw, &ehead->chain); 158 159 159 - /* Step 3: Remove SK from established hash. */ 160 + /* Step 3: Remove SK from hash chain */ 160 161 if (__sk_nulls_del_node_init_rcu(sk)) 161 162 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); 162 - 163 - /* 164 - * Notes : 165 - * - We initially set tw_refcnt to 0 in inet_twsk_alloc() 166 - * - We add one reference for the bhash link 167 - * - We add one reference for the ehash link 168 - * - We want this refcnt update done before allowing other 169 - * threads to find this tw in ehash chain. 170 - */ 171 - atomic_add(1 + 1 + 1, &tw->tw_refcnt); 172 163 173 164 spin_unlock(lock); 174 165 } ··· 489 490 restart_rcu: 490 491 rcu_read_lock(); 491 492 restart: 492 - sk_nulls_for_each_rcu(sk, node, &head->twchain) { 493 + sk_nulls_for_each_rcu(sk, node, &head->chain) { 494 + if (sk->sk_state != TCP_TIME_WAIT) 495 + continue; 493 496 tw = inet_twsk(sk); 494 497 if ((tw->tw_family != family) || 495 498 atomic_read(&twsk_net(tw)->count))
+2 -3
net/ipv4/tcp.c
··· 3137 3137 &tcp_hashinfo.ehash_mask, 3138 3138 0, 3139 3139 thash_entries ? 0 : 512 * 1024); 3140 - for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) { 3140 + for (i = 0; i <= tcp_hashinfo.ehash_mask; i++) 3141 3141 INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].chain, i); 3142 - INIT_HLIST_NULLS_HEAD(&tcp_hashinfo.ehash[i].twchain, i); 3143 - } 3142 + 3144 3143 if (inet_ehash_locks_alloc(&tcp_hashinfo)) 3145 3144 panic("TCP: failed to alloc ehash_locks"); 3146 3145 tcp_hashinfo.bhash =
+13 -70
net/ipv4/tcp_ipv4.c
··· 2194 2194 #ifdef CONFIG_PROC_FS 2195 2195 /* Proc filesystem TCP sock list dumping. */ 2196 2196 2197 - static inline struct inet_timewait_sock *tw_head(struct hlist_nulls_head *head) 2198 - { 2199 - return hlist_nulls_empty(head) ? NULL : 2200 - list_entry(head->first, struct inet_timewait_sock, tw_node); 2201 - } 2202 - 2203 - static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) 2204 - { 2205 - return !is_a_nulls(tw->tw_node.next) ? 2206 - hlist_nulls_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; 2207 - } 2208 - 2209 2197 /* 2210 2198 * Get next listener socket follow cur. If cur is NULL, get first socket 2211 2199 * starting from bucket given in st->bucket; when st->bucket is zero the ··· 2297 2309 return rc; 2298 2310 } 2299 2311 2300 - static inline bool empty_bucket(struct tcp_iter_state *st) 2312 + static inline bool empty_bucket(const struct tcp_iter_state *st) 2301 2313 { 2302 - return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain) && 2303 - hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].twchain); 2314 + return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain); 2304 2315 } 2305 2316 2306 2317 /* ··· 2316 2329 for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { 2317 2330 struct sock *sk; 2318 2331 struct hlist_nulls_node *node; 2319 - struct inet_timewait_sock *tw; 2320 2332 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); 2321 2333 2322 2334 /* Lockless fast path for the common case of empty buckets */ ··· 2331 2345 rc = sk; 2332 2346 goto out; 2333 2347 } 2334 - st->state = TCP_SEQ_STATE_TIME_WAIT; 2335 - inet_twsk_for_each(tw, node, 2336 - &tcp_hashinfo.ehash[st->bucket].twchain) { 2337 - if (tw->tw_family != st->family || 2338 - !net_eq(twsk_net(tw), net)) { 2339 - continue; 2340 - } 2341 - rc = tw; 2342 - goto out; 2343 - } 2344 2348 spin_unlock_bh(lock); 2345 - st->state = TCP_SEQ_STATE_ESTABLISHED; 2346 2349 } 2347 2350 out: 2348 2351 return rc; ··· 2340 2365 static 
void *established_get_next(struct seq_file *seq, void *cur) 2341 2366 { 2342 2367 struct sock *sk = cur; 2343 - struct inet_timewait_sock *tw; 2344 2368 struct hlist_nulls_node *node; 2345 2369 struct tcp_iter_state *st = seq->private; 2346 2370 struct net *net = seq_file_net(seq); ··· 2347 2373 ++st->num; 2348 2374 ++st->offset; 2349 2375 2350 - if (st->state == TCP_SEQ_STATE_TIME_WAIT) { 2351 - tw = cur; 2352 - tw = tw_next(tw); 2353 - get_tw: 2354 - while (tw && (tw->tw_family != st->family || !net_eq(twsk_net(tw), net))) { 2355 - tw = tw_next(tw); 2356 - } 2357 - if (tw) { 2358 - cur = tw; 2359 - goto out; 2360 - } 2361 - spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 2362 - st->state = TCP_SEQ_STATE_ESTABLISHED; 2363 - 2364 - /* Look for next non empty bucket */ 2365 - st->offset = 0; 2366 - while (++st->bucket <= tcp_hashinfo.ehash_mask && 2367 - empty_bucket(st)) 2368 - ; 2369 - if (st->bucket > tcp_hashinfo.ehash_mask) 2370 - return NULL; 2371 - 2372 - spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 2373 - sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain); 2374 - } else 2375 - sk = sk_nulls_next(sk); 2376 + sk = sk_nulls_next(sk); 2376 2377 2377 2378 sk_nulls_for_each_from(sk, node) { 2378 2379 if (sk->sk_family == st->family && net_eq(sock_net(sk), net)) 2379 - goto found; 2380 + return sk; 2380 2381 } 2381 2382 2382 - st->state = TCP_SEQ_STATE_TIME_WAIT; 2383 - tw = tw_head(&tcp_hashinfo.ehash[st->bucket].twchain); 2384 - goto get_tw; 2385 - found: 2386 - cur = sk; 2387 - out: 2388 - return cur; 2383 + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); 2384 + ++st->bucket; 2385 + return established_get_first(seq); 2389 2386 } 2390 2387 2391 2388 static void *established_get_idx(struct seq_file *seq, loff_t pos) ··· 2409 2464 if (rc) 2410 2465 break; 2411 2466 st->bucket = 0; 2467 + st->state = TCP_SEQ_STATE_ESTABLISHED; 2412 2468 /* Fallthrough */ 2413 2469 case TCP_SEQ_STATE_ESTABLISHED: 2414 - case 
TCP_SEQ_STATE_TIME_WAIT: 2415 - st->state = TCP_SEQ_STATE_ESTABLISHED; 2416 2470 if (st->bucket > tcp_hashinfo.ehash_mask) 2417 2471 break; 2418 2472 rc = established_get_first(seq); ··· 2468 2524 } 2469 2525 break; 2470 2526 case TCP_SEQ_STATE_ESTABLISHED: 2471 - case TCP_SEQ_STATE_TIME_WAIT: 2472 2527 rc = established_get_next(seq, v); 2473 2528 break; 2474 2529 } ··· 2491 2548 if (v != SEQ_START_TOKEN) 2492 2549 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); 2493 2550 break; 2494 - case TCP_SEQ_STATE_TIME_WAIT: 2495 2551 case TCP_SEQ_STATE_ESTABLISHED: 2496 2552 if (v) 2497 2553 spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); ··· 2649 2707 static int tcp4_seq_show(struct seq_file *seq, void *v) 2650 2708 { 2651 2709 struct tcp_iter_state *st; 2710 + struct sock *sk = v; 2652 2711 int len; 2653 2712 2654 2713 if (v == SEQ_START_TOKEN) { ··· 2664 2721 switch (st->state) { 2665 2722 case TCP_SEQ_STATE_LISTENING: 2666 2723 case TCP_SEQ_STATE_ESTABLISHED: 2667 - get_tcp4_sock(v, seq, st->num, &len); 2724 + if (sk->sk_state == TCP_TIME_WAIT) 2725 + get_timewait4_sock(v, seq, st->num, &len); 2726 + else 2727 + get_tcp4_sock(v, seq, st->num, &len); 2668 2728 break; 2669 2729 case TCP_SEQ_STATE_OPENREQ: 2670 2730 get_openreq4(st->syn_wait_sk, v, seq, st->num, st->uid, &len); 2671 - break; 2672 - case TCP_SEQ_STATE_TIME_WAIT: 2673 - get_timewait4_sock(v, seq, st->num, &len); 2674 2731 break; 2675 2732 } 2676 2733 seq_printf(seq, "%*s\n", TMPSZ - 1 - len, "");
+32 -45
net/ipv6/inet6_hashtables.c
··· 89 89 sk_nulls_for_each_rcu(sk, node, &head->chain) { 90 90 if (sk->sk_hash != hash) 91 91 continue; 92 - if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { 93 - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) 94 - goto begintw; 92 + if (sk->sk_state == TCP_TIME_WAIT) { 93 + if (!INET6_TW_MATCH(sk, net, saddr, daddr, ports, dif)) 94 + continue; 95 + } else { 96 + if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) 97 + continue; 98 + } 99 + if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) 100 + goto out; 101 + 102 + if (sk->sk_state == TCP_TIME_WAIT) { 103 + if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, 104 + ports, dif))) { 105 + sock_gen_put(sk); 106 + goto begin; 107 + } 108 + } else { 95 109 if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, 96 110 ports, dif))) { 97 111 sock_put(sk); 98 112 goto begin; 99 113 } 100 - goto out; 114 + goto found; 101 115 } 102 116 } 103 117 if (get_nulls_value(node) != slot) 104 118 goto begin; 105 - 106 - begintw: 107 - /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ 108 - sk_nulls_for_each_rcu(sk, node, &head->twchain) { 109 - if (sk->sk_hash != hash) 110 - continue; 111 - if (likely(INET6_TW_MATCH(sk, net, saddr, daddr, 112 - ports, dif))) { 113 - if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { 114 - sk = NULL; 115 - goto out; 116 - } 117 - if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, 118 - ports, dif))) { 119 - inet_twsk_put(inet_twsk(sk)); 120 - goto begintw; 121 - } 122 - goto out; 123 - } 124 - } 125 - if (get_nulls_value(node) != slot) 126 - goto begintw; 127 - sk = NULL; 128 119 out: 120 + sk = NULL; 121 + found: 129 122 rcu_read_unlock(); 130 123 return sk; 131 124 } ··· 241 248 spinlock_t *lock = inet_ehash_lockp(hinfo, hash); 242 249 struct sock *sk2; 243 250 const struct hlist_nulls_node *node; 244 - struct inet_timewait_sock *tw; 251 + struct inet_timewait_sock *tw = NULL; 245 252 int twrefcnt = 0; 246 253 247 254 spin_lock(lock); 248 255 249 - /* Check TIME-WAIT sockets first. */ 250 - sk_nulls_for_each(sk2, node, &head->twchain) { 251 - if (sk2->sk_hash != hash) 252 - continue; 253 - 254 - if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr, 255 - ports, dif))) { 256 - tw = inet_twsk(sk2); 257 - if (twsk_unique(sk, sk2, twp)) 258 - goto unique; 259 - else 260 - goto not_unique; 261 - } 262 - } 263 - tw = NULL; 264 - 265 - /* And established part... */ 266 256 sk_nulls_for_each(sk2, node, &head->chain) { 267 257 if (sk2->sk_hash != hash) 268 258 continue; 259 + 260 + if (sk2->sk_state == TCP_TIME_WAIT) { 261 + if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr, 262 + ports, dif))) { 263 + tw = inet_twsk(sk2); 264 + if (twsk_unique(sk, sk2, twp)) 265 + goto unique; 266 + else 267 + goto not_unique; 268 + } 269 + } 269 270 if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) 270 271 goto not_unique; 271 272 }
+5 -4
net/ipv6/tcp_ipv6.c
··· 1834 1834 static int tcp6_seq_show(struct seq_file *seq, void *v) 1835 1835 { 1836 1836 struct tcp_iter_state *st; 1837 + struct sock *sk = v; 1837 1838 1838 1839 if (v == SEQ_START_TOKEN) { 1839 1840 seq_puts(seq, ··· 1850 1849 switch (st->state) { 1851 1850 case TCP_SEQ_STATE_LISTENING: 1852 1851 case TCP_SEQ_STATE_ESTABLISHED: 1853 - get_tcp6_sock(seq, v, st->num); 1852 + if (sk->sk_state == TCP_TIME_WAIT) 1853 + get_timewait6_sock(seq, v, st->num); 1854 + else 1855 + get_tcp6_sock(seq, v, st->num); 1854 1856 break; 1855 1857 case TCP_SEQ_STATE_OPENREQ: 1856 1858 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid); 1857 - break; 1858 - case TCP_SEQ_STATE_TIME_WAIT: 1859 - get_timewait6_sock(seq, v, st->num); 1860 1859 break; 1861 1860 } 1862 1861 out: