Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

inet: simplify timewait refcounting

timewait sockets have a complex refcounting logic.
Once we realize it should be similar to established and
syn_recv sockets, we can use sk_nulls_del_node_init_rcu()
and remove inet_twsk_unhash()

In particular, deferred inet_twsk_put() added in commit
13475a30b66cd ("tcp: connect() race with timewait reuse")
looks unnecessary: when removing a timewait socket from
ehash or bhash, caller must own a reference on the socket
anyway.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eric Dumazet and committed by
David S. Miller
fc01538f 3fd2f1b9

+21 -68
+2 -2
include/net/inet_hashtables.h
··· 205 205 206 206 void inet_hashinfo_init(struct inet_hashinfo *h); 207 207 208 - int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw); 209 - int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw); 208 + void __inet_hash_nolisten(struct sock *sk, struct sock *osk); 209 + void __inet_hash(struct sock *sk, struct sock *osk); 210 210 void inet_hash(struct sock *sk); 211 211 void inet_unhash(struct sock *sk); 212 212
+2 -4
include/net/inet_timewait_sock.h
··· 100 100 void inet_twsk_free(struct inet_timewait_sock *tw); 101 101 void inet_twsk_put(struct inet_timewait_sock *tw); 102 102 103 - int inet_twsk_unhash(struct inet_timewait_sock *tw); 104 - 105 - int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, 106 - struct inet_hashinfo *hashinfo); 103 + void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, 104 + struct inet_hashinfo *hashinfo); 107 105 108 106 struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, 109 107 struct inet_timewait_death_row *dr,
+10 -21
net/ipv4/inet_hashtables.c
··· 343 343 struct sock *sk2; 344 344 const struct hlist_nulls_node *node; 345 345 struct inet_timewait_sock *tw = NULL; 346 - int twrefcnt = 0; 347 346 348 347 spin_lock(lock); 349 348 ··· 370 371 WARN_ON(!sk_unhashed(sk)); 371 372 __sk_nulls_add_node_rcu(sk, &head->chain); 372 373 if (tw) { 373 - twrefcnt = inet_twsk_unhash(tw); 374 + sk_nulls_del_node_init_rcu((struct sock *)tw); 374 375 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); 375 376 } 376 377 spin_unlock(lock); 377 - if (twrefcnt) 378 - inet_twsk_put(tw); 379 378 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 380 379 381 380 if (twp) { ··· 381 384 } else if (tw) { 382 385 /* Silly. Should hash-dance instead... */ 383 386 inet_twsk_deschedule(tw); 384 - 385 387 inet_twsk_put(tw); 386 388 } 387 389 return 0; ··· 399 403 inet->inet_dport); 400 404 } 401 405 402 - int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw) 406 + void __inet_hash_nolisten(struct sock *sk, struct sock *osk) 403 407 { 404 408 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 405 409 struct hlist_nulls_head *list; 406 410 struct inet_ehash_bucket *head; 407 411 spinlock_t *lock; 408 - int twrefcnt = 0; 409 412 410 413 WARN_ON(!sk_unhashed(sk)); 411 414 ··· 415 420 416 421 spin_lock(lock); 417 422 __sk_nulls_add_node_rcu(sk, list); 418 - if (tw) { 419 - WARN_ON(sk->sk_hash != tw->tw_hash); 420 - twrefcnt = inet_twsk_unhash(tw); 423 + if (osk) { 424 + WARN_ON(sk->sk_hash != osk->sk_hash); 425 + sk_nulls_del_node_init_rcu(osk); 421 426 } 422 427 spin_unlock(lock); 423 428 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 424 - return twrefcnt; 425 429 } 426 430 EXPORT_SYMBOL_GPL(__inet_hash_nolisten); 427 431 428 - int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw) 432 + void __inet_hash(struct sock *sk, struct sock *osk) 429 433 { 430 434 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 431 435 struct inet_listen_hashbucket *ilb; 432 436 433 437 if (sk->sk_state != TCP_LISTEN) 
434 - return __inet_hash_nolisten(sk, tw); 438 + return __inet_hash_nolisten(sk, osk); 435 439 436 440 WARN_ON(!sk_unhashed(sk)); 437 441 ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; ··· 439 445 __sk_nulls_add_node_rcu(sk, &ilb->head); 440 446 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 441 447 spin_unlock(&ilb->lock); 442 - return 0; 443 448 } 444 449 EXPORT_SYMBOL(__inet_hash); 445 450 ··· 485 492 struct inet_bind_bucket *tb; 486 493 int ret; 487 494 struct net *net = sock_net(sk); 488 - int twrefcnt = 1; 489 495 490 496 if (!snum) { 491 497 int i, remaining, low, high, port; ··· 552 560 inet_bind_hash(sk, tb, port); 553 561 if (sk_unhashed(sk)) { 554 562 inet_sk(sk)->inet_sport = htons(port); 555 - twrefcnt += __inet_hash_nolisten(sk, tw); 563 + __inet_hash_nolisten(sk, (struct sock *)tw); 556 564 } 557 565 if (tw) 558 - twrefcnt += inet_twsk_bind_unhash(tw, hinfo); 566 + inet_twsk_bind_unhash(tw, hinfo); 559 567 spin_unlock(&head->lock); 560 568 561 569 if (tw) { 562 570 inet_twsk_deschedule(tw); 563 - while (twrefcnt) { 564 - twrefcnt--; 565 - inet_twsk_put(tw); 566 - } 571 + inet_twsk_put(tw); 567 572 } 568 573 569 574 ret = 0;
+6 -36
net/ipv4/inet_timewait_sock.c
··· 18 18 19 19 20 20 /** 21 - * inet_twsk_unhash - unhash a timewait socket from established hash 22 - * @tw: timewait socket 23 - * 24 - * unhash a timewait socket from established hash, if hashed. 25 - * ehash lock must be held by caller. 26 - * Returns 1 if caller should call inet_twsk_put() after lock release. 27 - */ 28 - int inet_twsk_unhash(struct inet_timewait_sock *tw) 29 - { 30 - if (hlist_nulls_unhashed(&tw->tw_node)) 31 - return 0; 32 - 33 - hlist_nulls_del_rcu(&tw->tw_node); 34 - sk_nulls_node_init(&tw->tw_node); 35 - /* 36 - * We cannot call inet_twsk_put() ourself under lock, 37 - * caller must call it for us. 38 - */ 39 - return 1; 40 - } 41 - 42 - /** 43 21 * inet_twsk_bind_unhash - unhash a timewait socket from bind hash 44 22 * @tw: timewait socket 45 23 * @hashinfo: hashinfo pointer ··· 26 48 * bind hash lock must be held by caller. 27 49 * Returns 1 if caller should call inet_twsk_put() after lock release. 28 50 */ 29 - int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, 51 + void inet_twsk_bind_unhash(struct inet_timewait_sock *tw, 30 52 struct inet_hashinfo *hashinfo) 31 53 { 32 54 struct inet_bind_bucket *tb = tw->tw_tb; 33 55 34 56 if (!tb) 35 - return 0; 57 + return; 36 58 37 59 __hlist_del(&tw->tw_bind_node); 38 60 tw->tw_tb = NULL; 39 61 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 40 - /* 41 - * We cannot call inet_twsk_put() ourself under lock, 42 - * caller must call it for us. 43 - */ 44 - return 1; 62 + __sock_put((struct sock *)tw); 45 63 } 46 64 47 65 /* Must be called with locally disabled BHs. */ 48 66 static void inet_twsk_kill(struct inet_timewait_sock *tw) 49 67 { 50 68 struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; 51 - struct inet_bind_hashbucket *bhead; 52 - int refcnt; 53 - /* Unlink from established hashes. 
*/ 54 69 spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); 70 + struct inet_bind_hashbucket *bhead; 55 71 56 72 spin_lock(lock); 57 - refcnt = inet_twsk_unhash(tw); 73 + sk_nulls_del_node_init_rcu((struct sock *)tw); 58 74 spin_unlock(lock); 59 75 60 76 /* Disassociate with bind bucket. */ ··· 56 84 hashinfo->bhash_size)]; 57 85 58 86 spin_lock(&bhead->lock); 59 - refcnt += inet_twsk_bind_unhash(tw, hashinfo); 87 + inet_twsk_bind_unhash(tw, hashinfo); 60 88 spin_unlock(&bhead->lock); 61 89 62 - BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt)); 63 - atomic_sub(refcnt, &tw->tw_refcnt); 64 90 atomic_dec(&tw->tw_dr->tw_count); 65 91 inet_twsk_put(tw); 66 92 }
+1 -5
net/ipv6/inet6_hashtables.c
··· 207 207 struct sock *sk2; 208 208 const struct hlist_nulls_node *node; 209 209 struct inet_timewait_sock *tw = NULL; 210 - int twrefcnt = 0; 211 210 212 211 spin_lock(lock); 213 212 ··· 233 234 WARN_ON(!sk_unhashed(sk)); 234 235 __sk_nulls_add_node_rcu(sk, &head->chain); 235 236 if (tw) { 236 - twrefcnt = inet_twsk_unhash(tw); 237 + sk_nulls_del_node_init_rcu((struct sock *)tw); 237 238 NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); 238 239 } 239 240 spin_unlock(lock); 240 - if (twrefcnt) 241 - inet_twsk_put(tw); 242 241 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 243 242 244 243 if (twp) { ··· 244 247 } else if (tw) { 245 248 /* Silly. Should hash-dance instead... */ 246 249 inet_twsk_deschedule(tw); 247 - 248 250 inet_twsk_put(tw); 249 251 } 250 252 return 0;