Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

inet: get rid of central tcp/dccp listener timer

One of the major issues for TCP is the SYNACK rtx handling,
done by inet_csk_reqsk_queue_prune(), fired by the keepalive
timer of a TCP_LISTEN socket.

This function runs for awfully long times, with socket lock held,
meaning that other cpus needing this lock have to spin for hundreds of ms.

SYNACK are sent in huge bursts, likely to cause severe drops anyway.

This model was OK 15 years ago when memory was very tight.

We now can afford to have a timer per request sock.

Timer invocations no longer need to lock the listener,
and can be run from all cpus in parallel.

With the following patch increasing somaxconn width to 32 bits,
I tested a listener with more than 4 million active request sockets,
and a steady SYNFLOOD of ~200,000 SYN per second.
Host was sending ~830,000 SYNACK per second.

This is ~100 times more than we could achieve before this patch.

Later, we will get rid of the listener hash and use ehash instead.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eric Dumazet and committed by
David S. Miller
fa76ce73 52452c54

+177 -202
+1 -1
include/net/inet6_connection_sock.h
··· 28 28 struct dst_entry *inet6_csk_route_req(struct sock *sk, struct flowi6 *fl6, 29 29 const struct request_sock *req); 30 30 31 - struct request_sock *inet6_csk_search_req(const struct sock *sk, 31 + struct request_sock *inet6_csk_search_req(struct sock *sk, 32 32 const __be16 rport, 33 33 const struct in6_addr *raddr, 34 34 const struct in6_addr *laddr,
+4 -11
include/net/inet_connection_sock.h
··· 256 256 257 257 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); 258 258 259 - struct request_sock *inet_csk_search_req(const struct sock *sk, 259 + struct request_sock *inet_csk_search_req(struct sock *sk, 260 260 const __be16 rport, 261 261 const __be32 raddr, 262 262 const __be32 laddr); ··· 282 282 static inline void inet_csk_reqsk_queue_removed(struct sock *sk, 283 283 struct request_sock *req) 284 284 { 285 - if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) 286 - inet_csk_delete_keepalive_timer(sk); 285 + reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); 287 286 } 288 287 289 288 static inline void inet_csk_reqsk_queue_added(struct sock *sk, 290 289 const unsigned long timeout) 291 290 { 292 - if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) 293 - inet_csk_reset_keepalive_timer(sk, timeout); 291 + reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue); 294 292 } 295 293 296 294 static inline int inet_csk_reqsk_queue_len(const struct sock *sk) ··· 317 319 { 318 320 inet_csk_reqsk_queue_unlink(sk, req); 319 321 inet_csk_reqsk_queue_removed(sk, req); 320 - reqsk_free(req); 322 + reqsk_put(req); 321 323 } 322 - 323 - void inet_csk_reqsk_queue_prune(struct sock *parent, 324 - const unsigned long interval, 325 - const unsigned long timeout, 326 - const unsigned long max_rto); 327 324 328 325 void inet_csk_destroy_sock(struct sock *sk); 329 326 void inet_csk_prepare_forced_close(struct sock *sk);
+41 -46
include/net/request_sock.h
··· 62 62 u32 window_clamp; /* window clamp at creation time */ 63 63 u32 rcv_wnd; /* rcv_wnd offered first time */ 64 64 u32 ts_recent; 65 - unsigned long expires; 65 + struct timer_list rsk_timer; 66 66 const struct request_sock_ops *rsk_ops; 67 67 struct sock *sk; 68 68 u32 secid; ··· 110 110 111 111 static inline void reqsk_put(struct request_sock *req) 112 112 { 113 - /* temporary debugging, until req sock are put into ehash table */ 114 - WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 1); 115 - 116 113 if (atomic_dec_and_test(&req->rsk_refcnt)) 117 114 reqsk_free(req); 118 115 } ··· 121 124 * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs 122 125 */ 123 126 struct listen_sock { 124 - u8 max_qlen_log; 127 + int qlen_inc; /* protected by listener lock */ 128 + int young_inc;/* protected by listener lock */ 129 + 130 + /* following fields can be updated by timer */ 131 + atomic_t qlen_dec; /* qlen = qlen_inc - qlen_dec */ 132 + atomic_t young_dec; 133 + 134 + u8 max_qlen_log ____cacheline_aligned_in_smp; 125 135 u8 synflood_warned; 126 136 /* 2 bytes hole, try to use */ 127 - int qlen; 128 - int qlen_young; 129 - int clock_hand; 130 137 u32 hash_rnd; 131 138 u32 nr_table_entries; 132 139 struct request_sock *syn_table[0]; ··· 183 182 struct request_sock_queue { 184 183 struct request_sock *rskq_accept_head; 185 184 struct request_sock *rskq_accept_tail; 186 - rwlock_t syn_wait_lock; 187 185 u8 rskq_defer_accept; 188 - /* 3 bytes hole, try to pack */ 189 186 struct listen_sock *listen_opt; 190 187 struct fastopen_queue *fastopenq; /* This is non-NULL iff TFO has been 191 188 * enabled on this listener. Check ··· 191 192 * to determine if TFO is enabled 192 193 * right at this moment. 
193 194 */ 195 + 196 + /* temporary alignment, our goal is to get rid of this lock */ 197 + rwlock_t syn_wait_lock ____cacheline_aligned_in_smp; 194 198 }; 195 199 196 200 int reqsk_queue_alloc(struct request_sock_queue *queue, ··· 225 223 struct request_sock **prev; 226 224 227 225 write_lock(&queue->syn_wait_lock); 226 + 228 227 prev = &lopt->syn_table[req->rsk_hash]; 229 228 while (*prev != req) 230 229 prev = &(*prev)->dl_next; 231 230 *prev = req->dl_next; 231 + 232 232 write_unlock(&queue->syn_wait_lock); 233 + if (del_timer(&req->rsk_timer)) 234 + reqsk_put(req); 233 235 } 234 236 235 237 static inline void reqsk_queue_add(struct request_sock_queue *queue, ··· 266 260 return req; 267 261 } 268 262 269 - static inline int reqsk_queue_removed(struct request_sock_queue *queue, 270 - struct request_sock *req) 263 + static inline void reqsk_queue_removed(struct request_sock_queue *queue, 264 + const struct request_sock *req) 271 265 { 272 266 struct listen_sock *lopt = queue->listen_opt; 273 267 274 268 if (req->num_timeout == 0) 275 - --lopt->qlen_young; 276 - 277 - return --lopt->qlen; 269 + atomic_inc(&lopt->young_dec); 270 + atomic_inc(&lopt->qlen_dec); 278 271 } 279 272 280 - static inline int reqsk_queue_added(struct request_sock_queue *queue) 273 + static inline void reqsk_queue_added(struct request_sock_queue *queue) 281 274 { 282 275 struct listen_sock *lopt = queue->listen_opt; 283 - const int prev_qlen = lopt->qlen; 284 276 285 - lopt->qlen_young++; 286 - lopt->qlen++; 287 - return prev_qlen; 277 + lopt->young_inc++; 278 + lopt->qlen_inc++; 279 + } 280 + 281 + static inline int listen_sock_qlen(const struct listen_sock *lopt) 282 + { 283 + return lopt->qlen_inc - atomic_read(&lopt->qlen_dec); 284 + } 285 + 286 + static inline int listen_sock_young(const struct listen_sock *lopt) 287 + { 288 + return lopt->young_inc - atomic_read(&lopt->young_dec); 288 289 } 289 290 290 291 static inline int reqsk_queue_len(const struct request_sock_queue *queue) 291 
292 { 292 - return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; 293 + const struct listen_sock *lopt = queue->listen_opt; 294 + 295 + return lopt ? listen_sock_qlen(lopt) : 0; 293 296 } 294 297 295 298 static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) 296 299 { 297 - return queue->listen_opt->qlen_young; 300 + return listen_sock_young(queue->listen_opt); 298 301 } 299 302 300 303 static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) 301 304 { 302 - return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; 305 + return reqsk_queue_len(queue) >> queue->listen_opt->max_qlen_log; 303 306 } 304 307 305 - static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, 306 - u32 hash, struct request_sock *req, 307 - unsigned long timeout) 308 - { 309 - struct listen_sock *lopt = queue->listen_opt; 310 - 311 - req->expires = jiffies + timeout; 312 - req->num_retrans = 0; 313 - req->num_timeout = 0; 314 - req->sk = NULL; 315 - 316 - /* before letting lookups find us, make sure all req fields 317 - * are committed to memory and refcnt initialized. 318 - */ 319 - smp_wmb(); 320 - atomic_set(&req->rsk_refcnt, 1); 321 - 322 - req->rsk_hash = hash; 323 - write_lock(&queue->syn_wait_lock); 324 - req->dl_next = lopt->syn_table[hash]; 325 - lopt->syn_table[hash] = req; 326 - write_unlock(&queue->syn_wait_lock); 327 - } 308 + void reqsk_queue_hash_req(struct request_sock_queue *queue, 309 + u32 hash, struct request_sock *req, 310 + unsigned long timeout); 328 311 329 312 #endif /* _REQUEST_SOCK_H */
+9 -4
net/core/request_sock.c
··· 94 94 /* make all the listen_opt local to us */ 95 95 struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); 96 96 97 - if (lopt->qlen != 0) { 97 + if (listen_sock_qlen(lopt) != 0) { 98 98 unsigned int i; 99 99 100 100 for (i = 0; i < lopt->nr_table_entries; i++) { 101 101 struct request_sock *req; 102 102 103 + write_lock_bh(&queue->syn_wait_lock); 103 104 while ((req = lopt->syn_table[i]) != NULL) { 104 105 lopt->syn_table[i] = req->dl_next; 105 - lopt->qlen--; 106 + atomic_inc(&lopt->qlen_dec); 107 + if (del_timer(&req->rsk_timer)) 108 + reqsk_put(req); 106 109 reqsk_put(req); 107 110 } 111 + write_unlock_bh(&queue->syn_wait_lock); 108 112 } 109 113 } 110 114 111 - WARN_ON(lopt->qlen != 0); 115 + if (WARN_ON(listen_sock_qlen(lopt) != 0)) 116 + pr_err("qlen %u\n", listen_sock_qlen(lopt)); 112 117 kvfree(lopt); 113 118 } 114 119 ··· 192 187 * 193 188 * For more details see CoNext'11 "TCP Fast Open" paper. 194 189 */ 195 - req->expires = jiffies + 60*HZ; 190 + req->rsk_timer.expires = jiffies + 60*HZ; 196 191 if (fastopenq->rskq_rst_head == NULL) 197 192 fastopenq->rskq_rst_head = req; 198 193 else
+1 -1
net/core/sock.c
··· 2739 2739 2740 2740 rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, 2741 2741 rsk_prot->obj_size, 0, 2742 - SLAB_HWCACHE_ALIGN, NULL); 2742 + 0, NULL); 2743 2743 2744 2744 if (!rsk_prot->slab) { 2745 2745 pr_crit("%s: Can't create request sock SLAB cache!\n",
+7 -3
net/dccp/ipv4.c
··· 306 306 if (!between48(seq, dccp_rsk(req)->dreq_iss, 307 307 dccp_rsk(req)->dreq_gss)) { 308 308 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 309 + reqsk_put(req); 309 310 goto out; 310 311 } 311 312 /* ··· 316 315 * errors returned from accept(). 317 316 */ 318 317 inet_csk_reqsk_queue_drop(sk, req); 318 + reqsk_put(req); 319 319 goto out; 320 320 321 321 case DCCP_REQUESTING: ··· 453 451 /* Find possible connection requests. */ 454 452 struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport, 455 453 iph->saddr, iph->daddr); 456 - if (req) 457 - return dccp_check_req(sk, skb, req); 458 - 454 + if (req) { 455 + nsk = dccp_check_req(sk, skb, req); 456 + reqsk_put(req); 457 + return nsk; 458 + } 459 459 nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, 460 460 iph->saddr, dh->dccph_sport, 461 461 iph->daddr, dh->dccph_dport,
+8 -4
net/dccp/ipv6.c
··· 157 157 req = inet6_csk_search_req(sk, dh->dccph_dport, 158 158 &hdr->daddr, &hdr->saddr, 159 159 inet6_iif(skb)); 160 - if (req == NULL) 160 + if (!req) 161 161 goto out; 162 162 163 163 /* ··· 169 169 if (!between48(seq, dccp_rsk(req)->dreq_iss, 170 170 dccp_rsk(req)->dreq_gss)) { 171 171 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 172 + reqsk_put(req); 172 173 goto out; 173 174 } 174 175 175 176 inet_csk_reqsk_queue_drop(sk, req); 177 + reqsk_put(req); 176 178 goto out; 177 179 178 180 case DCCP_REQUESTING: ··· 324 322 325 323 req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr, 326 324 &iph->daddr, inet6_iif(skb)); 327 - if (req != NULL) 328 - return dccp_check_req(sk, skb, req); 329 - 325 + if (req) { 326 + nsk = dccp_check_req(sk, skb, req); 327 + reqsk_put(req); 328 + return nsk; 329 + } 330 330 nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, 331 331 &iph->saddr, dh->dccph_sport, 332 332 &iph->daddr, ntohs(dh->dccph_dport),
+1 -23
net/dccp/timer.c
··· 161 161 sock_put(sk); 162 162 } 163 163 164 - /* 165 - * Timer for listening sockets 166 - */ 167 - static void dccp_response_timer(struct sock *sk) 168 - { 169 - inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, 170 - DCCP_RTO_MAX); 171 - } 172 - 173 164 static void dccp_keepalive_timer(unsigned long data) 174 165 { 175 166 struct sock *sk = (struct sock *)data; 176 167 177 - /* Only process if socket is not in use. */ 178 - bh_lock_sock(sk); 179 - if (sock_owned_by_user(sk)) { 180 - /* Try again later. */ 181 - inet_csk_reset_keepalive_timer(sk, HZ / 20); 182 - goto out; 183 - } 184 - 185 - if (sk->sk_state == DCCP_LISTEN) { 186 - dccp_response_timer(sk); 187 - goto out; 188 - } 189 - out: 190 - bh_unlock_sock(sk); 168 + pr_err("dccp should not use a keepalive timer !\n"); 191 169 sock_put(sk); 192 170 } 193 171
+72 -75
net/ipv4/inet_connection_sock.c
··· 23 23 #include <net/route.h> 24 24 #include <net/tcp_states.h> 25 25 #include <net/xfrm.h> 26 + #include <net/tcp.h> 26 27 27 28 #ifdef INET_CSK_DEBUG 28 29 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; ··· 477 476 #if IS_ENABLED(CONFIG_IPV6) 478 477 #define AF_INET_FAMILY(fam) ((fam) == AF_INET) 479 478 #else 480 - #define AF_INET_FAMILY(fam) 1 479 + #define AF_INET_FAMILY(fam) true 481 480 #endif 482 481 483 - struct request_sock *inet_csk_search_req(const struct sock *sk, 484 - const __be16 rport, const __be32 raddr, 482 + /* Note: this is temporary : 483 + * req sock will no longer be in listener hash table 484 + */ 485 + struct request_sock *inet_csk_search_req(struct sock *sk, 486 + const __be16 rport, 487 + const __be32 raddr, 485 488 const __be32 laddr) 486 489 { 487 - const struct inet_connection_sock *icsk = inet_csk(sk); 490 + struct inet_connection_sock *icsk = inet_csk(sk); 488 491 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 489 492 struct request_sock *req; 493 + u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd, 494 + lopt->nr_table_entries); 490 495 491 - for (req = lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, 492 - lopt->nr_table_entries)]; 493 - req != NULL; 494 - req = req->dl_next) { 496 + write_lock(&icsk->icsk_accept_queue.syn_wait_lock); 497 + for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { 495 498 const struct inet_request_sock *ireq = inet_rsk(req); 496 499 497 500 if (ireq->ir_rmt_port == rport && 498 501 ireq->ir_rmt_addr == raddr && 499 502 ireq->ir_loc_addr == laddr && 500 503 AF_INET_FAMILY(req->rsk_ops->family)) { 504 + atomic_inc(&req->rsk_refcnt); 501 505 WARN_ON(req->sk); 502 506 break; 503 507 } 504 508 } 509 + write_unlock(&icsk->icsk_accept_queue.syn_wait_lock); 505 510 506 511 return req; 507 512 } ··· 563 556 } 564 557 EXPORT_SYMBOL(inet_rtx_syn_ack); 565 558 566 - void inet_csk_reqsk_queue_prune(struct sock *parent, 567 - const 
unsigned long interval, 568 - const unsigned long timeout, 569 - const unsigned long max_rto) 559 + static void reqsk_timer_handler(unsigned long data) 570 560 { 571 - struct inet_connection_sock *icsk = inet_csk(parent); 561 + struct request_sock *req = (struct request_sock *)data; 562 + struct sock *sk_listener = req->rsk_listener; 563 + struct inet_connection_sock *icsk = inet_csk(sk_listener); 572 564 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 573 565 struct listen_sock *lopt = queue->listen_opt; 574 - int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 575 - int thresh = max_retries; 576 - unsigned long now = jiffies; 577 - struct request_sock **reqp, *req; 578 - int i, budget; 566 + int expire = 0, resend = 0; 567 + int max_retries, thresh; 579 568 580 - if (lopt == NULL || lopt->qlen == 0) 569 + if (sk_listener->sk_state != TCP_LISTEN || !lopt) { 570 + reqsk_put(req); 581 571 return; 572 + } 582 573 574 + max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 575 + thresh = max_retries; 583 576 /* Normally all the openreqs are young and become mature 584 577 * (i.e. converted to established socket) for first timeout. 585 578 * If synack was not acknowledged for 1 second, it means ··· 597 590 * embrions; and abort old ones without pity, if old 598 591 * ones are about to clog our table. 
599 592 */ 600 - if (lopt->qlen>>(lopt->max_qlen_log-1)) { 601 - int young = (lopt->qlen_young<<1); 593 + if (listen_sock_qlen(lopt) >> (lopt->max_qlen_log - 1)) { 594 + int young = listen_sock_young(lopt) << 1; 602 595 603 596 while (thresh > 2) { 604 - if (lopt->qlen < young) 597 + if (listen_sock_qlen(lopt) < young) 605 598 break; 606 599 thresh--; 607 600 young <<= 1; 608 601 } 609 602 } 610 - 611 603 if (queue->rskq_defer_accept) 612 604 max_retries = queue->rskq_defer_accept; 605 + syn_ack_recalc(req, thresh, max_retries, queue->rskq_defer_accept, 606 + &expire, &resend); 607 + req->rsk_ops->syn_ack_timeout(sk_listener, req); 608 + if (!expire && 609 + (!resend || 610 + !inet_rtx_syn_ack(sk_listener, req) || 611 + inet_rsk(req)->acked)) { 612 + unsigned long timeo; 613 613 614 - budget = 2 * (lopt->nr_table_entries / (timeout / interval)); 615 - i = lopt->clock_hand; 616 - 617 - do { 618 - reqp = &lopt->syn_table[i]; 619 - if (!*reqp) 620 - goto next_bucket; 621 - write_lock(&queue->syn_wait_lock); 622 - while ((req = *reqp) != NULL) { 623 - if (time_after_eq(now, req->expires)) { 624 - int expire = 0, resend = 0; 625 - 626 - syn_ack_recalc(req, thresh, max_retries, 627 - queue->rskq_defer_accept, 628 - &expire, &resend); 629 - req->rsk_ops->syn_ack_timeout(parent, req); 630 - if (!expire && 631 - (!resend || 632 - !inet_rtx_syn_ack(parent, req) || 633 - inet_rsk(req)->acked)) { 634 - unsigned long timeo; 635 - 636 - if (req->num_timeout++ == 0) 637 - lopt->qlen_young--; 638 - timeo = min(timeout << req->num_timeout, 639 - max_rto); 640 - req->expires = now + timeo; 641 - reqp = &req->dl_next; 642 - continue; 643 - } 644 - 645 - /* Drop this request */ 646 - *reqp = req->dl_next; 647 - reqsk_queue_removed(queue, req); 648 - reqsk_put(req); 649 - continue; 650 - } 651 - reqp = &req->dl_next; 652 - } 653 - write_unlock(&queue->syn_wait_lock); 654 - next_bucket: 655 - i = (i + 1) & (lopt->nr_table_entries - 1); 656 - 657 - } while (--budget > 0); 658 - 659 - 
lopt->clock_hand = i; 660 - 661 - if (lopt->qlen) 662 - inet_csk_reset_keepalive_timer(parent, interval); 614 + if (req->num_timeout++ == 0) 615 + atomic_inc(&lopt->young_dec); 616 + timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); 617 + mod_timer_pinned(&req->rsk_timer, jiffies + timeo); 618 + return; 619 + } 620 + inet_csk_reqsk_queue_drop(sk_listener, req); 621 + reqsk_put(req); 663 622 } 664 - EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune); 623 + 624 + void reqsk_queue_hash_req(struct request_sock_queue *queue, 625 + u32 hash, struct request_sock *req, 626 + unsigned long timeout) 627 + { 628 + struct listen_sock *lopt = queue->listen_opt; 629 + 630 + req->num_retrans = 0; 631 + req->num_timeout = 0; 632 + req->sk = NULL; 633 + 634 + /* before letting lookups find us, make sure all req fields 635 + * are committed to memory and refcnt initialized. 636 + */ 637 + smp_wmb(); 638 + atomic_set(&req->rsk_refcnt, 2); 639 + setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); 640 + req->rsk_hash = hash; 641 + 642 + write_lock(&queue->syn_wait_lock); 643 + req->dl_next = lopt->syn_table[hash]; 644 + lopt->syn_table[hash] = req; 645 + write_unlock(&queue->syn_wait_lock); 646 + 647 + mod_timer_pinned(&req->rsk_timer, jiffies + timeout); 648 + } 649 + EXPORT_SYMBOL(reqsk_queue_hash_req); 665 650 666 651 /** 667 652 * inet_csk_clone_lock - clone an inet socket, and lock its clone ··· 788 789 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 789 790 struct request_sock *acc_req; 790 791 struct request_sock *req; 791 - 792 - inet_csk_delete_keepalive_timer(sk); 793 792 794 793 /* make all the listen_opt local to us */ 795 794 acc_req = reqsk_queue_yank_acceptq(queue);
+2 -2
net/ipv4/inet_diag.c
··· 285 285 BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != 286 286 offsetof(struct sock, sk_cookie)); 287 287 288 - tmo = inet_reqsk(sk)->expires - jiffies; 288 + tmo = inet_reqsk(sk)->rsk_timer.expires - jiffies; 289 289 r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0; 290 290 r->idiag_rqueue = 0; 291 291 r->idiag_wqueue = 0; ··· 719 719 read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 720 720 721 721 lopt = icsk->icsk_accept_queue.listen_opt; 722 - if (!lopt || !lopt->qlen) 722 + if (!lopt || !listen_sock_qlen(lopt)) 723 723 goto out; 724 724 725 725 if (bc) {
-1
net/ipv4/syncookies.c
··· 361 361 goto out; 362 362 } 363 363 364 - req->expires = 0UL; 365 364 req->num_retrans = 0; 366 365 367 366 /*
+1 -1
net/ipv4/tcp_fastopen.c
··· 240 240 struct request_sock *req1; 241 241 spin_lock(&fastopenq->lock); 242 242 req1 = fastopenq->rskq_rst_head; 243 - if ((req1 == NULL) || time_after(req1->expires, jiffies)) { 243 + if (!req1 || time_after(req1->rsk_timer.expires, jiffies)) { 244 244 spin_unlock(&fastopenq->lock); 245 245 NET_INC_STATS_BH(sock_net(sk), 246 246 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
+8 -3
net/ipv4/tcp_ipv4.c
··· 475 475 476 476 if (seq != tcp_rsk(req)->snt_isn) { 477 477 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 478 + reqsk_put(req); 478 479 goto out; 479 480 } 480 481 ··· 487 486 */ 488 487 inet_csk_reqsk_queue_drop(sk, req); 489 488 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 489 + reqsk_put(req); 490 490 goto out; 491 491 492 492 case TCP_SYN_SENT: ··· 1400 1398 struct sock *nsk; 1401 1399 1402 1400 req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); 1403 - if (req) 1404 - return tcp_check_req(sk, skb, req, false); 1401 + if (req) { 1402 + nsk = tcp_check_req(sk, skb, req, false); 1403 + reqsk_put(req); 1404 + return nsk; 1405 + } 1405 1406 1406 1407 nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, 1407 1408 th->source, iph->daddr, th->dest, inet_iif(skb)); ··· 2213 2208 struct seq_file *f, int i, kuid_t uid) 2214 2209 { 2215 2210 const struct inet_request_sock *ireq = inet_rsk(req); 2216 - long delta = req->expires - jiffies; 2211 + long delta = req->rsk_timer.expires - jiffies; 2217 2212 2218 2213 seq_printf(f, "%4d: %08X:%04X %08X:%04X" 2219 2214 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
+3 -2
net/ipv4/tcp_minisocks.c
··· 629 629 &tcp_rsk(req)->last_oow_ack_time) && 630 630 631 631 !inet_rtx_syn_ack(sk, req)) 632 - req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout, 633 - TCP_RTO_MAX) + jiffies; 632 + mod_timer_pending(&req->rsk_timer, jiffies + 633 + min(TCP_TIMEOUT_INIT << req->num_timeout, 634 + TCP_RTO_MAX)); 634 635 return NULL; 635 636 } 636 637
+1 -11
net/ipv4/tcp_timer.c
··· 539 539 sock_put(sk); 540 540 } 541 541 542 - /* 543 - * Timer for listening sockets 544 - */ 545 - 546 - static void tcp_synack_timer(struct sock *sk) 547 - { 548 - inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, 549 - TCP_TIMEOUT_INIT, TCP_RTO_MAX); 550 - } 551 - 552 542 void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req) 553 543 { 554 544 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); ··· 573 583 } 574 584 575 585 if (sk->sk_state == TCP_LISTEN) { 576 - tcp_synack_timer(sk); 586 + pr_err("Hmm... keepalive on a LISTEN ???\n"); 577 587 goto out; 578 588 } 579 589
+10 -9
net/ipv6/inet6_connection_sock.c
··· 112 112 return c & (synq_hsize - 1); 113 113 } 114 114 115 - struct request_sock *inet6_csk_search_req(const struct sock *sk, 115 + struct request_sock *inet6_csk_search_req(struct sock *sk, 116 116 const __be16 rport, 117 117 const struct in6_addr *raddr, 118 118 const struct in6_addr *laddr, 119 119 const int iif) 120 120 { 121 - const struct inet_connection_sock *icsk = inet_csk(sk); 121 + struct inet_connection_sock *icsk = inet_csk(sk); 122 122 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 123 123 struct request_sock *req; 124 + u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd, 125 + lopt->nr_table_entries); 124 126 125 - for (req = lopt->syn_table[inet6_synq_hash(raddr, rport, 126 - lopt->hash_rnd, 127 - lopt->nr_table_entries)]; 128 - req != NULL; 129 - req = req->dl_next) { 127 + write_lock(&icsk->icsk_accept_queue.syn_wait_lock); 128 + for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { 130 129 const struct inet_request_sock *ireq = inet_rsk(req); 131 130 132 131 if (ireq->ir_rmt_port == rport && ··· 133 134 ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && 134 135 ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && 135 136 (!ireq->ir_iif || ireq->ir_iif == iif)) { 137 + atomic_inc(&req->rsk_refcnt); 136 138 WARN_ON(req->sk != NULL); 137 - return req; 139 + break; 138 140 } 139 141 } 142 + write_unlock(&icsk->icsk_accept_queue.syn_wait_lock); 140 143 141 - return NULL; 144 + return req; 142 145 } 143 146 EXPORT_SYMBOL_GPL(inet6_csk_search_req); 144 147
-1
net/ipv6/syncookies.c
··· 222 222 223 223 ireq->ir_mark = inet_request_mark(sk, skb); 224 224 225 - req->expires = 0UL; 226 225 req->num_retrans = 0; 227 226 ireq->snd_wscale = tcp_opt.snd_wscale; 228 227 ireq->sack_ok = tcp_opt.sack_ok;
+8 -4
net/ipv6/tcp_ipv6.c
··· 421 421 422 422 if (seq != tcp_rsk(req)->snt_isn) { 423 423 NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); 424 + reqsk_put(req); 424 425 goto out; 425 426 } 426 427 427 428 inet_csk_reqsk_queue_drop(sk, req); 428 429 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 430 + reqsk_put(req); 429 431 goto out; 430 432 431 433 case TCP_SYN_SENT: ··· 990 988 req = inet6_csk_search_req(sk, th->source, 991 989 &ipv6_hdr(skb)->saddr, 992 990 &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); 993 - if (req) 994 - return tcp_check_req(sk, skb, req, false); 995 - 991 + if (req) { 992 + nsk = tcp_check_req(sk, skb, req, false); 993 + reqsk_put(req); 994 + return nsk; 995 + } 996 996 nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, 997 997 &ipv6_hdr(skb)->saddr, th->source, 998 998 &ipv6_hdr(skb)->daddr, ntohs(th->dest), ··· 1674 1670 static void get_openreq6(struct seq_file *seq, 1675 1671 struct request_sock *req, int i, kuid_t uid) 1676 1672 { 1677 - int ttd = req->expires - jiffies; 1673 + long ttd = req->rsk_timer.expires - jiffies; 1678 1674 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; 1679 1675 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; 1680 1676