Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'tcp-lockless-listener'

Eric Dumazet says:

====================
tcp/dccp: lockless listener

TCP listener refactoring : this is becoming interesting !

This patch series takes the steps to use normal TCP/DCCP ehash
table to store SYN_RECV requests, instead of the private per-listener
hash table we had until now.

SYNACK skbs are now attached to their syn_recv request sockets,
so that we no longer heavily modify listener sk_wmem_alloc.

listener lock is no longer held in fast path, including
SYNCOOKIE mode.

During my tests, my server was able to process 3,500,000
SYN packets per second on one listener and still had available
cpu cycles.

That is about 2 to 3 orders of magnitude more than what we had with older kernels.

This effort started two years ago and I am pleased to reach expectations.

We'll probably extend SO_REUSEPORT to add proper cpu/numa affinities,
so that heavy duty TCP servers can get proper siloing thanks to multi-queue
NICs.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+310 -749
-9
include/net/inet6_connection_sock.h
··· 28 28 struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, 29 29 const struct request_sock *req, u8 proto); 30 30 31 - struct request_sock *inet6_csk_search_req(struct sock *sk, 32 - const __be16 rport, 33 - const struct in6_addr *raddr, 34 - const struct in6_addr *laddr, 35 - const int iif); 36 - 37 - void inet6_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 38 - const unsigned long timeout); 39 - 40 31 void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); 41 32 42 33 int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
+2 -7
include/net/inet_connection_sock.h
··· 258 258 259 259 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); 260 260 261 - struct request_sock *inet_csk_search_req(struct sock *sk, 262 - const __be16 rport, 263 - const __be32 raddr, 264 - const __be32 laddr); 265 261 int inet_csk_bind_conflict(const struct sock *sk, 266 262 const struct inet_bind_bucket *tb, bool relax); 267 263 int inet_csk_get_port(struct sock *sk, unsigned short snum); ··· 278 282 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 279 283 unsigned long timeout); 280 284 281 - static inline void inet_csk_reqsk_queue_added(struct sock *sk, 282 - const unsigned long timeout) 285 + static inline void inet_csk_reqsk_queue_added(struct sock *sk) 283 286 { 284 287 reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue); 285 288 } ··· 295 300 296 301 static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) 297 302 { 298 - return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); 303 + return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog; 299 304 } 300 305 301 306 void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
+1
include/net/inet_hashtables.h
··· 205 205 206 206 void inet_hashinfo_init(struct inet_hashinfo *h); 207 207 208 + int inet_ehash_insert(struct sock *sk, struct sock *osk); 208 209 void __inet_hash_nolisten(struct sock *sk, struct sock *osk); 209 210 void __inet_hash(struct sock *sk, struct sock *osk); 210 211 void inet_hash(struct sock *sk);
+41 -91
include/net/request_sock.h
··· 69 69 u32 peer_secid; 70 70 }; 71 71 72 + static inline struct request_sock *inet_reqsk(struct sock *sk) 73 + { 74 + return (struct request_sock *)sk; 75 + } 76 + 77 + static inline struct sock *req_to_sk(struct request_sock *req) 78 + { 79 + return (struct sock *)req; 80 + } 81 + 72 82 static inline struct request_sock * 73 83 reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) 74 84 { ··· 88 78 req->rsk_ops = ops; 89 79 sock_hold(sk_listener); 90 80 req->rsk_listener = sk_listener; 81 + req_to_sk(req)->sk_prot = sk_listener->sk_prot; 82 + sk_node_init(&req_to_sk(req)->sk_node); 91 83 req->saved_syn = NULL; 92 84 /* Following is temporary. It is coupled with debugging 93 85 * helpers in reqsk_put() & reqsk_free() ··· 97 85 atomic_set(&req->rsk_refcnt, 0); 98 86 } 99 87 return req; 100 - } 101 - 102 - static inline struct request_sock *inet_reqsk(struct sock *sk) 103 - { 104 - return (struct request_sock *)sk; 105 - } 106 - 107 - static inline struct sock *req_to_sk(struct request_sock *req) 108 - { 109 - return (struct sock *)req; 110 88 } 111 89 112 90 static inline void reqsk_free(struct request_sock *req) ··· 118 116 } 119 117 120 118 extern int sysctl_max_syn_backlog; 121 - 122 - /** struct listen_sock - listen state 123 - * 124 - * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs 125 - */ 126 - struct listen_sock { 127 - int qlen_inc; /* protected by listener lock */ 128 - int young_inc;/* protected by listener lock */ 129 - 130 - /* following fields can be updated by timer */ 131 - atomic_t qlen_dec; /* qlen = qlen_inc - qlen_dec */ 132 - atomic_t young_dec; 133 - 134 - u32 max_qlen_log ____cacheline_aligned_in_smp; 135 - u32 synflood_warned; 136 - u32 hash_rnd; 137 - u32 nr_table_entries; 138 - struct request_sock *syn_table[0]; 139 - }; 140 119 141 120 /* 142 121 * For a TCP Fast Open listener - ··· 152 169 * @rskq_accept_head - FIFO head of established children 153 170 * @rskq_accept_tail - FIFO tail of established children 
154 171 * @rskq_defer_accept - User waits for some data after accept() 155 - * @syn_wait_lock - serializer 156 - * 157 - * %syn_wait_lock is necessary only to avoid proc interface having to grab the main 158 - * lock sock while browsing the listening hash (otherwise it's deadlock prone). 159 172 * 160 173 */ 161 174 struct request_sock_queue { 175 + spinlock_t rskq_lock; 176 + u8 rskq_defer_accept; 177 + 178 + u32 synflood_warned; 179 + atomic_t qlen; 180 + atomic_t young; 181 + 162 182 struct request_sock *rskq_accept_head; 163 183 struct request_sock *rskq_accept_tail; 164 - u8 rskq_defer_accept; 165 - struct listen_sock *listen_opt; 166 184 struct fastopen_queue fastopenq; /* Check max_qlen != 0 to determine 167 185 * if TFO is enabled. 168 186 */ 169 - 170 - /* temporary alignment, our goal is to get rid of this lock */ 171 - spinlock_t syn_wait_lock ____cacheline_aligned_in_smp; 172 187 }; 173 188 174 - int reqsk_queue_alloc(struct request_sock_queue *queue, 175 - unsigned int nr_table_entries); 189 + void reqsk_queue_alloc(struct request_sock_queue *queue); 176 190 177 - void __reqsk_queue_destroy(struct request_sock_queue *queue); 178 - void reqsk_queue_destroy(struct request_sock_queue *queue); 179 191 void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, 180 192 bool reset); 181 193 182 - static inline struct request_sock * 183 - reqsk_queue_yank_acceptq(struct request_sock_queue *queue) 184 - { 185 - struct request_sock *req = queue->rskq_accept_head; 186 - 187 - queue->rskq_accept_head = NULL; 188 - return req; 189 - } 190 - 191 - static inline int reqsk_queue_empty(struct request_sock_queue *queue) 194 + static inline bool reqsk_queue_empty(const struct request_sock_queue *queue) 192 195 { 193 196 return queue->rskq_accept_head == NULL; 194 197 } ··· 184 215 struct sock *parent, 185 216 struct sock *child) 186 217 { 218 + spin_lock(&queue->rskq_lock); 187 219 req->sk = child; 188 220 sk_acceptq_added(parent); 189 221 ··· 195 225 196 226 
queue->rskq_accept_tail = req; 197 227 req->dl_next = NULL; 228 + spin_unlock(&queue->rskq_lock); 198 229 } 199 230 200 - static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue) 231 + static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue, 232 + struct sock *parent) 201 233 { 202 - struct request_sock *req = queue->rskq_accept_head; 234 + struct request_sock *req; 203 235 204 - WARN_ON(req == NULL); 205 - 206 - queue->rskq_accept_head = req->dl_next; 207 - if (queue->rskq_accept_head == NULL) 208 - queue->rskq_accept_tail = NULL; 209 - 236 + spin_lock_bh(&queue->rskq_lock); 237 + req = queue->rskq_accept_head; 238 + if (req) { 239 + sk_acceptq_removed(parent); 240 + queue->rskq_accept_head = req->dl_next; 241 + if (queue->rskq_accept_head == NULL) 242 + queue->rskq_accept_tail = NULL; 243 + } 244 + spin_unlock_bh(&queue->rskq_lock); 210 245 return req; 211 246 } 212 247 213 248 static inline void reqsk_queue_removed(struct request_sock_queue *queue, 214 249 const struct request_sock *req) 215 250 { 216 - struct listen_sock *lopt = queue->listen_opt; 217 - 218 251 if (req->num_timeout == 0) 219 - atomic_inc(&lopt->young_dec); 220 - atomic_inc(&lopt->qlen_dec); 252 + atomic_dec(&queue->young); 253 + atomic_dec(&queue->qlen); 221 254 } 222 255 223 256 static inline void reqsk_queue_added(struct request_sock_queue *queue) 224 257 { 225 - struct listen_sock *lopt = queue->listen_opt; 226 - 227 - lopt->young_inc++; 228 - lopt->qlen_inc++; 229 - } 230 - 231 - static inline int listen_sock_qlen(const struct listen_sock *lopt) 232 - { 233 - return lopt->qlen_inc - atomic_read(&lopt->qlen_dec); 234 - } 235 - 236 - static inline int listen_sock_young(const struct listen_sock *lopt) 237 - { 238 - return lopt->young_inc - atomic_read(&lopt->young_dec); 258 + atomic_inc(&queue->young); 259 + atomic_inc(&queue->qlen); 239 260 } 240 261 241 262 static inline int reqsk_queue_len(const struct request_sock_queue *queue) 
242 263 { 243 - const struct listen_sock *lopt = queue->listen_opt; 244 - 245 - return lopt ? listen_sock_qlen(lopt) : 0; 264 + return atomic_read(&queue->qlen); 246 265 } 247 266 248 267 static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) 249 268 { 250 - return listen_sock_young(queue->listen_opt); 269 + return atomic_read(&queue->young); 251 270 } 252 - 253 - static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) 254 - { 255 - return reqsk_queue_len(queue) >> queue->listen_opt->max_qlen_log; 256 - } 257 - 258 - void reqsk_queue_hash_req(struct request_sock_queue *queue, 259 - u32 hash, struct request_sock *req, 260 - unsigned long timeout); 261 271 262 272 #endif /* _REQUEST_SOCK_H */
+4 -6
include/net/tcp.h
··· 462 462 int tcp_connect(struct sock *sk); 463 463 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, 464 464 struct request_sock *req, 465 - struct tcp_fastopen_cookie *foc); 465 + struct tcp_fastopen_cookie *foc, 466 + bool attach_req); 466 467 int tcp_disconnect(struct sock *sk, int flags); 467 468 468 469 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); ··· 1619 1618 /* /proc */ 1620 1619 enum tcp_seq_states { 1621 1620 TCP_SEQ_STATE_LISTENING, 1622 - TCP_SEQ_STATE_OPENREQ, 1623 1621 TCP_SEQ_STATE_ESTABLISHED, 1624 1622 }; 1625 1623 ··· 1637 1637 enum tcp_seq_states state; 1638 1638 struct sock *syn_wait_sk; 1639 1639 int bucket, offset, sbucket, num; 1640 - kuid_t uid; 1641 1640 loff_t last_pos; 1642 1641 }; 1643 1642 ··· 1716 1717 __u32 (*init_seq)(const struct sk_buff *skb); 1717 1718 int (*send_synack)(const struct sock *sk, struct dst_entry *dst, 1718 1719 struct flowi *fl, struct request_sock *req, 1719 - u16 queue_mapping, struct tcp_fastopen_cookie *foc); 1720 - void (*queue_hash_add)(struct sock *sk, struct request_sock *req, 1721 - const unsigned long timeout); 1720 + u16 queue_mapping, struct tcp_fastopen_cookie *foc, 1721 + bool attach_req); 1722 1722 }; 1723 1723 1724 1724 #ifdef CONFIG_SYN_COOKIES
+2 -82
net/core/request_sock.c
··· 37 37 int sysctl_max_syn_backlog = 256; 38 38 EXPORT_SYMBOL(sysctl_max_syn_backlog); 39 39 40 - int reqsk_queue_alloc(struct request_sock_queue *queue, 41 - unsigned int nr_table_entries) 40 + void reqsk_queue_alloc(struct request_sock_queue *queue) 42 41 { 43 - size_t lopt_size = sizeof(struct listen_sock); 44 - struct listen_sock *lopt = NULL; 45 - 46 - nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); 47 - nr_table_entries = max_t(u32, nr_table_entries, 8); 48 - nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); 49 - lopt_size += nr_table_entries * sizeof(struct request_sock *); 50 - 51 - if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) 52 - lopt = kzalloc(lopt_size, GFP_KERNEL | 53 - __GFP_NOWARN | 54 - __GFP_NORETRY); 55 - if (!lopt) 56 - lopt = vzalloc(lopt_size); 57 - if (!lopt) 58 - return -ENOMEM; 59 - 60 - get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); 61 - spin_lock_init(&queue->syn_wait_lock); 42 + spin_lock_init(&queue->rskq_lock); 62 43 63 44 spin_lock_init(&queue->fastopenq.lock); 64 45 queue->fastopenq.rskq_rst_head = NULL; ··· 48 67 queue->fastopenq.max_qlen = 0; 49 68 50 69 queue->rskq_accept_head = NULL; 51 - lopt->nr_table_entries = nr_table_entries; 52 - lopt->max_qlen_log = ilog2(nr_table_entries); 53 - 54 - spin_lock_bh(&queue->syn_wait_lock); 55 - queue->listen_opt = lopt; 56 - spin_unlock_bh(&queue->syn_wait_lock); 57 - 58 - return 0; 59 - } 60 - 61 - void __reqsk_queue_destroy(struct request_sock_queue *queue) 62 - { 63 - /* This is an error recovery path only, no locking needed */ 64 - kvfree(queue->listen_opt); 65 - } 66 - 67 - static inline struct listen_sock *reqsk_queue_yank_listen_sk( 68 - struct request_sock_queue *queue) 69 - { 70 - struct listen_sock *lopt; 71 - 72 - spin_lock_bh(&queue->syn_wait_lock); 73 - lopt = queue->listen_opt; 74 - queue->listen_opt = NULL; 75 - spin_unlock_bh(&queue->syn_wait_lock); 76 - 77 - return lopt; 78 - } 79 - 80 - void 
reqsk_queue_destroy(struct request_sock_queue *queue) 81 - { 82 - /* make all the listen_opt local to us */ 83 - struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); 84 - 85 - if (listen_sock_qlen(lopt) != 0) { 86 - unsigned int i; 87 - 88 - for (i = 0; i < lopt->nr_table_entries; i++) { 89 - struct request_sock *req; 90 - 91 - spin_lock_bh(&queue->syn_wait_lock); 92 - while ((req = lopt->syn_table[i]) != NULL) { 93 - lopt->syn_table[i] = req->dl_next; 94 - /* Because of following del_timer_sync(), 95 - * we must release the spinlock here 96 - * or risk a dead lock. 97 - */ 98 - spin_unlock_bh(&queue->syn_wait_lock); 99 - atomic_inc(&lopt->qlen_dec); 100 - if (del_timer_sync(&req->rsk_timer)) 101 - reqsk_put(req); 102 - reqsk_put(req); 103 - spin_lock_bh(&queue->syn_wait_lock); 104 - } 105 - spin_unlock_bh(&queue->syn_wait_lock); 106 - } 107 - } 108 - 109 - if (WARN_ON(listen_sock_qlen(lopt) != 0)) 110 - pr_err("qlen %u\n", listen_sock_qlen(lopt)); 111 - kvfree(lopt); 112 70 } 113 71 114 72 /*
+21 -43
net/dccp/ipv4.c
··· 444 444 } 445 445 EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); 446 446 447 - static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 448 - { 449 - const struct dccp_hdr *dh = dccp_hdr(skb); 450 - const struct iphdr *iph = ip_hdr(skb); 451 - struct sock *nsk; 452 - /* Find possible connection requests. */ 453 - struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport, 454 - iph->saddr, iph->daddr); 455 - if (req) { 456 - nsk = dccp_check_req(sk, skb, req); 457 - if (!nsk) 458 - reqsk_put(req); 459 - return nsk; 460 - } 461 - nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, 462 - iph->saddr, dh->dccph_sport, 463 - iph->daddr, dh->dccph_dport, 464 - inet_iif(skb)); 465 - if (nsk != NULL) { 466 - if (nsk->sk_state != DCCP_TIME_WAIT) { 467 - bh_lock_sock(nsk); 468 - return nsk; 469 - } 470 - inet_twsk_put(inet_twsk(nsk)); 471 - return NULL; 472 - } 473 - 474 - return sk; 475 - } 476 - 477 447 static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, 478 448 struct sk_buff *skb) 479 449 { ··· 675 705 * NOTE: the check for the packet types is done in 676 706 * dccp_rcv_state_process 677 707 */ 678 - if (sk->sk_state == DCCP_LISTEN) { 679 - struct sock *nsk = dccp_v4_hnd_req(sk, skb); 680 - 681 - if (nsk == NULL) 682 - goto discard; 683 - 684 - if (nsk != sk) { 685 - if (dccp_child_process(sk, nsk, skb)) 686 - goto reset; 687 - return 0; 688 - } 689 - } 690 708 691 709 if (dccp_rcv_state_process(sk, skb, dh, skb->len)) 692 710 goto reset; ··· 682 724 683 725 reset: 684 726 dccp_v4_ctl_send_reset(sk, skb); 685 - discard: 686 727 kfree_skb(skb); 687 728 return 0; 688 729 } ··· 825 868 goto no_dccp_socket; 826 869 } 827 870 871 + if (sk->sk_state == DCCP_NEW_SYN_RECV) { 872 + struct request_sock *req = inet_reqsk(sk); 873 + struct sock *nsk = NULL; 874 + 875 + sk = req->rsk_listener; 876 + if (sk->sk_state == DCCP_LISTEN) 877 + nsk = dccp_check_req(sk, skb, req); 878 + if (!nsk) { 879 + reqsk_put(req); 880 + goto 
discard_it; 881 + } 882 + if (nsk == sk) { 883 + sock_hold(sk); 884 + reqsk_put(req); 885 + } else if (dccp_child_process(sk, nsk, skb)) { 886 + dccp_v4_ctl_send_reset(sk, skb); 887 + goto discard_it; 888 + } else { 889 + return 0; 890 + } 891 + } 828 892 /* 829 893 * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage 830 894 * o if MinCsCov = 0, only packets with CsCov = 0 are accepted
+22 -50
net/dccp/ipv6.c
··· 290 290 .syn_ack_timeout = dccp_syn_ack_timeout, 291 291 }; 292 292 293 - static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) 294 - { 295 - const struct dccp_hdr *dh = dccp_hdr(skb); 296 - const struct ipv6hdr *iph = ipv6_hdr(skb); 297 - struct request_sock *req; 298 - struct sock *nsk; 299 - 300 - req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr, 301 - &iph->daddr, inet6_iif(skb)); 302 - if (req) { 303 - nsk = dccp_check_req(sk, skb, req); 304 - if (!nsk) 305 - reqsk_put(req); 306 - return nsk; 307 - } 308 - nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, 309 - &iph->saddr, dh->dccph_sport, 310 - &iph->daddr, ntohs(dh->dccph_dport), 311 - inet6_iif(skb)); 312 - if (nsk != NULL) { 313 - if (nsk->sk_state != DCCP_TIME_WAIT) { 314 - bh_lock_sock(nsk); 315 - return nsk; 316 - } 317 - inet_twsk_put(inet_twsk(nsk)); 318 - return NULL; 319 - } 320 - 321 - return sk; 322 - } 323 - 324 293 static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 325 294 { 326 295 struct request_sock *req; ··· 367 398 if (dccp_v6_send_response(sk, req)) 368 399 goto drop_and_free; 369 400 370 - inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); 401 + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); 371 402 return 0; 372 403 373 404 drop_and_free: ··· 610 641 * NOTE: the check for the packet types is done in 611 642 * dccp_rcv_state_process 612 643 */ 613 - if (sk->sk_state == DCCP_LISTEN) { 614 - struct sock *nsk = dccp_v6_hnd_req(sk, skb); 615 - 616 - if (nsk == NULL) 617 - goto discard; 618 - /* 619 - * Queue it on the new socket if the new socket is active, 620 - * otherwise we just shortcircuit this and continue with 621 - * the new socket.. 
622 - */ 623 - if (nsk != sk) { 624 - if (dccp_child_process(sk, nsk, skb)) 625 - goto reset; 626 - if (opt_skb != NULL) 627 - __kfree_skb(opt_skb); 628 - return 0; 629 - } 630 - } 631 644 632 645 if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) 633 646 goto reset; ··· 683 732 goto no_dccp_socket; 684 733 } 685 734 735 + if (sk->sk_state == DCCP_NEW_SYN_RECV) { 736 + struct request_sock *req = inet_reqsk(sk); 737 + struct sock *nsk = NULL; 738 + 739 + sk = req->rsk_listener; 740 + if (sk->sk_state == DCCP_LISTEN) 741 + nsk = dccp_check_req(sk, skb, req); 742 + if (!nsk) { 743 + reqsk_put(req); 744 + goto discard_it; 745 + } 746 + if (nsk == sk) { 747 + sock_hold(sk); 748 + reqsk_put(req); 749 + } else if (dccp_child_process(sk, nsk, skb)) { 750 + dccp_v6_ctl_send_reset(sk, skb); 751 + goto discard_it; 752 + } else { 753 + return 0; 754 + } 755 + } 686 756 /* 687 757 * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage 688 758 * o if MinCsCov = 0, only packets with CsCov = 0 are accepted
+37 -110
net/ipv4/inet_connection_sock.c
··· 330 330 if (error) 331 331 goto out_err; 332 332 } 333 - req = reqsk_queue_remove(queue); 333 + req = reqsk_queue_remove(queue, sk); 334 334 newsk = req->sk; 335 335 336 - sk_acceptq_removed(sk); 337 336 if (sk->sk_protocol == IPPROTO_TCP && 338 337 tcp_rsk(req)->tfo_listener) { 339 338 spin_lock_bh(&queue->fastopenq.lock); ··· 476 477 } 477 478 EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); 478 479 479 - static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, 480 - const u32 rnd, const u32 synq_hsize) 481 - { 482 - return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); 483 - } 484 - 485 480 #if IS_ENABLED(CONFIG_IPV6) 486 481 #define AF_INET_FAMILY(fam) ((fam) == AF_INET) 487 482 #else 488 483 #define AF_INET_FAMILY(fam) true 489 484 #endif 490 - 491 - /* Note: this is temporary : 492 - * req sock will no longer be in listener hash table 493 - */ 494 - struct request_sock *inet_csk_search_req(struct sock *sk, 495 - const __be16 rport, 496 - const __be32 raddr, 497 - const __be32 laddr) 498 - { 499 - struct inet_connection_sock *icsk = inet_csk(sk); 500 - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 501 - struct request_sock *req; 502 - u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd, 503 - lopt->nr_table_entries); 504 - 505 - spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); 506 - for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { 507 - const struct inet_request_sock *ireq = inet_rsk(req); 508 - 509 - if (ireq->ir_rmt_port == rport && 510 - ireq->ir_rmt_addr == raddr && 511 - ireq->ir_loc_addr == laddr && 512 - AF_INET_FAMILY(req->rsk_ops->family)) { 513 - atomic_inc(&req->rsk_refcnt); 514 - WARN_ON(req->sk); 515 - break; 516 - } 517 - } 518 - spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); 519 - 520 - return req; 521 - } 522 - EXPORT_SYMBOL_GPL(inet_csk_search_req); 523 - 524 - void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 525 - 
unsigned long timeout) 526 - { 527 - struct inet_connection_sock *icsk = inet_csk(sk); 528 - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 529 - const u32 h = inet_synq_hash(inet_rsk(req)->ir_rmt_addr, 530 - inet_rsk(req)->ir_rmt_port, 531 - lopt->hash_rnd, lopt->nr_table_entries); 532 - 533 - reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); 534 - inet_csk_reqsk_queue_added(sk, timeout); 535 - } 536 - EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); 537 485 538 486 /* Only thing we need from tcp.h */ 539 487 extern int sysctl_tcp_synack_retries; ··· 518 572 } 519 573 EXPORT_SYMBOL(inet_rtx_syn_ack); 520 574 521 - /* return true if req was found in the syn_table[] */ 575 + /* return true if req was found in the ehash table */ 522 576 static bool reqsk_queue_unlink(struct request_sock_queue *queue, 523 577 struct request_sock *req) 524 578 { 525 - struct listen_sock *lopt = queue->listen_opt; 526 - struct request_sock **prev; 527 - bool found = false; 579 + struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo; 580 + spinlock_t *lock; 581 + bool found; 528 582 529 - spin_lock(&queue->syn_wait_lock); 583 + lock = inet_ehash_lockp(hashinfo, req->rsk_hash); 530 584 531 - for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; 532 - prev = &(*prev)->dl_next) { 533 - if (*prev == req) { 534 - *prev = req->dl_next; 535 - found = true; 536 - break; 537 - } 538 - } 585 + spin_lock(lock); 586 + found = __sk_nulls_del_node_init_rcu(req_to_sk(req)); 587 + spin_unlock(lock); 539 588 540 - spin_unlock(&queue->syn_wait_lock); 541 589 if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) 542 590 reqsk_put(req); 543 591 return found; ··· 552 612 struct sock *sk_listener = req->rsk_listener; 553 613 struct inet_connection_sock *icsk = inet_csk(sk_listener); 554 614 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 555 - struct listen_sock *lopt = queue->listen_opt; 556 615 int qlen, expire = 0, resend = 
0; 557 616 int max_retries, thresh; 558 617 u8 defer_accept; 559 618 560 - if (sk_listener->sk_state != TCP_LISTEN || !lopt) { 561 - reqsk_put(req); 562 - return; 563 - } 619 + if (sk_listener->sk_state != TCP_LISTEN) 620 + goto drop; 564 621 565 622 max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 566 623 thresh = max_retries; ··· 578 641 * embrions; and abort old ones without pity, if old 579 642 * ones are about to clog our table. 580 643 */ 581 - qlen = listen_sock_qlen(lopt); 582 - if (qlen >> (lopt->max_qlen_log - 1)) { 583 - int young = listen_sock_young(lopt) << 1; 644 + qlen = reqsk_queue_len(queue); 645 + if ((qlen << 1) > sk_listener->sk_max_ack_backlog) { 646 + int young = reqsk_queue_len_young(queue) << 1; 584 647 585 648 while (thresh > 2) { 586 649 if (qlen < young) ··· 602 665 unsigned long timeo; 603 666 604 667 if (req->num_timeout++ == 0) 605 - atomic_inc(&lopt->young_dec); 668 + atomic_dec(&queue->young); 606 669 timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); 607 670 mod_timer_pinned(&req->rsk_timer, jiffies + timeo); 608 671 return; 609 672 } 673 + drop: 610 674 inet_csk_reqsk_queue_drop(sk_listener, req); 611 675 reqsk_put(req); 612 676 } 613 677 614 - void reqsk_queue_hash_req(struct request_sock_queue *queue, 615 - u32 hash, struct request_sock *req, 616 - unsigned long timeout) 678 + static void reqsk_queue_hash_req(struct request_sock *req, 679 + unsigned long timeout) 617 680 { 618 - struct listen_sock *lopt = queue->listen_opt; 619 - 620 681 req->num_retrans = 0; 621 682 req->num_timeout = 0; 622 683 req->sk = NULL; 623 684 624 685 setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); 625 686 mod_timer_pinned(&req->rsk_timer, jiffies + timeout); 626 - req->rsk_hash = hash; 627 687 688 + inet_ehash_insert(req_to_sk(req), NULL); 628 689 /* before letting lookups find us, make sure all req fields 629 690 * are committed to memory and refcnt initialized. 
630 691 */ 631 692 smp_wmb(); 632 - atomic_set(&req->rsk_refcnt, 2); 633 - 634 - spin_lock(&queue->syn_wait_lock); 635 - req->dl_next = lopt->syn_table[hash]; 636 - lopt->syn_table[hash] = req; 637 - spin_unlock(&queue->syn_wait_lock); 693 + atomic_set(&req->rsk_refcnt, 2 + 1); 638 694 } 639 - EXPORT_SYMBOL(reqsk_queue_hash_req); 695 + 696 + void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 697 + unsigned long timeout) 698 + { 699 + reqsk_queue_hash_req(req, timeout); 700 + inet_csk_reqsk_queue_added(sk); 701 + } 702 + EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); 640 703 641 704 /** 642 705 * inet_csk_clone_lock - clone an inet socket, and lock its clone ··· 729 792 730 793 int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) 731 794 { 732 - struct inet_sock *inet = inet_sk(sk); 733 795 struct inet_connection_sock *icsk = inet_csk(sk); 734 - int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries); 796 + struct inet_sock *inet = inet_sk(sk); 735 797 736 - if (rc != 0) 737 - return rc; 798 + reqsk_queue_alloc(&icsk->icsk_accept_queue); 738 799 739 800 sk->sk_max_ack_backlog = 0; 740 801 sk->sk_ack_backlog = 0; ··· 754 819 } 755 820 756 821 sk->sk_state = TCP_CLOSE; 757 - __reqsk_queue_destroy(&icsk->icsk_accept_queue); 758 822 return -EADDRINUSE; 759 823 } 760 824 EXPORT_SYMBOL_GPL(inet_csk_listen_start); ··· 766 832 { 767 833 struct inet_connection_sock *icsk = inet_csk(sk); 768 834 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 769 - struct request_sock *acc_req; 770 - struct request_sock *req; 771 - 772 - /* make all the listen_opt local to us */ 773 - acc_req = reqsk_queue_yank_acceptq(queue); 835 + struct request_sock *next, *req; 774 836 775 837 /* Following specs, it would be better either to send FIN 776 838 * (and enter FIN-WAIT-1, it is normal close) ··· 776 846 * To be honest, we are not able to make either 777 847 * of the variants now. 
--ANK 778 848 */ 779 - reqsk_queue_destroy(queue); 780 - 781 - while ((req = acc_req) != NULL) { 849 + while ((req = reqsk_queue_remove(queue, sk)) != NULL) { 782 850 struct sock *child = req->sk; 783 - 784 - acc_req = req->dl_next; 785 851 786 852 local_bh_disable(); 787 853 bh_lock_sock(child); ··· 808 882 local_bh_enable(); 809 883 sock_put(child); 810 884 811 - sk_acceptq_removed(sk); 812 885 reqsk_put(req); 886 + cond_resched(); 813 887 } 814 888 if (queue->fastopenq.rskq_rst_head) { 815 889 /* Free all the reqs queued in rskq_rst_head. */ 816 890 spin_lock_bh(&queue->fastopenq.lock); 817 - acc_req = queue->fastopenq.rskq_rst_head; 891 + req = queue->fastopenq.rskq_rst_head; 818 892 queue->fastopenq.rskq_rst_head = NULL; 819 893 spin_unlock_bh(&queue->fastopenq.lock); 820 - while ((req = acc_req) != NULL) { 821 - acc_req = req->dl_next; 894 + while (req != NULL) { 895 + next = req->dl_next; 822 896 reqsk_put(req); 897 + req = next; 823 898 } 824 899 } 825 900 WARN_ON(sk->sk_ack_backlog);
+8 -88
net/ipv4/inet_diag.c
··· 730 730 #endif 731 731 } 732 732 733 - static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, 734 - struct netlink_callback *cb, 735 - const struct inet_diag_req_v2 *r, 736 - const struct nlattr *bc) 737 - { 738 - struct inet_connection_sock *icsk = inet_csk(sk); 739 - struct inet_sock *inet = inet_sk(sk); 740 - struct inet_diag_entry entry; 741 - int j, s_j, reqnum, s_reqnum; 742 - struct listen_sock *lopt; 743 - int err = 0; 744 - 745 - s_j = cb->args[3]; 746 - s_reqnum = cb->args[4]; 747 - 748 - if (s_j > 0) 749 - s_j--; 750 - 751 - entry.family = sk->sk_family; 752 - 753 - spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); 754 - 755 - lopt = icsk->icsk_accept_queue.listen_opt; 756 - if (!lopt || !listen_sock_qlen(lopt)) 757 - goto out; 758 - 759 - if (bc) { 760 - entry.sport = inet->inet_num; 761 - entry.userlocks = sk->sk_userlocks; 762 - } 763 - 764 - for (j = s_j; j < lopt->nr_table_entries; j++) { 765 - struct request_sock *req, *head = lopt->syn_table[j]; 766 - 767 - reqnum = 0; 768 - for (req = head; req; reqnum++, req = req->dl_next) { 769 - struct inet_request_sock *ireq = inet_rsk(req); 770 - 771 - if (reqnum < s_reqnum) 772 - continue; 773 - if (r->id.idiag_dport != ireq->ir_rmt_port && 774 - r->id.idiag_dport) 775 - continue; 776 - 777 - if (bc) { 778 - /* Note: entry.sport and entry.userlocks are already set */ 779 - entry_fill_addrs(&entry, req_to_sk(req)); 780 - entry.dport = ntohs(ireq->ir_rmt_port); 781 - 782 - if (!inet_diag_bc_run(bc, &entry)) 783 - continue; 784 - } 785 - 786 - err = inet_req_diag_fill(req_to_sk(req), skb, 787 - NETLINK_CB(cb->skb).portid, 788 - cb->nlh->nlmsg_seq, 789 - NLM_F_MULTI, cb->nlh); 790 - if (err < 0) { 791 - cb->args[3] = j + 1; 792 - cb->args[4] = reqnum; 793 - goto out; 794 - } 795 - } 796 - 797 - s_reqnum = 0; 798 - } 799 - 800 - out: 801 - spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); 802 - 803 - return err; 804 - } 805 - 806 733 void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, 
struct sk_buff *skb, 807 734 struct netlink_callback *cb, 808 735 const struct inet_diag_req_v2 *r, struct nlattr *bc) 809 736 { 810 737 struct net *net = sock_net(skb->sk); 811 738 int i, num, s_i, s_num; 739 + u32 idiag_states = r->idiag_states; 812 740 741 + if (idiag_states & TCPF_SYN_RECV) 742 + idiag_states |= TCPF_NEW_SYN_RECV; 813 743 s_i = cb->args[1]; 814 744 s_num = num = cb->args[2]; 815 745 816 746 if (cb->args[0] == 0) { 817 - if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) 747 + if (!(idiag_states & TCPF_LISTEN)) 818 748 goto skip_listen_ht; 819 749 820 750 for (i = s_i; i < INET_LHTABLE_SIZE; i++) { ··· 774 844 r->id.idiag_sport) 775 845 goto next_listen; 776 846 777 - if (!(r->idiag_states & TCPF_LISTEN) || 778 - r->id.idiag_dport || 847 + if (r->id.idiag_dport || 779 848 cb->args[3] > 0) 780 - goto syn_recv; 781 - 782 - if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { 783 - spin_unlock_bh(&ilb->lock); 784 - goto done; 785 - } 786 - 787 - syn_recv: 788 - if (!(r->idiag_states & TCPF_SYN_RECV)) 789 849 goto next_listen; 790 850 791 - if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) { 851 + if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { 792 852 spin_unlock_bh(&ilb->lock); 793 853 goto done; 794 854 } ··· 799 879 s_i = num = s_num = 0; 800 880 } 801 881 802 - if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) 882 + if (!(idiag_states & ~TCPF_LISTEN)) 803 883 goto out; 804 884 805 885 for (i = s_i; i <= hashinfo->ehash_mask; i++) { ··· 826 906 goto next_normal; 827 907 state = (sk->sk_state == TCP_TIME_WAIT) ? 828 908 inet_twsk(sk)->tw_substate : sk->sk_state; 829 - if (!(r->idiag_states & (1 << state))) 909 + if (!(idiag_states & (1 << state))) 830 910 goto next_normal; 831 911 if (r->sdiag_family != AF_UNSPEC && 832 912 sk->sk_family != r->sdiag_family)
+12 -2
net/ipv4/inet_hashtables.c
··· 398 398 inet->inet_dport); 399 399 } 400 400 401 - void __inet_hash_nolisten(struct sock *sk, struct sock *osk) 401 + /* insert a socket into ehash, and eventually remove another one 402 + * (The another one can be a SYN_RECV or TIMEWAIT 403 + */ 404 + int inet_ehash_insert(struct sock *sk, struct sock *osk) 402 405 { 403 406 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 404 407 struct hlist_nulls_head *list; 405 408 struct inet_ehash_bucket *head; 406 409 spinlock_t *lock; 410 + int ret = 0; 407 411 408 - WARN_ON(!sk_unhashed(sk)); 412 + WARN_ON_ONCE(!sk_unhashed(sk)); 409 413 410 414 sk->sk_hash = sk_ehashfn(sk); 411 415 head = inet_ehash_bucket(hashinfo, sk->sk_hash); ··· 423 419 sk_nulls_del_node_init_rcu(osk); 424 420 } 425 421 spin_unlock(lock); 422 + return ret; 423 + } 424 + 425 + void __inet_hash_nolisten(struct sock *sk, struct sock *osk) 426 + { 427 + inet_ehash_insert(sk, osk); 426 428 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 427 429 } 428 430 EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
+4
net/ipv4/syncookies.c
··· 284 284 } 285 285 EXPORT_SYMBOL(cookie_ecn_ok); 286 286 287 + /* On input, sk is a listener. 288 + * Output is listener if incoming packet would not create a child 289 + * NULL if memory could not be allocated. 290 + */ 287 291 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) 288 292 { 289 293 struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
+2 -2
net/ipv4/tcp_fastopen.c
··· 161 161 tp->snd_wnd = ntohs(tcp_hdr(skb)->window); 162 162 163 163 /* Activate the retrans timer so that SYNACK can be retransmitted. 164 - * The request socket is not added to the SYN table of the parent 164 + * The request socket is not added to the ehash 165 165 * because it's been added to the accept queue directly. 166 166 */ 167 167 inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, 168 168 TCP_TIMEOUT_INIT, TCP_RTO_MAX); 169 169 170 - atomic_set(&req->rsk_refcnt, 1); 170 + atomic_set(&req->rsk_refcnt, 2); 171 171 /* Add the child socket directly into the accept queue */ 172 172 inet_csk_reqsk_queue_add(sk, req, child); 173 173
+15 -15
net/ipv4/tcp_input.c
··· 6068 6068 const struct sk_buff *skb, 6069 6069 const char *proto) 6070 6070 { 6071 + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 6071 6072 const char *msg = "Dropping request"; 6072 6073 bool want_cookie = false; 6073 - struct listen_sock *lopt; 6074 6074 6075 6075 #ifdef CONFIG_SYN_COOKIES 6076 6076 if (sysctl_tcp_syncookies) { ··· 6081 6081 #endif 6082 6082 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); 6083 6083 6084 - lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; 6085 - if (!lopt->synflood_warned && 6084 + if (!queue->synflood_warned && 6086 6085 sysctl_tcp_syncookies != 2 && 6087 - xchg(&lopt->synflood_warned, 1) == 0) 6086 + xchg(&queue->synflood_warned, 1) == 0) 6088 6087 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", 6089 6088 proto, ntohs(tcp_hdr(skb)->dest), msg); 6090 6089 ··· 6120 6121 struct request_sock *req; 6121 6122 bool want_cookie = false; 6122 6123 struct flowi fl; 6123 - int err; 6124 - 6125 6124 6126 6125 /* TW buckets are converted to open requests without 6127 6126 * limitations, they conserve resources and peer is ··· 6228 6231 tcp_rsk(req)->snt_isn = isn; 6229 6232 tcp_rsk(req)->txhash = net_tx_rndhash(); 6230 6233 tcp_openreq_init_rwin(req, sk, dst); 6231 - if (!want_cookie) 6234 + if (!want_cookie) { 6232 6235 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); 6233 - err = af_ops->send_synack(fastopen_sk ?: sk, dst, &fl, req, 6234 - skb_get_queue_mapping(skb), &foc); 6236 + tcp_reqsk_record_syn(sk, req, skb); 6237 + } 6235 6238 if (fastopen_sk) { 6239 + af_ops->send_synack(fastopen_sk, dst, &fl, req, 6240 + skb_get_queue_mapping(skb), &foc, false); 6236 6241 sock_put(fastopen_sk); 6237 6242 } else { 6238 - if (err || want_cookie) 6239 - goto drop_and_free; 6240 - 6241 6243 tcp_rsk(req)->tfo_listener = false; 6242 - af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 6244 + if (!want_cookie) 6245 + inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 6246 
+ af_ops->send_synack(sk, dst, &fl, req, 6247 + skb_get_queue_mapping(skb), &foc, !want_cookie); 6248 + if (want_cookie) 6249 + goto drop_and_free; 6243 6250 } 6244 - tcp_reqsk_record_syn(sk, req, skb); 6245 - 6251 + reqsk_put(req); 6246 6252 return 0; 6247 6253 6248 6254 drop_and_release:
+57 -104
net/ipv4/tcp_ipv4.c
··· 822 822 struct flowi *fl, 823 823 struct request_sock *req, 824 824 u16 queue_mapping, 825 - struct tcp_fastopen_cookie *foc) 825 + struct tcp_fastopen_cookie *foc, 826 + bool attach_req) 826 827 { 827 828 const struct inet_request_sock *ireq = inet_rsk(req); 828 829 struct flowi4 fl4; ··· 834 833 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 835 834 return -1; 836 835 837 - skb = tcp_make_synack(sk, dst, req, foc); 836 + skb = tcp_make_synack(sk, dst, req, foc, attach_req); 838 837 839 838 if (skb) { 840 839 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); ··· 1113 1112 } 1114 1113 EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1115 1114 1115 + #endif 1116 + 1116 1117 /* Called with rcu_read_lock() */ 1117 - static bool tcp_v4_inbound_md5_hash(struct sock *sk, 1118 + static bool tcp_v4_inbound_md5_hash(const struct sock *sk, 1118 1119 const struct sk_buff *skb) 1119 1120 { 1121 + #ifdef CONFIG_TCP_MD5SIG 1120 1122 /* 1121 1123 * This gets called for each TCP segment that arrives 1122 1124 * so we want to be efficient. 
··· 1169 1165 return true; 1170 1166 } 1171 1167 return false; 1172 - } 1173 1168 #endif 1169 + return false; 1170 + } 1174 1171 1175 1172 static void tcp_v4_init_req(struct request_sock *req, 1176 1173 const struct sock *sk_listener, ··· 1225 1220 .route_req = tcp_v4_route_req, 1226 1221 .init_seq = tcp_v4_init_sequence, 1227 1222 .send_synack = tcp_v4_send_synack, 1228 - .queue_hash_add = inet_csk_reqsk_queue_hash_add, 1229 1223 }; 1230 1224 1231 1225 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ··· 1343 1339 } 1344 1340 EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 1345 1341 1346 - static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 1342 + static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) 1347 1343 { 1348 - const struct tcphdr *th = tcp_hdr(skb); 1349 - const struct iphdr *iph = ip_hdr(skb); 1350 - struct request_sock *req; 1351 - struct sock *nsk; 1352 - 1353 - req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); 1354 - if (req) { 1355 - nsk = tcp_check_req(sk, skb, req, false); 1356 - if (!nsk || nsk == sk) 1357 - reqsk_put(req); 1358 - return nsk; 1359 - } 1360 - 1361 - nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, 1362 - th->source, iph->daddr, th->dest, inet_iif(skb)); 1363 - 1364 - if (nsk) { 1365 - if (nsk->sk_state != TCP_TIME_WAIT) { 1366 - bh_lock_sock(nsk); 1367 - return nsk; 1368 - } 1369 - inet_twsk_put(inet_twsk(nsk)); 1370 - return NULL; 1371 - } 1372 - 1373 1344 #ifdef CONFIG_SYN_COOKIES 1345 + const struct tcphdr *th = tcp_hdr(skb); 1346 + 1374 1347 if (!th->syn) 1375 1348 sk = cookie_v4_check(sk, skb); 1376 1349 #endif ··· 1355 1374 } 1356 1375 1357 1376 /* The socket must have it's spinlock held when we get 1358 - * here. 1377 + * here, unless it is a TCP_LISTEN socket. 1359 1378 * 1360 1379 * We have a potential double-lock case here, so even when 1361 1380 * doing backlog processing we use the BH locking scheme. 
··· 1386 1405 goto csum_err; 1387 1406 1388 1407 if (sk->sk_state == TCP_LISTEN) { 1389 - struct sock *nsk = tcp_v4_hnd_req(sk, skb); 1408 + struct sock *nsk = tcp_v4_cookie_check(sk, skb); 1409 + 1390 1410 if (!nsk) 1391 1411 goto discard; 1392 - 1393 1412 if (nsk != sk) { 1394 1413 sock_rps_save_rxhash(nsk, skb); 1395 - sk_mark_napi_id(sk, skb); 1414 + sk_mark_napi_id(nsk, skb); 1396 1415 if (tcp_child_process(sk, nsk, skb)) { 1397 1416 rsk = nsk; 1398 1417 goto reset; ··· 1580 1599 if (sk->sk_state == TCP_TIME_WAIT) 1581 1600 goto do_time_wait; 1582 1601 1602 + if (sk->sk_state == TCP_NEW_SYN_RECV) { 1603 + struct request_sock *req = inet_reqsk(sk); 1604 + struct sock *nsk = NULL; 1605 + 1606 + sk = req->rsk_listener; 1607 + if (tcp_v4_inbound_md5_hash(sk, skb)) 1608 + goto discard_and_relse; 1609 + if (sk->sk_state == TCP_LISTEN) 1610 + nsk = tcp_check_req(sk, skb, req, false); 1611 + if (!nsk) { 1612 + reqsk_put(req); 1613 + goto discard_it; 1614 + } 1615 + if (nsk == sk) { 1616 + sock_hold(sk); 1617 + reqsk_put(req); 1618 + } else if (tcp_child_process(sk, nsk, skb)) { 1619 + tcp_v4_send_reset(nsk, skb); 1620 + goto discard_it; 1621 + } else { 1622 + return 0; 1623 + } 1624 + } 1583 1625 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 1584 1626 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1585 1627 goto discard_and_relse; ··· 1611 1607 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 1612 1608 goto discard_and_relse; 1613 1609 1614 - #ifdef CONFIG_TCP_MD5SIG 1615 - /* 1616 - * We really want to reject the packet as early as possible 1617 - * if: 1618 - * o We're expecting an MD5'd packet and this is no MD5 tcp option 1619 - * o There is an MD5 option and we're not expecting one 1620 - */ 1621 1610 if (tcp_v4_inbound_md5_hash(sk, skb)) 1622 1611 goto discard_and_relse; 1623 - #endif 1624 1612 1625 1613 nf_reset(skb); 1626 1614 1627 1615 if (sk_filter(sk, skb)) 1628 1616 goto discard_and_relse; 1629 1617 1630 - sk_incoming_cpu_update(sk); 1631 1618 
skb->dev = NULL; 1619 + 1620 + if (sk->sk_state == TCP_LISTEN) { 1621 + ret = tcp_v4_do_rcv(sk, skb); 1622 + goto put_and_return; 1623 + } 1624 + 1625 + sk_incoming_cpu_update(sk); 1632 1626 1633 1627 bh_lock_sock_nested(sk); 1634 1628 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ··· 1642 1640 } 1643 1641 bh_unlock_sock(sk); 1644 1642 1643 + put_and_return: 1645 1644 sock_put(sk); 1646 1645 1647 1646 return ret; ··· 1837 1834 ++st->num; 1838 1835 ++st->offset; 1839 1836 1840 - if (st->state == TCP_SEQ_STATE_OPENREQ) { 1841 - struct request_sock *req = cur; 1842 - 1843 - icsk = inet_csk(st->syn_wait_sk); 1844 - req = req->dl_next; 1845 - while (1) { 1846 - while (req) { 1847 - if (req->rsk_ops->family == st->family) { 1848 - cur = req; 1849 - goto out; 1850 - } 1851 - req = req->dl_next; 1852 - } 1853 - if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) 1854 - break; 1855 - get_req: 1856 - req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; 1857 - } 1858 - sk = sk_nulls_next(st->syn_wait_sk); 1859 - st->state = TCP_SEQ_STATE_LISTENING; 1860 - spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1861 - } else { 1862 - icsk = inet_csk(sk); 1863 - spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1864 - if (reqsk_queue_len(&icsk->icsk_accept_queue)) 1865 - goto start_req; 1866 - spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1867 - sk = sk_nulls_next(sk); 1868 - } 1837 + sk = sk_nulls_next(sk); 1869 1838 get_sk: 1870 1839 sk_nulls_for_each_from(sk, node) { 1871 1840 if (!net_eq(sock_net(sk), net)) ··· 1847 1872 goto out; 1848 1873 } 1849 1874 icsk = inet_csk(sk); 1850 - spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1851 - if (reqsk_queue_len(&icsk->icsk_accept_queue)) { 1852 - start_req: 1853 - st->uid = sock_i_uid(sk); 1854 - st->syn_wait_sk = sk; 1855 - st->state = TCP_SEQ_STATE_OPENREQ; 1856 - st->sbucket = 0; 1857 - goto get_req; 1858 - } 1859 - 
spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1860 1875 } 1861 1876 spin_unlock_bh(&ilb->lock); 1862 1877 st->offset = 0; ··· 1978 2013 void *rc = NULL; 1979 2014 1980 2015 switch (st->state) { 1981 - case TCP_SEQ_STATE_OPENREQ: 1982 2016 case TCP_SEQ_STATE_LISTENING: 1983 2017 if (st->bucket >= INET_LHTABLE_SIZE) 1984 2018 break; ··· 2036 2072 } 2037 2073 2038 2074 switch (st->state) { 2039 - case TCP_SEQ_STATE_OPENREQ: 2040 2075 case TCP_SEQ_STATE_LISTENING: 2041 2076 rc = listening_get_next(seq, v); 2042 2077 if (!rc) { ··· 2060 2097 struct tcp_iter_state *st = seq->private; 2061 2098 2062 2099 switch (st->state) { 2063 - case TCP_SEQ_STATE_OPENREQ: 2064 - if (v) { 2065 - struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); 2066 - spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2067 - } 2068 2100 case TCP_SEQ_STATE_LISTENING: 2069 2101 if (v != SEQ_START_TOKEN) 2070 2102 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); ··· 2113 2155 EXPORT_SYMBOL(tcp_proc_unregister); 2114 2156 2115 2157 static void get_openreq4(const struct request_sock *req, 2116 - struct seq_file *f, int i, kuid_t uid) 2158 + struct seq_file *f, int i) 2117 2159 { 2118 2160 const struct inet_request_sock *ireq = inet_rsk(req); 2119 2161 long delta = req->rsk_timer.expires - jiffies; ··· 2130 2172 1, /* timers active (only the expire timer) */ 2131 2173 jiffies_delta_to_clock_t(delta), 2132 2174 req->num_timeout, 2133 - from_kuid_munged(seq_user_ns(f), uid), 2175 + from_kuid_munged(seq_user_ns(f), 2176 + sock_i_uid(req->rsk_listener)), 2134 2177 0, /* non standard timer */ 2135 2178 0, /* open_requests have no inode */ 2136 2179 0, ··· 2232 2273 } 2233 2274 st = seq->private; 2234 2275 2235 - switch (st->state) { 2236 - case TCP_SEQ_STATE_LISTENING: 2237 - case TCP_SEQ_STATE_ESTABLISHED: 2238 - if (sk->sk_state == TCP_TIME_WAIT) 2239 - get_timewait4_sock(v, seq, st->num); 2240 - else 2241 - get_tcp4_sock(v, seq, st->num); 2242 - break; 2243 - case 
TCP_SEQ_STATE_OPENREQ: 2244 - get_openreq4(v, seq, st->num, st->uid); 2245 - break; 2246 - } 2276 + if (sk->sk_state == TCP_TIME_WAIT) 2277 + get_timewait4_sock(v, seq, st->num); 2278 + else if (sk->sk_state == TCP_NEW_SYN_RECV) 2279 + get_openreq4(v, seq, st->num); 2280 + else 2281 + get_tcp4_sock(v, seq, st->num); 2247 2282 out: 2248 2283 seq_pad(seq, '\n'); 2249 2284 return 0;
-2
net/ipv4/tcp_minisocks.c
··· 578 578 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); 579 579 bool paws_reject = false; 580 580 581 - BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN)); 582 - 583 581 tmp_opt.saw_tstamp = 0; 584 582 if (th->doff > (sizeof(struct tcphdr)>>2)) { 585 583 tcp_parse_options(skb, &tmp_opt, 0, NULL);
+15 -7
net/ipv4/tcp_output.c
··· 2947 2947 */ 2948 2948 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, 2949 2949 struct request_sock *req, 2950 - struct tcp_fastopen_cookie *foc) 2950 + struct tcp_fastopen_cookie *foc, 2951 + bool attach_req) 2951 2952 { 2952 2953 struct inet_request_sock *ireq = inet_rsk(req); 2953 2954 const struct tcp_sock *tp = tcp_sk(sk); ··· 2960 2959 u16 user_mss; 2961 2960 int mss; 2962 2961 2963 - /* sk is a const pointer, because we want to express multiple cpus 2964 - * might call us concurrently. 2965 - * sock_wmalloc() will change sk->sk_wmem_alloc in an atomic way. 2966 - */ 2967 - skb = sock_wmalloc((struct sock *)sk, MAX_TCP_HEADER, 1, GFP_ATOMIC); 2962 + skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 2968 2963 if (unlikely(!skb)) { 2969 2964 dst_release(dst); 2970 2965 return NULL; ··· 2968 2971 /* Reserve space for headers. */ 2969 2972 skb_reserve(skb, MAX_TCP_HEADER); 2970 2973 2974 + if (attach_req) { 2975 + skb->destructor = sock_edemux; 2976 + sock_hold(req_to_sk(req)); 2977 + skb->sk = req_to_sk(req); 2978 + } else { 2979 + /* sk is a const pointer, because we want to express multiple 2980 + * cpu might call us concurrently. 2981 + * sk->sk_wmem_alloc in an atomic, we can promote to rw. 2982 + */ 2983 + skb_set_owner_w(skb, (struct sock *)sk); 2984 + } 2971 2985 skb_dst_set(skb, dst); 2972 2986 2973 2987 mss = dst_metric_advmss(dst); ··· 3518 3510 int res; 3519 3511 3520 3512 tcp_rsk(req)->txhash = net_tx_rndhash(); 3521 - res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); 3513 + res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL, true); 3522 3514 if (!res) { 3523 3515 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 3524 3516 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
-67
net/ipv6/inet6_connection_sock.c
··· 94 94 } 95 95 EXPORT_SYMBOL(inet6_csk_route_req); 96 96 97 - /* 98 - * request_sock (formerly open request) hash tables. 99 - */ 100 - static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, 101 - const u32 rnd, const u32 synq_hsize) 102 - { 103 - u32 c; 104 - 105 - c = jhash_3words((__force u32)raddr->s6_addr32[0], 106 - (__force u32)raddr->s6_addr32[1], 107 - (__force u32)raddr->s6_addr32[2], 108 - rnd); 109 - 110 - c = jhash_2words((__force u32)raddr->s6_addr32[3], 111 - (__force u32)rport, 112 - c); 113 - 114 - return c & (synq_hsize - 1); 115 - } 116 - 117 - struct request_sock *inet6_csk_search_req(struct sock *sk, 118 - const __be16 rport, 119 - const struct in6_addr *raddr, 120 - const struct in6_addr *laddr, 121 - const int iif) 122 - { 123 - struct inet_connection_sock *icsk = inet_csk(sk); 124 - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 125 - struct request_sock *req; 126 - u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd, 127 - lopt->nr_table_entries); 128 - 129 - spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); 130 - for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { 131 - const struct inet_request_sock *ireq = inet_rsk(req); 132 - 133 - if (ireq->ir_rmt_port == rport && 134 - req->rsk_ops->family == AF_INET6 && 135 - ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && 136 - ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && 137 - (!ireq->ir_iif || ireq->ir_iif == iif)) { 138 - atomic_inc(&req->rsk_refcnt); 139 - WARN_ON(req->sk != NULL); 140 - break; 141 - } 142 - } 143 - spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); 144 - 145 - return req; 146 - } 147 - EXPORT_SYMBOL_GPL(inet6_csk_search_req); 148 - 149 - void inet6_csk_reqsk_queue_hash_add(struct sock *sk, 150 - struct request_sock *req, 151 - const unsigned long timeout) 152 - { 153 - struct inet_connection_sock *icsk = inet_csk(sk); 154 - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 155 - const u32 h = 
inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr, 156 - inet_rsk(req)->ir_rmt_port, 157 - lopt->hash_rnd, lopt->nr_table_entries); 158 - 159 - reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); 160 - inet_csk_reqsk_queue_added(sk, timeout); 161 - } 162 - EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); 163 - 164 97 void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) 165 98 { 166 99 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
+60 -59
net/ipv6/tcp_ipv6.c
··· 438 438 struct flowi *fl, 439 439 struct request_sock *req, 440 440 u16 queue_mapping, 441 - struct tcp_fastopen_cookie *foc) 441 + struct tcp_fastopen_cookie *foc, 442 + bool attach_req) 442 443 { 443 444 struct inet_request_sock *ireq = inet_rsk(req); 444 445 struct ipv6_pinfo *np = inet6_sk(sk); ··· 452 451 IPPROTO_TCP)) == NULL) 453 452 goto done; 454 453 455 - skb = tcp_make_synack(sk, dst, req, foc); 454 + skb = tcp_make_synack(sk, dst, req, foc, attach_req); 456 455 457 456 if (skb) { 458 457 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, ··· 623 622 return 1; 624 623 } 625 624 626 - static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) 625 + #endif 626 + 627 + static bool tcp_v6_inbound_md5_hash(const struct sock *sk, 628 + const struct sk_buff *skb) 627 629 { 630 + #ifdef CONFIG_TCP_MD5SIG 628 631 const __u8 *hash_location = NULL; 629 632 struct tcp_md5sig_key *hash_expected; 630 633 const struct ipv6hdr *ip6h = ipv6_hdr(skb); ··· 665 660 &ip6h->daddr, ntohs(th->dest)); 666 661 return true; 667 662 } 663 + #endif 668 664 return false; 669 665 } 670 - #endif 671 666 672 667 static void tcp_v6_init_req(struct request_sock *req, 673 668 const struct sock *sk_listener, ··· 728 723 .route_req = tcp_v6_route_req, 729 724 .init_seq = tcp_v6_init_sequence, 730 725 .send_synack = tcp_v6_send_synack, 731 - .queue_hash_add = inet6_csk_reqsk_queue_hash_add, 732 726 }; 733 727 734 728 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, ··· 938 934 } 939 935 940 936 941 - static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) 937 + static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 942 938 { 943 - const struct tcphdr *th = tcp_hdr(skb); 944 - struct request_sock *req; 945 - struct sock *nsk; 946 - 947 - /* Find possible connection requests. 
*/ 948 - req = inet6_csk_search_req(sk, th->source, 949 - &ipv6_hdr(skb)->saddr, 950 - &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); 951 - if (req) { 952 - nsk = tcp_check_req(sk, skb, req, false); 953 - if (!nsk || nsk == sk) 954 - reqsk_put(req); 955 - return nsk; 956 - } 957 - nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, 958 - &ipv6_hdr(skb)->saddr, th->source, 959 - &ipv6_hdr(skb)->daddr, ntohs(th->dest), 960 - tcp_v6_iif(skb)); 961 - 962 - if (nsk) { 963 - if (nsk->sk_state != TCP_TIME_WAIT) { 964 - bh_lock_sock(nsk); 965 - return nsk; 966 - } 967 - inet_twsk_put(inet_twsk(nsk)); 968 - return NULL; 969 - } 970 - 971 939 #ifdef CONFIG_SYN_COOKIES 940 + const struct tcphdr *th = tcp_hdr(skb); 941 + 972 942 if (!th->syn) 973 943 sk = cookie_v6_check(sk, skb); 974 944 #endif ··· 1161 1183 } 1162 1184 1163 1185 /* The socket must have it's spinlock held when we get 1164 - * here. 1186 + * here, unless it is a TCP_LISTEN socket. 1165 1187 * 1166 1188 * We have a potential double-lock case here, so even when 1167 1189 * doing backlog processing we use the BH locking scheme. ··· 1232 1254 goto csum_err; 1233 1255 1234 1256 if (sk->sk_state == TCP_LISTEN) { 1235 - struct sock *nsk = tcp_v6_hnd_req(sk, skb); 1257 + struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1258 + 1236 1259 if (!nsk) 1237 1260 goto discard; 1238 1261 1239 - /* 1240 - * Queue it on the new socket if the new socket is active, 1241 - * otherwise we just shortcircuit this and continue with 1242 - * the new socket.. 
1243 - */ 1244 1262 if (nsk != sk) { 1245 1263 sock_rps_save_rxhash(nsk, skb); 1246 - sk_mark_napi_id(sk, skb); 1264 + sk_mark_napi_id(nsk, skb); 1247 1265 if (tcp_child_process(sk, nsk, skb)) 1248 1266 goto reset; 1249 1267 if (opt_skb) ··· 1372 1398 if (sk->sk_state == TCP_TIME_WAIT) 1373 1399 goto do_time_wait; 1374 1400 1401 + if (sk->sk_state == TCP_NEW_SYN_RECV) { 1402 + struct request_sock *req = inet_reqsk(sk); 1403 + struct sock *nsk = NULL; 1404 + 1405 + sk = req->rsk_listener; 1406 + tcp_v6_fill_cb(skb, hdr, th); 1407 + if (tcp_v6_inbound_md5_hash(sk, skb)) { 1408 + reqsk_put(req); 1409 + goto discard_it; 1410 + } 1411 + if (sk->sk_state == TCP_LISTEN) 1412 + nsk = tcp_check_req(sk, skb, req, false); 1413 + if (!nsk) { 1414 + reqsk_put(req); 1415 + goto discard_it; 1416 + } 1417 + if (nsk == sk) { 1418 + sock_hold(sk); 1419 + reqsk_put(req); 1420 + tcp_v6_restore_cb(skb); 1421 + } else if (tcp_child_process(sk, nsk, skb)) { 1422 + tcp_v6_send_reset(nsk, skb); 1423 + goto discard_it; 1424 + } else { 1425 + return 0; 1426 + } 1427 + } 1375 1428 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { 1376 1429 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1377 1430 goto discard_and_relse; ··· 1409 1408 1410 1409 tcp_v6_fill_cb(skb, hdr, th); 1411 1410 1412 - #ifdef CONFIG_TCP_MD5SIG 1413 1411 if (tcp_v6_inbound_md5_hash(sk, skb)) 1414 1412 goto discard_and_relse; 1415 - #endif 1416 1413 1417 1414 if (sk_filter(sk, skb)) 1418 1415 goto discard_and_relse; 1419 1416 1420 - sk_incoming_cpu_update(sk); 1421 1417 skb->dev = NULL; 1418 + 1419 + if (sk->sk_state == TCP_LISTEN) { 1420 + ret = tcp_v6_do_rcv(sk, skb); 1421 + goto put_and_return; 1422 + } 1423 + 1424 + sk_incoming_cpu_update(sk); 1422 1425 1423 1426 bh_lock_sock_nested(sk); 1424 1427 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ··· 1438 1433 } 1439 1434 bh_unlock_sock(sk); 1440 1435 1436 + put_and_return: 1441 1437 sock_put(sk); 1442 1438 return ret ? 
-1 : 0; 1443 1439 ··· 1639 1633 #ifdef CONFIG_PROC_FS 1640 1634 /* Proc filesystem TCPv6 sock list dumping. */ 1641 1635 static void get_openreq6(struct seq_file *seq, 1642 - struct request_sock *req, int i, kuid_t uid) 1636 + const struct request_sock *req, int i) 1643 1637 { 1644 1638 long ttd = req->rsk_timer.expires - jiffies; 1645 1639 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; ··· 1663 1657 1, /* timers active (only the expire timer) */ 1664 1658 jiffies_to_clock_t(ttd), 1665 1659 req->num_timeout, 1666 - from_kuid_munged(seq_user_ns(seq), uid), 1660 + from_kuid_munged(seq_user_ns(seq), 1661 + sock_i_uid(req->rsk_listener)), 1667 1662 0, /* non standard timer */ 1668 1663 0, /* open_requests have no inode */ 1669 1664 0, req); ··· 1769 1762 } 1770 1763 st = seq->private; 1771 1764 1772 - switch (st->state) { 1773 - case TCP_SEQ_STATE_LISTENING: 1774 - case TCP_SEQ_STATE_ESTABLISHED: 1775 - if (sk->sk_state == TCP_TIME_WAIT) 1776 - get_timewait6_sock(seq, v, st->num); 1777 - else 1778 - get_tcp6_sock(seq, v, st->num); 1779 - break; 1780 - case TCP_SEQ_STATE_OPENREQ: 1781 - get_openreq6(seq, v, st->num, st->uid); 1782 - break; 1783 - } 1765 + if (sk->sk_state == TCP_TIME_WAIT) 1766 + get_timewait6_sock(seq, v, st->num); 1767 + else if (sk->sk_state == TCP_NEW_SYN_RECV) 1768 + get_openreq6(seq, v, st->num); 1769 + else 1770 + get_tcp6_sock(seq, v, st->num); 1784 1771 out: 1785 1772 return 0; 1786 1773 }
+7 -5
net/sched/sch_fq.c
··· 224 224 if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) 225 225 return &q->internal; 226 226 227 - /* SYNACK messages are attached to a listener socket. 228 - * 1) They are not part of a 'flow' yet 229 - * 2) We do not want to rate limit them (eg SYNFLOOD attack), 227 + /* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket 228 + * 1) request sockets are not full blown, 229 + * they do not contain sk_pacing_rate 230 + * 2) They are not part of a 'flow' yet 231 + * 3) We do not want to rate limit them (eg SYNFLOOD attack), 230 232 * especially if the listener set SO_MAX_PACING_RATE 231 - * 3) We pretend they are orphaned 233 + * 4) We pretend they are orphaned 232 234 */ 233 - if (!sk || sk->sk_state == TCP_LISTEN) { 235 + if (!sk || sk->sk_state == TCP_NEW_SYN_RECV) { 234 236 unsigned long hash = skb_get_hash(skb) & q->orphan_mask; 235 237 236 238 /* By forcing low order bit to 1, we make sure to not