Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'tcp-lockless-listener'

Eric Dumazet says:

====================
tcp/dccp: lockless listener

TCP listener refactoring : this is becoming interesting !

This patch series takes the steps to use normal TCP/DCCP ehash
table to store SYN_RECV requests, instead of the private per-listener
hash table we had until now.

SYNACK skbs are now attached to their syn_recv request sockets,
so that we no longer heavily modify listener sk_wmem_alloc.

listener lock is no longer held in fast path, including
SYNCOOKIE mode.

During my tests, my server was able to process 3,500,000
SYN packets per second on one listener and still had available
cpu cycles.

That is about 2 to 3 orders of magnitude more than what we had with older kernels.

This effort started two years ago and I am pleased to reach expectations.

We'll probably extend SO_REUSEPORT to add proper cpu/numa affinities,
so that heavy duty TCP servers can get proper siloing thanks to multi-queue
NICs.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+310 -749
-9
include/net/inet6_connection_sock.h
··· 28 28 struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, 29 29 const struct request_sock *req, u8 proto); 30 30 31 - struct request_sock *inet6_csk_search_req(struct sock *sk, 32 - const __be16 rport, 33 - const struct in6_addr *raddr, 34 - const struct in6_addr *laddr, 35 - const int iif); 36 - 37 - void inet6_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 38 - const unsigned long timeout); 39 - 40 31 void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr); 41 32 42 33 int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl);
+2 -7
include/net/inet_connection_sock.h
··· 258 258 259 259 struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); 260 260 261 - struct request_sock *inet_csk_search_req(struct sock *sk, 262 - const __be16 rport, 263 - const __be32 raddr, 264 - const __be32 laddr); 265 261 int inet_csk_bind_conflict(const struct sock *sk, 266 262 const struct inet_bind_bucket *tb, bool relax); 267 263 int inet_csk_get_port(struct sock *sk, unsigned short snum); ··· 278 282 void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 279 283 unsigned long timeout); 280 284 281 - static inline void inet_csk_reqsk_queue_added(struct sock *sk, 282 - const unsigned long timeout) 285 + static inline void inet_csk_reqsk_queue_added(struct sock *sk) 283 286 { 284 287 reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue); 285 288 } ··· 295 300 296 301 static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) 297 302 { 298 - return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); 303 + return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog; 299 304 } 300 305 301 306 void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
+1
include/net/inet_hashtables.h
··· 205 205 206 206 void inet_hashinfo_init(struct inet_hashinfo *h); 207 207 208 + int inet_ehash_insert(struct sock *sk, struct sock *osk); 208 209 void __inet_hash_nolisten(struct sock *sk, struct sock *osk); 209 210 void __inet_hash(struct sock *sk, struct sock *osk); 210 211 void inet_hash(struct sock *sk);
+41 -91
include/net/request_sock.h
··· 69 69 u32 peer_secid; 70 70 }; 71 71 72 + static inline struct request_sock *inet_reqsk(struct sock *sk) 73 + { 74 + return (struct request_sock *)sk; 75 + } 76 + 77 + static inline struct sock *req_to_sk(struct request_sock *req) 78 + { 79 + return (struct sock *)req; 80 + } 81 + 72 82 static inline struct request_sock * 73 83 reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener) 74 84 { ··· 88 78 req->rsk_ops = ops; 89 79 sock_hold(sk_listener); 90 80 req->rsk_listener = sk_listener; 81 + req_to_sk(req)->sk_prot = sk_listener->sk_prot; 82 + sk_node_init(&req_to_sk(req)->sk_node); 91 83 req->saved_syn = NULL; 92 84 /* Following is temporary. It is coupled with debugging 93 85 * helpers in reqsk_put() & reqsk_free() ··· 97 85 atomic_set(&req->rsk_refcnt, 0); 98 86 } 99 87 return req; 100 - } 101 - 102 - static inline struct request_sock *inet_reqsk(struct sock *sk) 103 - { 104 - return (struct request_sock *)sk; 105 - } 106 - 107 - static inline struct sock *req_to_sk(struct request_sock *req) 108 - { 109 - return (struct sock *)req; 110 88 } 111 89 112 90 static inline void reqsk_free(struct request_sock *req) ··· 118 116 } 119 117 120 118 extern int sysctl_max_syn_backlog; 121 - 122 - /** struct listen_sock - listen state 123 - * 124 - * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs 125 - */ 126 - struct listen_sock { 127 - int qlen_inc; /* protected by listener lock */ 128 - int young_inc;/* protected by listener lock */ 129 - 130 - /* following fields can be updated by timer */ 131 - atomic_t qlen_dec; /* qlen = qlen_inc - qlen_dec */ 132 - atomic_t young_dec; 133 - 134 - u32 max_qlen_log ____cacheline_aligned_in_smp; 135 - u32 synflood_warned; 136 - u32 hash_rnd; 137 - u32 nr_table_entries; 138 - struct request_sock *syn_table[0]; 139 - }; 140 119 141 120 /* 142 121 * For a TCP Fast Open listener - ··· 152 169 * @rskq_accept_head - FIFO head of established children 153 170 * @rskq_accept_tail - FIFO tail of established children 
154 171 * @rskq_defer_accept - User waits for some data after accept() 155 - * @syn_wait_lock - serializer 156 - * 157 - * %syn_wait_lock is necessary only to avoid proc interface having to grab the main 158 - * lock sock while browsing the listening hash (otherwise it's deadlock prone). 159 172 * 160 173 */ 161 174 struct request_sock_queue { 175 + spinlock_t rskq_lock; 176 + u8 rskq_defer_accept; 177 + 178 + u32 synflood_warned; 179 + atomic_t qlen; 180 + atomic_t young; 181 + 162 182 struct request_sock *rskq_accept_head; 163 183 struct request_sock *rskq_accept_tail; 164 - u8 rskq_defer_accept; 165 - struct listen_sock *listen_opt; 166 184 struct fastopen_queue fastopenq; /* Check max_qlen != 0 to determine 167 185 * if TFO is enabled. 168 186 */ 169 - 170 - /* temporary alignment, our goal is to get rid of this lock */ 171 - spinlock_t syn_wait_lock ____cacheline_aligned_in_smp; 172 187 }; 173 188 174 - int reqsk_queue_alloc(struct request_sock_queue *queue, 175 - unsigned int nr_table_entries); 189 + void reqsk_queue_alloc(struct request_sock_queue *queue); 176 190 177 - void __reqsk_queue_destroy(struct request_sock_queue *queue); 178 - void reqsk_queue_destroy(struct request_sock_queue *queue); 179 191 void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, 180 192 bool reset); 181 193 182 - static inline struct request_sock * 183 - reqsk_queue_yank_acceptq(struct request_sock_queue *queue) 184 - { 185 - struct request_sock *req = queue->rskq_accept_head; 186 - 187 - queue->rskq_accept_head = NULL; 188 - return req; 189 - } 190 - 191 - static inline int reqsk_queue_empty(struct request_sock_queue *queue) 194 + static inline bool reqsk_queue_empty(const struct request_sock_queue *queue) 192 195 { 193 196 return queue->rskq_accept_head == NULL; 194 197 } ··· 184 215 struct sock *parent, 185 216 struct sock *child) 186 217 { 218 + spin_lock(&queue->rskq_lock); 187 219 req->sk = child; 188 220 sk_acceptq_added(parent); 189 221 ··· 195 225 196 226 
queue->rskq_accept_tail = req; 197 227 req->dl_next = NULL; 228 + spin_unlock(&queue->rskq_lock); 198 229 } 199 230 200 - static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue) 231 + static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue, 232 + struct sock *parent) 201 233 { 202 - struct request_sock *req = queue->rskq_accept_head; 234 + struct request_sock *req; 203 235 204 - WARN_ON(req == NULL); 205 - 206 - queue->rskq_accept_head = req->dl_next; 207 - if (queue->rskq_accept_head == NULL) 208 - queue->rskq_accept_tail = NULL; 209 - 236 + spin_lock_bh(&queue->rskq_lock); 237 + req = queue->rskq_accept_head; 238 + if (req) { 239 + sk_acceptq_removed(parent); 240 + queue->rskq_accept_head = req->dl_next; 241 + if (queue->rskq_accept_head == NULL) 242 + queue->rskq_accept_tail = NULL; 243 + } 244 + spin_unlock_bh(&queue->rskq_lock); 210 245 return req; 211 246 } 212 247 213 248 static inline void reqsk_queue_removed(struct request_sock_queue *queue, 214 249 const struct request_sock *req) 215 250 { 216 - struct listen_sock *lopt = queue->listen_opt; 217 - 218 251 if (req->num_timeout == 0) 219 - atomic_inc(&lopt->young_dec); 220 - atomic_inc(&lopt->qlen_dec); 252 + atomic_dec(&queue->young); 253 + atomic_dec(&queue->qlen); 221 254 } 222 255 223 256 static inline void reqsk_queue_added(struct request_sock_queue *queue) 224 257 { 225 - struct listen_sock *lopt = queue->listen_opt; 226 - 227 - lopt->young_inc++; 228 - lopt->qlen_inc++; 229 - } 230 - 231 - static inline int listen_sock_qlen(const struct listen_sock *lopt) 232 - { 233 - return lopt->qlen_inc - atomic_read(&lopt->qlen_dec); 234 - } 235 - 236 - static inline int listen_sock_young(const struct listen_sock *lopt) 237 - { 238 - return lopt->young_inc - atomic_read(&lopt->young_dec); 258 + atomic_inc(&queue->young); 259 + atomic_inc(&queue->qlen); 239 260 } 240 261 241 262 static inline int reqsk_queue_len(const struct request_sock_queue *queue) 
242 263 { 243 - const struct listen_sock *lopt = queue->listen_opt; 244 - 245 - return lopt ? listen_sock_qlen(lopt) : 0; 264 + return atomic_read(&queue->qlen); 246 265 } 247 266 248 267 static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) 249 268 { 250 - return listen_sock_young(queue->listen_opt); 269 + return atomic_read(&queue->young); 251 270 } 252 - 253 - static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) 254 - { 255 - return reqsk_queue_len(queue) >> queue->listen_opt->max_qlen_log; 256 - } 257 - 258 - void reqsk_queue_hash_req(struct request_sock_queue *queue, 259 - u32 hash, struct request_sock *req, 260 - unsigned long timeout); 261 271 262 272 #endif /* _REQUEST_SOCK_H */
+4 -6
include/net/tcp.h
··· 462 462 int tcp_connect(struct sock *sk); 463 463 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, 464 464 struct request_sock *req, 465 - struct tcp_fastopen_cookie *foc); 465 + struct tcp_fastopen_cookie *foc, 466 + bool attach_req); 466 467 int tcp_disconnect(struct sock *sk, int flags); 467 468 468 469 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); ··· 1619 1618 /* /proc */ 1620 1619 enum tcp_seq_states { 1621 1620 TCP_SEQ_STATE_LISTENING, 1622 - TCP_SEQ_STATE_OPENREQ, 1623 1621 TCP_SEQ_STATE_ESTABLISHED, 1624 1622 }; 1625 1623 ··· 1637 1637 enum tcp_seq_states state; 1638 1638 struct sock *syn_wait_sk; 1639 1639 int bucket, offset, sbucket, num; 1640 - kuid_t uid; 1641 1640 loff_t last_pos; 1642 1641 }; 1643 1642 ··· 1716 1717 __u32 (*init_seq)(const struct sk_buff *skb); 1717 1718 int (*send_synack)(const struct sock *sk, struct dst_entry *dst, 1718 1719 struct flowi *fl, struct request_sock *req, 1719 - u16 queue_mapping, struct tcp_fastopen_cookie *foc); 1720 - void (*queue_hash_add)(struct sock *sk, struct request_sock *req, 1721 - const unsigned long timeout); 1720 + u16 queue_mapping, struct tcp_fastopen_cookie *foc, 1721 + bool attach_req); 1722 1722 }; 1723 1723 1724 1724 #ifdef CONFIG_SYN_COOKIES
+2 -82
net/core/request_sock.c
··· 37 37 int sysctl_max_syn_backlog = 256; 38 38 EXPORT_SYMBOL(sysctl_max_syn_backlog); 39 39 40 - int reqsk_queue_alloc(struct request_sock_queue *queue, 41 - unsigned int nr_table_entries) 40 + void reqsk_queue_alloc(struct request_sock_queue *queue) 42 41 { 43 - size_t lopt_size = sizeof(struct listen_sock); 44 - struct listen_sock *lopt = NULL; 45 - 46 - nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); 47 - nr_table_entries = max_t(u32, nr_table_entries, 8); 48 - nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); 49 - lopt_size += nr_table_entries * sizeof(struct request_sock *); 50 - 51 - if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) 52 - lopt = kzalloc(lopt_size, GFP_KERNEL | 53 - __GFP_NOWARN | 54 - __GFP_NORETRY); 55 - if (!lopt) 56 - lopt = vzalloc(lopt_size); 57 - if (!lopt) 58 - return -ENOMEM; 59 - 60 - get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); 61 - spin_lock_init(&queue->syn_wait_lock); 42 + spin_lock_init(&queue->rskq_lock); 62 43 63 44 spin_lock_init(&queue->fastopenq.lock); 64 45 queue->fastopenq.rskq_rst_head = NULL; ··· 48 67 queue->fastopenq.max_qlen = 0; 49 68 50 69 queue->rskq_accept_head = NULL; 51 - lopt->nr_table_entries = nr_table_entries; 52 - lopt->max_qlen_log = ilog2(nr_table_entries); 53 - 54 - spin_lock_bh(&queue->syn_wait_lock); 55 - queue->listen_opt = lopt; 56 - spin_unlock_bh(&queue->syn_wait_lock); 57 - 58 - return 0; 59 - } 60 - 61 - void __reqsk_queue_destroy(struct request_sock_queue *queue) 62 - { 63 - /* This is an error recovery path only, no locking needed */ 64 - kvfree(queue->listen_opt); 65 - } 66 - 67 - static inline struct listen_sock *reqsk_queue_yank_listen_sk( 68 - struct request_sock_queue *queue) 69 - { 70 - struct listen_sock *lopt; 71 - 72 - spin_lock_bh(&queue->syn_wait_lock); 73 - lopt = queue->listen_opt; 74 - queue->listen_opt = NULL; 75 - spin_unlock_bh(&queue->syn_wait_lock); 76 - 77 - return lopt; 78 - } 79 - 80 - void 
reqsk_queue_destroy(struct request_sock_queue *queue) 81 - { 82 - /* make all the listen_opt local to us */ 83 - struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue); 84 - 85 - if (listen_sock_qlen(lopt) != 0) { 86 - unsigned int i; 87 - 88 - for (i = 0; i < lopt->nr_table_entries; i++) { 89 - struct request_sock *req; 90 - 91 - spin_lock_bh(&queue->syn_wait_lock); 92 - while ((req = lopt->syn_table[i]) != NULL) { 93 - lopt->syn_table[i] = req->dl_next; 94 - /* Because of following del_timer_sync(), 95 - * we must release the spinlock here 96 - * or risk a dead lock. 97 - */ 98 - spin_unlock_bh(&queue->syn_wait_lock); 99 - atomic_inc(&lopt->qlen_dec); 100 - if (del_timer_sync(&req->rsk_timer)) 101 - reqsk_put(req); 102 - reqsk_put(req); 103 - spin_lock_bh(&queue->syn_wait_lock); 104 - } 105 - spin_unlock_bh(&queue->syn_wait_lock); 106 - } 107 - } 108 - 109 - if (WARN_ON(listen_sock_qlen(lopt) != 0)) 110 - pr_err("qlen %u\n", listen_sock_qlen(lopt)); 111 - kvfree(lopt); 112 70 } 113 71 114 72 /*
+21 -43
net/dccp/ipv4.c
··· 444 444 } 445 445 EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); 446 446 447 - static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 448 - { 449 - const struct dccp_hdr *dh = dccp_hdr(skb); 450 - const struct iphdr *iph = ip_hdr(skb); 451 - struct sock *nsk; 452 - /* Find possible connection requests. */ 453 - struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport, 454 - iph->saddr, iph->daddr); 455 - if (req) { 456 - nsk = dccp_check_req(sk, skb, req); 457 - if (!nsk) 458 - reqsk_put(req); 459 - return nsk; 460 - } 461 - nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, 462 - iph->saddr, dh->dccph_sport, 463 - iph->daddr, dh->dccph_dport, 464 - inet_iif(skb)); 465 - if (nsk != NULL) { 466 - if (nsk->sk_state != DCCP_TIME_WAIT) { 467 - bh_lock_sock(nsk); 468 - return nsk; 469 - } 470 - inet_twsk_put(inet_twsk(nsk)); 471 - return NULL; 472 - } 473 - 474 - return sk; 475 - } 476 - 477 447 static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, 478 448 struct sk_buff *skb) 479 449 { ··· 675 705 * NOTE: the check for the packet types is done in 676 706 * dccp_rcv_state_process 677 707 */ 678 - if (sk->sk_state == DCCP_LISTEN) { 679 - struct sock *nsk = dccp_v4_hnd_req(sk, skb); 680 - 681 - if (nsk == NULL) 682 - goto discard; 683 - 684 - if (nsk != sk) { 685 - if (dccp_child_process(sk, nsk, skb)) 686 - goto reset; 687 - return 0; 688 - } 689 - } 690 708 691 709 if (dccp_rcv_state_process(sk, skb, dh, skb->len)) 692 710 goto reset; ··· 682 724 683 725 reset: 684 726 dccp_v4_ctl_send_reset(sk, skb); 685 - discard: 686 727 kfree_skb(skb); 687 728 return 0; 688 729 } ··· 825 868 goto no_dccp_socket; 826 869 } 827 870 871 + if (sk->sk_state == DCCP_NEW_SYN_RECV) { 872 + struct request_sock *req = inet_reqsk(sk); 873 + struct sock *nsk = NULL; 874 + 875 + sk = req->rsk_listener; 876 + if (sk->sk_state == DCCP_LISTEN) 877 + nsk = dccp_check_req(sk, skb, req); 878 + if (!nsk) { 879 + reqsk_put(req); 880 + goto 
discard_it; 881 + } 882 + if (nsk == sk) { 883 + sock_hold(sk); 884 + reqsk_put(req); 885 + } else if (dccp_child_process(sk, nsk, skb)) { 886 + dccp_v4_ctl_send_reset(sk, skb); 887 + goto discard_it; 888 + } else { 889 + return 0; 890 + } 891 + } 828 892 /* 829 893 * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage 830 894 * o if MinCsCov = 0, only packets with CsCov = 0 are accepted
+22 -50
net/dccp/ipv6.c
··· 290 290 .syn_ack_timeout = dccp_syn_ack_timeout, 291 291 }; 292 292 293 - static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) 294 - { 295 - const struct dccp_hdr *dh = dccp_hdr(skb); 296 - const struct ipv6hdr *iph = ipv6_hdr(skb); 297 - struct request_sock *req; 298 - struct sock *nsk; 299 - 300 - req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr, 301 - &iph->daddr, inet6_iif(skb)); 302 - if (req) { 303 - nsk = dccp_check_req(sk, skb, req); 304 - if (!nsk) 305 - reqsk_put(req); 306 - return nsk; 307 - } 308 - nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, 309 - &iph->saddr, dh->dccph_sport, 310 - &iph->daddr, ntohs(dh->dccph_dport), 311 - inet6_iif(skb)); 312 - if (nsk != NULL) { 313 - if (nsk->sk_state != DCCP_TIME_WAIT) { 314 - bh_lock_sock(nsk); 315 - return nsk; 316 - } 317 - inet_twsk_put(inet_twsk(nsk)); 318 - return NULL; 319 - } 320 - 321 - return sk; 322 - } 323 - 324 293 static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 325 294 { 326 295 struct request_sock *req; ··· 367 398 if (dccp_v6_send_response(sk, req)) 368 399 goto drop_and_free; 369 400 370 - inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); 401 + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); 371 402 return 0; 372 403 373 404 drop_and_free: ··· 610 641 * NOTE: the check for the packet types is done in 611 642 * dccp_rcv_state_process 612 643 */ 613 - if (sk->sk_state == DCCP_LISTEN) { 614 - struct sock *nsk = dccp_v6_hnd_req(sk, skb); 615 - 616 - if (nsk == NULL) 617 - goto discard; 618 - /* 619 - * Queue it on the new socket if the new socket is active, 620 - * otherwise we just shortcircuit this and continue with 621 - * the new socket.. 
622 - */ 623 - if (nsk != sk) { 624 - if (dccp_child_process(sk, nsk, skb)) 625 - goto reset; 626 - if (opt_skb != NULL) 627 - __kfree_skb(opt_skb); 628 - return 0; 629 - } 630 - } 631 644 632 645 if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) 633 646 goto reset; ··· 683 732 goto no_dccp_socket; 684 733 } 685 734 735 + if (sk->sk_state == DCCP_NEW_SYN_RECV) { 736 + struct request_sock *req = inet_reqsk(sk); 737 + struct sock *nsk = NULL; 738 + 739 + sk = req->rsk_listener; 740 + if (sk->sk_state == DCCP_LISTEN) 741 + nsk = dccp_check_req(sk, skb, req); 742 + if (!nsk) { 743 + reqsk_put(req); 744 + goto discard_it; 745 + } 746 + if (nsk == sk) { 747 + sock_hold(sk); 748 + reqsk_put(req); 749 + } else if (dccp_child_process(sk, nsk, skb)) { 750 + dccp_v6_ctl_send_reset(sk, skb); 751 + goto discard_it; 752 + } else { 753 + return 0; 754 + } 755 + } 686 756 /* 687 757 * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage 688 758 * o if MinCsCov = 0, only packets with CsCov = 0 are accepted
+37 -110
net/ipv4/inet_connection_sock.c
··· 330 330 if (error) 331 331 goto out_err; 332 332 } 333 - req = reqsk_queue_remove(queue); 333 + req = reqsk_queue_remove(queue, sk); 334 334 newsk = req->sk; 335 335 336 - sk_acceptq_removed(sk); 337 336 if (sk->sk_protocol == IPPROTO_TCP && 338 337 tcp_rsk(req)->tfo_listener) { 339 338 spin_lock_bh(&queue->fastopenq.lock); ··· 476 477 } 477 478 EXPORT_SYMBOL_GPL(inet_csk_route_child_sock); 478 479 479 - static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, 480 - const u32 rnd, const u32 synq_hsize) 481 - { 482 - return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); 483 - } 484 - 485 480 #if IS_ENABLED(CONFIG_IPV6) 486 481 #define AF_INET_FAMILY(fam) ((fam) == AF_INET) 487 482 #else 488 483 #define AF_INET_FAMILY(fam) true 489 484 #endif 490 - 491 - /* Note: this is temporary : 492 - * req sock will no longer be in listener hash table 493 - */ 494 - struct request_sock *inet_csk_search_req(struct sock *sk, 495 - const __be16 rport, 496 - const __be32 raddr, 497 - const __be32 laddr) 498 - { 499 - struct inet_connection_sock *icsk = inet_csk(sk); 500 - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 501 - struct request_sock *req; 502 - u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd, 503 - lopt->nr_table_entries); 504 - 505 - spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); 506 - for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { 507 - const struct inet_request_sock *ireq = inet_rsk(req); 508 - 509 - if (ireq->ir_rmt_port == rport && 510 - ireq->ir_rmt_addr == raddr && 511 - ireq->ir_loc_addr == laddr && 512 - AF_INET_FAMILY(req->rsk_ops->family)) { 513 - atomic_inc(&req->rsk_refcnt); 514 - WARN_ON(req->sk); 515 - break; 516 - } 517 - } 518 - spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); 519 - 520 - return req; 521 - } 522 - EXPORT_SYMBOL_GPL(inet_csk_search_req); 523 - 524 - void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 525 - 
unsigned long timeout) 526 - { 527 - struct inet_connection_sock *icsk = inet_csk(sk); 528 - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 529 - const u32 h = inet_synq_hash(inet_rsk(req)->ir_rmt_addr, 530 - inet_rsk(req)->ir_rmt_port, 531 - lopt->hash_rnd, lopt->nr_table_entries); 532 - 533 - reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); 534 - inet_csk_reqsk_queue_added(sk, timeout); 535 - } 536 - EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); 537 485 538 486 /* Only thing we need from tcp.h */ 539 487 extern int sysctl_tcp_synack_retries; ··· 518 572 } 519 573 EXPORT_SYMBOL(inet_rtx_syn_ack); 520 574 521 - /* return true if req was found in the syn_table[] */ 575 + /* return true if req was found in the ehash table */ 522 576 static bool reqsk_queue_unlink(struct request_sock_queue *queue, 523 577 struct request_sock *req) 524 578 { 525 - struct listen_sock *lopt = queue->listen_opt; 526 - struct request_sock **prev; 527 - bool found = false; 579 + struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo; 580 + spinlock_t *lock; 581 + bool found; 528 582 529 - spin_lock(&queue->syn_wait_lock); 583 + lock = inet_ehash_lockp(hashinfo, req->rsk_hash); 530 584 531 - for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; 532 - prev = &(*prev)->dl_next) { 533 - if (*prev == req) { 534 - *prev = req->dl_next; 535 - found = true; 536 - break; 537 - } 538 - } 585 + spin_lock(lock); 586 + found = __sk_nulls_del_node_init_rcu(req_to_sk(req)); 587 + spin_unlock(lock); 539 588 540 - spin_unlock(&queue->syn_wait_lock); 541 589 if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) 542 590 reqsk_put(req); 543 591 return found; ··· 552 612 struct sock *sk_listener = req->rsk_listener; 553 613 struct inet_connection_sock *icsk = inet_csk(sk_listener); 554 614 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 555 - struct listen_sock *lopt = queue->listen_opt; 556 615 int qlen, expire = 0, resend = 
0; 557 616 int max_retries, thresh; 558 617 u8 defer_accept; 559 618 560 - if (sk_listener->sk_state != TCP_LISTEN || !lopt) { 561 - reqsk_put(req); 562 - return; 563 - } 619 + if (sk_listener->sk_state != TCP_LISTEN) 620 + goto drop; 564 621 565 622 max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; 566 623 thresh = max_retries; ··· 578 641 * embrions; and abort old ones without pity, if old 579 642 * ones are about to clog our table. 580 643 */ 581 - qlen = listen_sock_qlen(lopt); 582 - if (qlen >> (lopt->max_qlen_log - 1)) { 583 - int young = listen_sock_young(lopt) << 1; 644 + qlen = reqsk_queue_len(queue); 645 + if ((qlen << 1) > sk_listener->sk_max_ack_backlog) { 646 + int young = reqsk_queue_len_young(queue) << 1; 584 647 585 648 while (thresh > 2) { 586 649 if (qlen < young) ··· 602 665 unsigned long timeo; 603 666 604 667 if (req->num_timeout++ == 0) 605 - atomic_inc(&lopt->young_dec); 668 + atomic_dec(&queue->young); 606 669 timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); 607 670 mod_timer_pinned(&req->rsk_timer, jiffies + timeo); 608 671 return; 609 672 } 673 + drop: 610 674 inet_csk_reqsk_queue_drop(sk_listener, req); 611 675 reqsk_put(req); 612 676 } 613 677 614 - void reqsk_queue_hash_req(struct request_sock_queue *queue, 615 - u32 hash, struct request_sock *req, 616 - unsigned long timeout) 678 + static void reqsk_queue_hash_req(struct request_sock *req, 679 + unsigned long timeout) 617 680 { 618 - struct listen_sock *lopt = queue->listen_opt; 619 - 620 681 req->num_retrans = 0; 621 682 req->num_timeout = 0; 622 683 req->sk = NULL; 623 684 624 685 setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req); 625 686 mod_timer_pinned(&req->rsk_timer, jiffies + timeout); 626 - req->rsk_hash = hash; 627 687 688 + inet_ehash_insert(req_to_sk(req), NULL); 628 689 /* before letting lookups find us, make sure all req fields 629 690 * are committed to memory and refcnt initialized. 
630 691 */ 631 692 smp_wmb(); 632 - atomic_set(&req->rsk_refcnt, 2); 633 - 634 - spin_lock(&queue->syn_wait_lock); 635 - req->dl_next = lopt->syn_table[hash]; 636 - lopt->syn_table[hash] = req; 637 - spin_unlock(&queue->syn_wait_lock); 693 + atomic_set(&req->rsk_refcnt, 2 + 1); 638 694 } 639 - EXPORT_SYMBOL(reqsk_queue_hash_req); 695 + 696 + void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, 697 + unsigned long timeout) 698 + { 699 + reqsk_queue_hash_req(req, timeout); 700 + inet_csk_reqsk_queue_added(sk); 701 + } 702 + EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); 640 703 641 704 /** 642 705 * inet_csk_clone_lock - clone an inet socket, and lock its clone ··· 729 792 730 793 int inet_csk_listen_start(struct sock *sk, const int nr_table_entries) 731 794 { 732 - struct inet_sock *inet = inet_sk(sk); 733 795 struct inet_connection_sock *icsk = inet_csk(sk); 734 - int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries); 796 + struct inet_sock *inet = inet_sk(sk); 735 797 736 - if (rc != 0) 737 - return rc; 798 + reqsk_queue_alloc(&icsk->icsk_accept_queue); 738 799 739 800 sk->sk_max_ack_backlog = 0; 740 801 sk->sk_ack_backlog = 0; ··· 754 819 } 755 820 756 821 sk->sk_state = TCP_CLOSE; 757 - __reqsk_queue_destroy(&icsk->icsk_accept_queue); 758 822 return -EADDRINUSE; 759 823 } 760 824 EXPORT_SYMBOL_GPL(inet_csk_listen_start); ··· 766 832 { 767 833 struct inet_connection_sock *icsk = inet_csk(sk); 768 834 struct request_sock_queue *queue = &icsk->icsk_accept_queue; 769 - struct request_sock *acc_req; 770 - struct request_sock *req; 771 - 772 - /* make all the listen_opt local to us */ 773 - acc_req = reqsk_queue_yank_acceptq(queue); 835 + struct request_sock *next, *req; 774 836 775 837 /* Following specs, it would be better either to send FIN 776 838 * (and enter FIN-WAIT-1, it is normal close) ··· 776 846 * To be honest, we are not able to make either 777 847 * of the variants now. 
--ANK 778 848 */ 779 - reqsk_queue_destroy(queue); 780 - 781 - while ((req = acc_req) != NULL) { 849 + while ((req = reqsk_queue_remove(queue, sk)) != NULL) { 782 850 struct sock *child = req->sk; 783 - 784 - acc_req = req->dl_next; 785 851 786 852 local_bh_disable(); 787 853 bh_lock_sock(child); ··· 808 882 local_bh_enable(); 809 883 sock_put(child); 810 884 811 - sk_acceptq_removed(sk); 812 885 reqsk_put(req); 886 + cond_resched(); 813 887 } 814 888 if (queue->fastopenq.rskq_rst_head) { 815 889 /* Free all the reqs queued in rskq_rst_head. */ 816 890 spin_lock_bh(&queue->fastopenq.lock); 817 - acc_req = queue->fastopenq.rskq_rst_head; 891 + req = queue->fastopenq.rskq_rst_head; 818 892 queue->fastopenq.rskq_rst_head = NULL; 819 893 spin_unlock_bh(&queue->fastopenq.lock); 820 - while ((req = acc_req) != NULL) { 821 - acc_req = req->dl_next; 894 + while (req != NULL) { 895 + next = req->dl_next; 822 896 reqsk_put(req); 897 + req = next; 823 898 } 824 899 } 825 900 WARN_ON(sk->sk_ack_backlog);
+8 -88
net/ipv4/inet_diag.c
··· 730 730 #endif 731 731 } 732 732 733 - static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, 734 - struct netlink_callback *cb, 735 - const struct inet_diag_req_v2 *r, 736 - const struct nlattr *bc) 737 - { 738 - struct inet_connection_sock *icsk = inet_csk(sk); 739 - struct inet_sock *inet = inet_sk(sk); 740 - struct inet_diag_entry entry; 741 - int j, s_j, reqnum, s_reqnum; 742 - struct listen_sock *lopt; 743 - int err = 0; 744 - 745 - s_j = cb->args[3]; 746 - s_reqnum = cb->args[4]; 747 - 748 - if (s_j > 0) 749 - s_j--; 750 - 751 - entry.family = sk->sk_family; 752 - 753 - spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); 754 - 755 - lopt = icsk->icsk_accept_queue.listen_opt; 756 - if (!lopt || !listen_sock_qlen(lopt)) 757 - goto out; 758 - 759 - if (bc) { 760 - entry.sport = inet->inet_num; 761 - entry.userlocks = sk->sk_userlocks; 762 - } 763 - 764 - for (j = s_j; j < lopt->nr_table_entries; j++) { 765 - struct request_sock *req, *head = lopt->syn_table[j]; 766 - 767 - reqnum = 0; 768 - for (req = head; req; reqnum++, req = req->dl_next) { 769 - struct inet_request_sock *ireq = inet_rsk(req); 770 - 771 - if (reqnum < s_reqnum) 772 - continue; 773 - if (r->id.idiag_dport != ireq->ir_rmt_port && 774 - r->id.idiag_dport) 775 - continue; 776 - 777 - if (bc) { 778 - /* Note: entry.sport and entry.userlocks are already set */ 779 - entry_fill_addrs(&entry, req_to_sk(req)); 780 - entry.dport = ntohs(ireq->ir_rmt_port); 781 - 782 - if (!inet_diag_bc_run(bc, &entry)) 783 - continue; 784 - } 785 - 786 - err = inet_req_diag_fill(req_to_sk(req), skb, 787 - NETLINK_CB(cb->skb).portid, 788 - cb->nlh->nlmsg_seq, 789 - NLM_F_MULTI, cb->nlh); 790 - if (err < 0) { 791 - cb->args[3] = j + 1; 792 - cb->args[4] = reqnum; 793 - goto out; 794 - } 795 - } 796 - 797 - s_reqnum = 0; 798 - } 799 - 800 - out: 801 - spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); 802 - 803 - return err; 804 - } 805 - 806 733 void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, 
struct sk_buff *skb, 807 734 struct netlink_callback *cb, 808 735 const struct inet_diag_req_v2 *r, struct nlattr *bc) 809 736 { 810 737 struct net *net = sock_net(skb->sk); 811 738 int i, num, s_i, s_num; 739 + u32 idiag_states = r->idiag_states; 812 740 741 + if (idiag_states & TCPF_SYN_RECV) 742 + idiag_states |= TCPF_NEW_SYN_RECV; 813 743 s_i = cb->args[1]; 814 744 s_num = num = cb->args[2]; 815 745 816 746 if (cb->args[0] == 0) { 817 - if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) 747 + if (!(idiag_states & TCPF_LISTEN)) 818 748 goto skip_listen_ht; 819 749 820 750 for (i = s_i; i < INET_LHTABLE_SIZE; i++) { ··· 774 844 r->id.idiag_sport) 775 845 goto next_listen; 776 846 777 - if (!(r->idiag_states & TCPF_LISTEN) || 778 - r->id.idiag_dport || 847 + if (r->id.idiag_dport || 779 848 cb->args[3] > 0) 780 - goto syn_recv; 781 - 782 - if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { 783 - spin_unlock_bh(&ilb->lock); 784 - goto done; 785 - } 786 - 787 - syn_recv: 788 - if (!(r->idiag_states & TCPF_SYN_RECV)) 789 849 goto next_listen; 790 850 791 - if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) { 851 + if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { 792 852 spin_unlock_bh(&ilb->lock); 793 853 goto done; 794 854 } ··· 799 879 s_i = num = s_num = 0; 800 880 } 801 881 802 - if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) 882 + if (!(idiag_states & ~TCPF_LISTEN)) 803 883 goto out; 804 884 805 885 for (i = s_i; i <= hashinfo->ehash_mask; i++) { ··· 826 906 goto next_normal; 827 907 state = (sk->sk_state == TCP_TIME_WAIT) ? 828 908 inet_twsk(sk)->tw_substate : sk->sk_state; 829 - if (!(r->idiag_states & (1 << state))) 909 + if (!(idiag_states & (1 << state))) 830 910 goto next_normal; 831 911 if (r->sdiag_family != AF_UNSPEC && 832 912 sk->sk_family != r->sdiag_family)
+12 -2
net/ipv4/inet_hashtables.c
··· 398 398 inet->inet_dport); 399 399 } 400 400 401 - void __inet_hash_nolisten(struct sock *sk, struct sock *osk) 401 + /* insert a socket into ehash, and eventually remove another one 402 + * (The another one can be a SYN_RECV or TIMEWAIT 403 + */ 404 + int inet_ehash_insert(struct sock *sk, struct sock *osk) 402 405 { 403 406 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 404 407 struct hlist_nulls_head *list; 405 408 struct inet_ehash_bucket *head; 406 409 spinlock_t *lock; 410 + int ret = 0; 407 411 408 - WARN_ON(!sk_unhashed(sk)); 412 + WARN_ON_ONCE(!sk_unhashed(sk)); 409 413 410 414 sk->sk_hash = sk_ehashfn(sk); 411 415 head = inet_ehash_bucket(hashinfo, sk->sk_hash); ··· 423 419 sk_nulls_del_node_init_rcu(osk); 424 420 } 425 421 spin_unlock(lock); 422 + return ret; 423 + } 424 + 425 + void __inet_hash_nolisten(struct sock *sk, struct sock *osk) 426 + { 427 + inet_ehash_insert(sk, osk); 426 428 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 427 429 } 428 430 EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
+4
net/ipv4/syncookies.c
··· 284 284 } 285 285 EXPORT_SYMBOL(cookie_ecn_ok); 286 286 287 + /* On input, sk is a listener. 288 + * Output is listener if incoming packet would not create a child 289 + * NULL if memory could not be allocated. 290 + */ 287 291 struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) 288 292 { 289 293 struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
+2 -2
net/ipv4/tcp_fastopen.c
··· 161 161 tp->snd_wnd = ntohs(tcp_hdr(skb)->window); 162 162 163 163 /* Activate the retrans timer so that SYNACK can be retransmitted. 164 - * The request socket is not added to the SYN table of the parent 164 + * The request socket is not added to the ehash 165 165 * because it's been added to the accept queue directly. 166 166 */ 167 167 inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS, 168 168 TCP_TIMEOUT_INIT, TCP_RTO_MAX); 169 169 170 - atomic_set(&req->rsk_refcnt, 1); 170 + atomic_set(&req->rsk_refcnt, 2); 171 171 /* Add the child socket directly into the accept queue */ 172 172 inet_csk_reqsk_queue_add(sk, req, child); 173 173
+15 -15
net/ipv4/tcp_input.c
··· 6068 6068 const struct sk_buff *skb, 6069 6069 const char *proto) 6070 6070 { 6071 + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; 6071 6072 const char *msg = "Dropping request"; 6072 6073 bool want_cookie = false; 6073 - struct listen_sock *lopt; 6074 6074 6075 6075 #ifdef CONFIG_SYN_COOKIES 6076 6076 if (sysctl_tcp_syncookies) { ··· 6081 6081 #endif 6082 6082 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP); 6083 6083 6084 - lopt = inet_csk(sk)->icsk_accept_queue.listen_opt; 6085 - if (!lopt->synflood_warned && 6084 + if (!queue->synflood_warned && 6086 6085 sysctl_tcp_syncookies != 2 && 6087 - xchg(&lopt->synflood_warned, 1) == 0) 6086 + xchg(&queue->synflood_warned, 1) == 0) 6088 6087 pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", 6089 6088 proto, ntohs(tcp_hdr(skb)->dest), msg); 6090 6089 ··· 6120 6121 struct request_sock *req; 6121 6122 bool want_cookie = false; 6122 6123 struct flowi fl; 6123 - int err; 6124 - 6125 6124 6126 6125 /* TW buckets are converted to open requests without 6127 6126 * limitations, they conserve resources and peer is ··· 6228 6231 tcp_rsk(req)->snt_isn = isn; 6229 6232 tcp_rsk(req)->txhash = net_tx_rndhash(); 6230 6233 tcp_openreq_init_rwin(req, sk, dst); 6231 - if (!want_cookie) 6234 + if (!want_cookie) { 6232 6235 fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst); 6233 - err = af_ops->send_synack(fastopen_sk ?: sk, dst, &fl, req, 6234 - skb_get_queue_mapping(skb), &foc); 6236 + tcp_reqsk_record_syn(sk, req, skb); 6237 + } 6235 6238 if (fastopen_sk) { 6239 + af_ops->send_synack(fastopen_sk, dst, &fl, req, 6240 + skb_get_queue_mapping(skb), &foc, false); 6236 6241 sock_put(fastopen_sk); 6237 6242 } else { 6238 - if (err || want_cookie) 6239 - goto drop_and_free; 6240 - 6241 6243 tcp_rsk(req)->tfo_listener = false; 6242 - af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 6244 + if (!want_cookie) 6245 + inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); 6246 
+ af_ops->send_synack(sk, dst, &fl, req, 6247 + skb_get_queue_mapping(skb), &foc, !want_cookie); 6248 + if (want_cookie) 6249 + goto drop_and_free; 6243 6250 } 6244 - tcp_reqsk_record_syn(sk, req, skb); 6245 - 6251 + reqsk_put(req); 6246 6252 return 0; 6247 6253 6248 6254 drop_and_release:
+57 -104
net/ipv4/tcp_ipv4.c
··· 822 822 struct flowi *fl, 823 823 struct request_sock *req, 824 824 u16 queue_mapping, 825 - struct tcp_fastopen_cookie *foc) 825 + struct tcp_fastopen_cookie *foc, 826 + bool attach_req) 826 827 { 827 828 const struct inet_request_sock *ireq = inet_rsk(req); 828 829 struct flowi4 fl4; ··· 834 833 if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) 835 834 return -1; 836 835 837 - skb = tcp_make_synack(sk, dst, req, foc); 836 + skb = tcp_make_synack(sk, dst, req, foc, attach_req); 838 837 839 838 if (skb) { 840 839 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); ··· 1113 1112 } 1114 1113 EXPORT_SYMBOL(tcp_v4_md5_hash_skb); 1115 1114 1115 + #endif 1116 + 1116 1117 /* Called with rcu_read_lock() */ 1117 - static bool tcp_v4_inbound_md5_hash(struct sock *sk, 1118 + static bool tcp_v4_inbound_md5_hash(const struct sock *sk, 1118 1119 const struct sk_buff *skb) 1119 1120 { 1121 + #ifdef CONFIG_TCP_MD5SIG 1120 1122 /* 1121 1123 * This gets called for each TCP segment that arrives 1122 1124 * so we want to be efficient. 
··· 1169 1165 return true; 1170 1166 } 1171 1167 return false; 1172 - } 1173 1168 #endif 1169 + return false; 1170 + } 1174 1171 1175 1172 static void tcp_v4_init_req(struct request_sock *req, 1176 1173 const struct sock *sk_listener, ··· 1225 1220 .route_req = tcp_v4_route_req, 1226 1221 .init_seq = tcp_v4_init_sequence, 1227 1222 .send_synack = tcp_v4_send_synack, 1228 - .queue_hash_add = inet_csk_reqsk_queue_hash_add, 1229 1223 }; 1230 1224 1231 1225 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ··· 1343 1339 } 1344 1340 EXPORT_SYMBOL(tcp_v4_syn_recv_sock); 1345 1341 1346 - static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) 1342 + static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) 1347 1343 { 1348 - const struct tcphdr *th = tcp_hdr(skb); 1349 - const struct iphdr *iph = ip_hdr(skb); 1350 - struct request_sock *req; 1351 - struct sock *nsk; 1352 - 1353 - req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); 1354 - if (req) { 1355 - nsk = tcp_check_req(sk, skb, req, false); 1356 - if (!nsk || nsk == sk) 1357 - reqsk_put(req); 1358 - return nsk; 1359 - } 1360 - 1361 - nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, 1362 - th->source, iph->daddr, th->dest, inet_iif(skb)); 1363 - 1364 - if (nsk) { 1365 - if (nsk->sk_state != TCP_TIME_WAIT) { 1366 - bh_lock_sock(nsk); 1367 - return nsk; 1368 - } 1369 - inet_twsk_put(inet_twsk(nsk)); 1370 - return NULL; 1371 - } 1372 - 1373 1344 #ifdef CONFIG_SYN_COOKIES 1345 + const struct tcphdr *th = tcp_hdr(skb); 1346 + 1374 1347 if (!th->syn) 1375 1348 sk = cookie_v4_check(sk, skb); 1376 1349 #endif ··· 1355 1374 } 1356 1375 1357 1376 /* The socket must have it's spinlock held when we get 1358 - * here. 1377 + * here, unless it is a TCP_LISTEN socket. 1359 1378 * 1360 1379 * We have a potential double-lock case here, so even when 1361 1380 * doing backlog processing we use the BH locking scheme. 
··· 1386 1405 goto csum_err; 1387 1406 1388 1407 if (sk->sk_state == TCP_LISTEN) { 1389 - struct sock *nsk = tcp_v4_hnd_req(sk, skb); 1408 + struct sock *nsk = tcp_v4_cookie_check(sk, skb); 1409 + 1390 1410 if (!nsk) 1391 1411 goto discard; 1392 - 1393 1412 if (nsk != sk) { 1394 1413 sock_rps_save_rxhash(nsk, skb); 1395 - sk_mark_napi_id(sk, skb); 1414 + sk_mark_napi_id(nsk, skb); 1396 1415 if (tcp_child_process(sk, nsk, skb)) { 1397 1416 rsk = nsk; 1398 1417 goto reset; ··· 1580 1599 if (sk->sk_state == TCP_TIME_WAIT) 1581 1600 goto do_time_wait; 1582 1601 1602 + if (sk->sk_state == TCP_NEW_SYN_RECV) { 1603 + struct request_sock *req = inet_reqsk(sk); 1604 + struct sock *nsk = NULL; 1605 + 1606 + sk = req->rsk_listener; 1607 + if (tcp_v4_inbound_md5_hash(sk, skb)) 1608 + goto discard_and_relse; 1609 + if (sk->sk_state == TCP_LISTEN) 1610 + nsk = tcp_check_req(sk, skb, req, false); 1611 + if (!nsk) { 1612 + reqsk_put(req); 1613 + goto discard_it; 1614 + } 1615 + if (nsk == sk) { 1616 + sock_hold(sk); 1617 + reqsk_put(req); 1618 + } else if (tcp_child_process(sk, nsk, skb)) { 1619 + tcp_v4_send_reset(nsk, skb); 1620 + goto discard_it; 1621 + } else { 1622 + return 0; 1623 + } 1624 + } 1583 1625 if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) { 1584 1626 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1585 1627 goto discard_and_relse; ··· 1611 1607 if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) 1612 1608 goto discard_and_relse; 1613 1609 1614 - #ifdef CONFIG_TCP_MD5SIG 1615 - /* 1616 - * We really want to reject the packet as early as possible 1617 - * if: 1618 - * o We're expecting an MD5'd packet and this is no MD5 tcp option 1619 - * o There is an MD5 option and we're not expecting one 1620 - */ 1621 1610 if (tcp_v4_inbound_md5_hash(sk, skb)) 1622 1611 goto discard_and_relse; 1623 - #endif 1624 1612 1625 1613 nf_reset(skb); 1626 1614 1627 1615 if (sk_filter(sk, skb)) 1628 1616 goto discard_and_relse; 1629 1617 1630 - sk_incoming_cpu_update(sk); 1631 1618 
skb->dev = NULL; 1619 + 1620 + if (sk->sk_state == TCP_LISTEN) { 1621 + ret = tcp_v4_do_rcv(sk, skb); 1622 + goto put_and_return; 1623 + } 1624 + 1625 + sk_incoming_cpu_update(sk); 1632 1626 1633 1627 bh_lock_sock_nested(sk); 1634 1628 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ··· 1642 1640 } 1643 1641 bh_unlock_sock(sk); 1644 1642 1643 + put_and_return: 1645 1644 sock_put(sk); 1646 1645 1647 1646 return ret; ··· 1837 1834 ++st->num; 1838 1835 ++st->offset; 1839 1836 1840 - if (st->state == TCP_SEQ_STATE_OPENREQ) { 1841 - struct request_sock *req = cur; 1842 - 1843 - icsk = inet_csk(st->syn_wait_sk); 1844 - req = req->dl_next; 1845 - while (1) { 1846 - while (req) { 1847 - if (req->rsk_ops->family == st->family) { 1848 - cur = req; 1849 - goto out; 1850 - } 1851 - req = req->dl_next; 1852 - } 1853 - if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries) 1854 - break; 1855 - get_req: 1856 - req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; 1857 - } 1858 - sk = sk_nulls_next(st->syn_wait_sk); 1859 - st->state = TCP_SEQ_STATE_LISTENING; 1860 - spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1861 - } else { 1862 - icsk = inet_csk(sk); 1863 - spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1864 - if (reqsk_queue_len(&icsk->icsk_accept_queue)) 1865 - goto start_req; 1866 - spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1867 - sk = sk_nulls_next(sk); 1868 - } 1837 + sk = sk_nulls_next(sk); 1869 1838 get_sk: 1870 1839 sk_nulls_for_each_from(sk, node) { 1871 1840 if (!net_eq(sock_net(sk), net)) ··· 1847 1872 goto out; 1848 1873 } 1849 1874 icsk = inet_csk(sk); 1850 - spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1851 - if (reqsk_queue_len(&icsk->icsk_accept_queue)) { 1852 - start_req: 1853 - st->uid = sock_i_uid(sk); 1854 - st->syn_wait_sk = sk; 1855 - st->state = TCP_SEQ_STATE_OPENREQ; 1856 - st->sbucket = 0; 1857 - goto get_req; 1858 - } 1859 - 
spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 1860 1875 } 1861 1876 spin_unlock_bh(&ilb->lock); 1862 1877 st->offset = 0; ··· 1978 2013 void *rc = NULL; 1979 2014 1980 2015 switch (st->state) { 1981 - case TCP_SEQ_STATE_OPENREQ: 1982 2016 case TCP_SEQ_STATE_LISTENING: 1983 2017 if (st->bucket >= INET_LHTABLE_SIZE) 1984 2018 break; ··· 2036 2072 } 2037 2073 2038 2074 switch (st->state) { 2039 - case TCP_SEQ_STATE_OPENREQ: 2040 2075 case TCP_SEQ_STATE_LISTENING: 2041 2076 rc = listening_get_next(seq, v); 2042 2077 if (!rc) { ··· 2060 2097 struct tcp_iter_state *st = seq->private; 2061 2098 2062 2099 switch (st->state) { 2063 - case TCP_SEQ_STATE_OPENREQ: 2064 - if (v) { 2065 - struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); 2066 - spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); 2067 - } 2068 2100 case TCP_SEQ_STATE_LISTENING: 2069 2101 if (v != SEQ_START_TOKEN) 2070 2102 spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock); ··· 2113 2155 EXPORT_SYMBOL(tcp_proc_unregister); 2114 2156 2115 2157 static void get_openreq4(const struct request_sock *req, 2116 - struct seq_file *f, int i, kuid_t uid) 2158 + struct seq_file *f, int i) 2117 2159 { 2118 2160 const struct inet_request_sock *ireq = inet_rsk(req); 2119 2161 long delta = req->rsk_timer.expires - jiffies; ··· 2130 2172 1, /* timers active (only the expire timer) */ 2131 2173 jiffies_delta_to_clock_t(delta), 2132 2174 req->num_timeout, 2133 - from_kuid_munged(seq_user_ns(f), uid), 2175 + from_kuid_munged(seq_user_ns(f), 2176 + sock_i_uid(req->rsk_listener)), 2134 2177 0, /* non standard timer */ 2135 2178 0, /* open_requests have no inode */ 2136 2179 0, ··· 2232 2273 } 2233 2274 st = seq->private; 2234 2275 2235 - switch (st->state) { 2236 - case TCP_SEQ_STATE_LISTENING: 2237 - case TCP_SEQ_STATE_ESTABLISHED: 2238 - if (sk->sk_state == TCP_TIME_WAIT) 2239 - get_timewait4_sock(v, seq, st->num); 2240 - else 2241 - get_tcp4_sock(v, seq, st->num); 2242 - break; 2243 - case 
TCP_SEQ_STATE_OPENREQ: 2244 - get_openreq4(v, seq, st->num, st->uid); 2245 - break; 2246 - } 2276 + if (sk->sk_state == TCP_TIME_WAIT) 2277 + get_timewait4_sock(v, seq, st->num); 2278 + else if (sk->sk_state == TCP_NEW_SYN_RECV) 2279 + get_openreq4(v, seq, st->num); 2280 + else 2281 + get_tcp4_sock(v, seq, st->num); 2247 2282 out: 2248 2283 seq_pad(seq, '\n'); 2249 2284 return 0;
-2
net/ipv4/tcp_minisocks.c
··· 578 578 __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); 579 579 bool paws_reject = false; 580 580 581 - BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN)); 582 - 583 581 tmp_opt.saw_tstamp = 0; 584 582 if (th->doff > (sizeof(struct tcphdr)>>2)) { 585 583 tcp_parse_options(skb, &tmp_opt, 0, NULL);
+15 -7
net/ipv4/tcp_output.c
··· 2947 2947 */ 2948 2948 struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, 2949 2949 struct request_sock *req, 2950 - struct tcp_fastopen_cookie *foc) 2950 + struct tcp_fastopen_cookie *foc, 2951 + bool attach_req) 2951 2952 { 2952 2953 struct inet_request_sock *ireq = inet_rsk(req); 2953 2954 const struct tcp_sock *tp = tcp_sk(sk); ··· 2960 2959 u16 user_mss; 2961 2960 int mss; 2962 2961 2963 - /* sk is a const pointer, because we want to express multiple cpus 2964 - * might call us concurrently. 2965 - * sock_wmalloc() will change sk->sk_wmem_alloc in an atomic way. 2966 - */ 2967 - skb = sock_wmalloc((struct sock *)sk, MAX_TCP_HEADER, 1, GFP_ATOMIC); 2962 + skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 2968 2963 if (unlikely(!skb)) { 2969 2964 dst_release(dst); 2970 2965 return NULL; ··· 2968 2971 /* Reserve space for headers. */ 2969 2972 skb_reserve(skb, MAX_TCP_HEADER); 2970 2973 2974 + if (attach_req) { 2975 + skb->destructor = sock_edemux; 2976 + sock_hold(req_to_sk(req)); 2977 + skb->sk = req_to_sk(req); 2978 + } else { 2979 + /* sk is a const pointer, because we want to express multiple 2980 + * cpu might call us concurrently. 2981 + * sk->sk_wmem_alloc in an atomic, we can promote to rw. 2982 + */ 2983 + skb_set_owner_w(skb, (struct sock *)sk); 2984 + } 2971 2985 skb_dst_set(skb, dst); 2972 2986 2973 2987 mss = dst_metric_advmss(dst); ··· 3518 3510 int res; 3519 3511 3520 3512 tcp_rsk(req)->txhash = net_tx_rndhash(); 3521 - res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL); 3513 + res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL, true); 3522 3514 if (!res) { 3523 3515 TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); 3524 3516 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
-67
net/ipv6/inet6_connection_sock.c
··· 94 94 } 95 95 EXPORT_SYMBOL(inet6_csk_route_req); 96 96 97 - /* 98 - * request_sock (formerly open request) hash tables. 99 - */ 100 - static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, 101 - const u32 rnd, const u32 synq_hsize) 102 - { 103 - u32 c; 104 - 105 - c = jhash_3words((__force u32)raddr->s6_addr32[0], 106 - (__force u32)raddr->s6_addr32[1], 107 - (__force u32)raddr->s6_addr32[2], 108 - rnd); 109 - 110 - c = jhash_2words((__force u32)raddr->s6_addr32[3], 111 - (__force u32)rport, 112 - c); 113 - 114 - return c & (synq_hsize - 1); 115 - } 116 - 117 - struct request_sock *inet6_csk_search_req(struct sock *sk, 118 - const __be16 rport, 119 - const struct in6_addr *raddr, 120 - const struct in6_addr *laddr, 121 - const int iif) 122 - { 123 - struct inet_connection_sock *icsk = inet_csk(sk); 124 - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 125 - struct request_sock *req; 126 - u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd, 127 - lopt->nr_table_entries); 128 - 129 - spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); 130 - for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { 131 - const struct inet_request_sock *ireq = inet_rsk(req); 132 - 133 - if (ireq->ir_rmt_port == rport && 134 - req->rsk_ops->family == AF_INET6 && 135 - ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && 136 - ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && 137 - (!ireq->ir_iif || ireq->ir_iif == iif)) { 138 - atomic_inc(&req->rsk_refcnt); 139 - WARN_ON(req->sk != NULL); 140 - break; 141 - } 142 - } 143 - spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); 144 - 145 - return req; 146 - } 147 - EXPORT_SYMBOL_GPL(inet6_csk_search_req); 148 - 149 - void inet6_csk_reqsk_queue_hash_add(struct sock *sk, 150 - struct request_sock *req, 151 - const unsigned long timeout) 152 - { 153 - struct inet_connection_sock *icsk = inet_csk(sk); 154 - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; 155 - const u32 h = 
inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr, 156 - inet_rsk(req)->ir_rmt_port, 157 - lopt->hash_rnd, lopt->nr_table_entries); 158 - 159 - reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); 160 - inet_csk_reqsk_queue_added(sk, timeout); 161 - } 162 - EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add); 163 - 164 97 void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) 165 98 { 166 99 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
+60 -59
net/ipv6/tcp_ipv6.c
··· 438 438 struct flowi *fl, 439 439 struct request_sock *req, 440 440 u16 queue_mapping, 441 - struct tcp_fastopen_cookie *foc) 441 + struct tcp_fastopen_cookie *foc, 442 + bool attach_req) 442 443 { 443 444 struct inet_request_sock *ireq = inet_rsk(req); 444 445 struct ipv6_pinfo *np = inet6_sk(sk); ··· 452 451 IPPROTO_TCP)) == NULL) 453 452 goto done; 454 453 455 - skb = tcp_make_synack(sk, dst, req, foc); 454 + skb = tcp_make_synack(sk, dst, req, foc, attach_req); 456 455 457 456 if (skb) { 458 457 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, ··· 623 622 return 1; 624 623 } 625 624 626 - static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) 625 + #endif 626 + 627 + static bool tcp_v6_inbound_md5_hash(const struct sock *sk, 628 + const struct sk_buff *skb) 627 629 { 630 + #ifdef CONFIG_TCP_MD5SIG 628 631 const __u8 *hash_location = NULL; 629 632 struct tcp_md5sig_key *hash_expected; 630 633 const struct ipv6hdr *ip6h = ipv6_hdr(skb); ··· 665 660 &ip6h->daddr, ntohs(th->dest)); 666 661 return true; 667 662 } 663 + #endif 668 664 return false; 669 665 } 670 - #endif 671 666 672 667 static void tcp_v6_init_req(struct request_sock *req, 673 668 const struct sock *sk_listener, ··· 728 723 .route_req = tcp_v6_route_req, 729 724 .init_seq = tcp_v6_init_sequence, 730 725 .send_synack = tcp_v6_send_synack, 731 - .queue_hash_add = inet6_csk_reqsk_queue_hash_add, 732 726 }; 733 727 734 728 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, ··· 938 934 } 939 935 940 936 941 - static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) 937 + static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 942 938 { 943 - const struct tcphdr *th = tcp_hdr(skb); 944 - struct request_sock *req; 945 - struct sock *nsk; 946 - 947 - /* Find possible connection requests. 
*/ 948 - req = inet6_csk_search_req(sk, th->source, 949 - &ipv6_hdr(skb)->saddr, 950 - &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); 951 - if (req) { 952 - nsk = tcp_check_req(sk, skb, req, false); 953 - if (!nsk || nsk == sk) 954 - reqsk_put(req); 955 - return nsk; 956 - } 957 - nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, 958 - &ipv6_hdr(skb)->saddr, th->source, 959 - &ipv6_hdr(skb)->daddr, ntohs(th->dest), 960 - tcp_v6_iif(skb)); 961 - 962 - if (nsk) { 963 - if (nsk->sk_state != TCP_TIME_WAIT) { 964 - bh_lock_sock(nsk); 965 - return nsk; 966 - } 967 - inet_twsk_put(inet_twsk(nsk)); 968 - return NULL; 969 - } 970 - 971 939 #ifdef CONFIG_SYN_COOKIES 940 + const struct tcphdr *th = tcp_hdr(skb); 941 + 972 942 if (!th->syn) 973 943 sk = cookie_v6_check(sk, skb); 974 944 #endif ··· 1161 1183 } 1162 1184 1163 1185 /* The socket must have it's spinlock held when we get 1164 - * here. 1186 + * here, unless it is a TCP_LISTEN socket. 1165 1187 * 1166 1188 * We have a potential double-lock case here, so even when 1167 1189 * doing backlog processing we use the BH locking scheme. ··· 1232 1254 goto csum_err; 1233 1255 1234 1256 if (sk->sk_state == TCP_LISTEN) { 1235 - struct sock *nsk = tcp_v6_hnd_req(sk, skb); 1257 + struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1258 + 1236 1259 if (!nsk) 1237 1260 goto discard; 1238 1261 1239 - /* 1240 - * Queue it on the new socket if the new socket is active, 1241 - * otherwise we just shortcircuit this and continue with 1242 - * the new socket.. 
1243 - */ 1244 1262 if (nsk != sk) { 1245 1263 sock_rps_save_rxhash(nsk, skb); 1246 - sk_mark_napi_id(sk, skb); 1264 + sk_mark_napi_id(nsk, skb); 1247 1265 if (tcp_child_process(sk, nsk, skb)) 1248 1266 goto reset; 1249 1267 if (opt_skb) ··· 1372 1398 if (sk->sk_state == TCP_TIME_WAIT) 1373 1399 goto do_time_wait; 1374 1400 1401 + if (sk->sk_state == TCP_NEW_SYN_RECV) { 1402 + struct request_sock *req = inet_reqsk(sk); 1403 + struct sock *nsk = NULL; 1404 + 1405 + sk = req->rsk_listener; 1406 + tcp_v6_fill_cb(skb, hdr, th); 1407 + if (tcp_v6_inbound_md5_hash(sk, skb)) { 1408 + reqsk_put(req); 1409 + goto discard_it; 1410 + } 1411 + if (sk->sk_state == TCP_LISTEN) 1412 + nsk = tcp_check_req(sk, skb, req, false); 1413 + if (!nsk) { 1414 + reqsk_put(req); 1415 + goto discard_it; 1416 + } 1417 + if (nsk == sk) { 1418 + sock_hold(sk); 1419 + reqsk_put(req); 1420 + tcp_v6_restore_cb(skb); 1421 + } else if (tcp_child_process(sk, nsk, skb)) { 1422 + tcp_v6_send_reset(nsk, skb); 1423 + goto discard_it; 1424 + } else { 1425 + return 0; 1426 + } 1427 + } 1375 1428 if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) { 1376 1429 NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP); 1377 1430 goto discard_and_relse; ··· 1409 1408 1410 1409 tcp_v6_fill_cb(skb, hdr, th); 1411 1410 1412 - #ifdef CONFIG_TCP_MD5SIG 1413 1411 if (tcp_v6_inbound_md5_hash(sk, skb)) 1414 1412 goto discard_and_relse; 1415 - #endif 1416 1413 1417 1414 if (sk_filter(sk, skb)) 1418 1415 goto discard_and_relse; 1419 1416 1420 - sk_incoming_cpu_update(sk); 1421 1417 skb->dev = NULL; 1418 + 1419 + if (sk->sk_state == TCP_LISTEN) { 1420 + ret = tcp_v6_do_rcv(sk, skb); 1421 + goto put_and_return; 1422 + } 1423 + 1424 + sk_incoming_cpu_update(sk); 1422 1425 1423 1426 bh_lock_sock_nested(sk); 1424 1427 tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ··· 1438 1433 } 1439 1434 bh_unlock_sock(sk); 1440 1435 1436 + put_and_return: 1441 1437 sock_put(sk); 1442 1438 return ret ? 
-1 : 0; 1443 1439 ··· 1639 1633 #ifdef CONFIG_PROC_FS 1640 1634 /* Proc filesystem TCPv6 sock list dumping. */ 1641 1635 static void get_openreq6(struct seq_file *seq, 1642 - struct request_sock *req, int i, kuid_t uid) 1636 + const struct request_sock *req, int i) 1643 1637 { 1644 1638 long ttd = req->rsk_timer.expires - jiffies; 1645 1639 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; ··· 1663 1657 1, /* timers active (only the expire timer) */ 1664 1658 jiffies_to_clock_t(ttd), 1665 1659 req->num_timeout, 1666 - from_kuid_munged(seq_user_ns(seq), uid), 1660 + from_kuid_munged(seq_user_ns(seq), 1661 + sock_i_uid(req->rsk_listener)), 1667 1662 0, /* non standard timer */ 1668 1663 0, /* open_requests have no inode */ 1669 1664 0, req); ··· 1769 1762 } 1770 1763 st = seq->private; 1771 1764 1772 - switch (st->state) { 1773 - case TCP_SEQ_STATE_LISTENING: 1774 - case TCP_SEQ_STATE_ESTABLISHED: 1775 - if (sk->sk_state == TCP_TIME_WAIT) 1776 - get_timewait6_sock(seq, v, st->num); 1777 - else 1778 - get_tcp6_sock(seq, v, st->num); 1779 - break; 1780 - case TCP_SEQ_STATE_OPENREQ: 1781 - get_openreq6(seq, v, st->num, st->uid); 1782 - break; 1783 - } 1765 + if (sk->sk_state == TCP_TIME_WAIT) 1766 + get_timewait6_sock(seq, v, st->num); 1767 + else if (sk->sk_state == TCP_NEW_SYN_RECV) 1768 + get_openreq6(seq, v, st->num); 1769 + else 1770 + get_tcp6_sock(seq, v, st->num); 1784 1771 out: 1785 1772 return 0; 1786 1773 }
+7 -5
net/sched/sch_fq.c
··· 224 224 if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) 225 225 return &q->internal; 226 226 227 - /* SYNACK messages are attached to a listener socket. 228 - * 1) They are not part of a 'flow' yet 229 - * 2) We do not want to rate limit them (eg SYNFLOOD attack), 227 + /* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket 228 + * 1) request sockets are not full blown, 229 + * they do not contain sk_pacing_rate 230 + * 2) They are not part of a 'flow' yet 231 + * 3) We do not want to rate limit them (eg SYNFLOOD attack), 230 232 * especially if the listener set SO_MAX_PACING_RATE 231 - * 3) We pretend they are orphaned 233 + * 4) We pretend they are orphaned 232 234 */ 233 - if (!sk || sk->sk_state == TCP_LISTEN) { 235 + if (!sk || sk->sk_state == TCP_NEW_SYN_RECV) { 234 236 unsigned long hash = skb_get_hash(skb) & q->orphan_mask; 235 237 236 238 /* By forcing low order bit to 1, we make sure to not