Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mptcp-fix-MP_JOIN-failure-handling'

Paolo Abeni says:

====================
mptcp: fix MP_JOIN failure handling

Currently, if we hit an MP_JOIN failure on the third ack, the child socket is
closed with a reset, but the request socket is not deleted, causing weird
behaviors.

The main problem is that MPTCP's MP_JOIN code needs to plug its own
'valid 3rd ack' checks and the current TCP callbacks do not allow that.

This series tries to address the above shortcoming by introducing a new
MPTCP-specific bit in a 'struct tcp_request_sock' hole, and leveraging that to
allow tcp_check_req() to release the request socket when needed.

The above also allows cleaning up the current MPTCP hooking in tcp_check_req() a bit.

An alternative solution, possibly cleaner but more invasive, would be
changing the 'bool *own_req' syn_recv_sock() argument into 'int *req_status'
and let MPTCP set it to 'REQ_DROP'.

v1 -> v2:
- be more conservative about drop_req initialization

RFC -> v1:
- move the drop_req bit inside tcp_request_sock (Eric)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+35 -26
+3
include/linux/tcp.h
··· 120 120 u64 snt_synack; /* first SYNACK sent time */ 121 121 bool tfo_listener; 122 122 bool is_mptcp; 123 + #if IS_ENABLED(CONFIG_MPTCP) 124 + bool drop_req; 125 + #endif 123 126 u32 txhash; 124 127 u32 rcv_isn; 125 128 u32 snt_isn;
+8
include/net/inet_connection_sock.h
··· 287 287 void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); 288 288 void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); 289 289 290 + static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) 291 + { 292 + /* The below has to be done to allow calling inet_csk_destroy_sock */ 293 + sock_set_flag(sk, SOCK_DEAD); 294 + percpu_counter_inc(sk->sk_prot->orphan_count); 295 + inet_sk(sk)->inet_num = 0; 296 + } 297 + 290 298 void inet_csk_destroy_sock(struct sock *sk); 291 299 void inet_csk_prepare_forced_close(struct sock *sk); 292 300
+10 -7
include/net/mptcp.h
··· 68 68 return tcp_rsk(req)->is_mptcp; 69 69 } 70 70 71 + static inline bool rsk_drop_req(const struct request_sock *req) 72 + { 73 + return tcp_rsk(req)->is_mptcp && tcp_rsk(req)->drop_req; 74 + } 75 + 71 76 void mptcp_space(const struct sock *ssk, int *space, int *full_space); 72 77 bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, 73 78 unsigned int *size, struct mptcp_out_options *opts); ··· 126 121 skb_ext_find(from, SKB_EXT_MPTCP)); 127 122 } 128 123 129 - bool mptcp_sk_is_subflow(const struct sock *sk); 130 - 131 124 void mptcp_seq_show(struct seq_file *seq); 132 125 #else 133 126 ··· 139 136 } 140 137 141 138 static inline bool rsk_is_mptcp(const struct request_sock *req) 139 + { 140 + return false; 141 + } 142 + 143 + static inline bool rsk_drop_req(const struct request_sock *req) 142 144 { 143 145 return false; 144 146 } ··· 196 188 const struct sk_buff *from) 197 189 { 198 190 return true; 199 - } 200 - 201 - static inline bool mptcp_sk_is_subflow(const struct sock *sk) 202 - { 203 - return false; 204 191 } 205 192 206 193 static inline void mptcp_space(const struct sock *ssk, int *s, int *fs) { }
+1 -5
net/ipv4/inet_connection_sock.c
··· 896 896 /* sk_clone_lock locked the socket and set refcnt to 2 */ 897 897 bh_unlock_sock(sk); 898 898 sock_put(sk); 899 - 900 - /* The below has to be done to allow calling inet_csk_destroy_sock */ 901 - sock_set_flag(sk, SOCK_DEAD); 902 - percpu_counter_inc(sk->sk_prot->orphan_count); 903 - inet_sk(sk)->inet_num = 0; 899 + inet_csk_prepare_for_destroy_sock(sk); 904 900 } 905 901 EXPORT_SYMBOL(inet_csk_prepare_forced_close); 906 902
+1 -1
net/ipv4/tcp_minisocks.c
··· 774 774 if (!child) 775 775 goto listen_overflow; 776 776 777 - if (own_req && sk_is_mptcp(child) && mptcp_sk_is_subflow(child)) { 777 + if (own_req && rsk_drop_req(req)) { 778 778 reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); 779 779 inet_csk_reqsk_queue_drop_and_put(sk, req); 780 780 return child;
-7
net/mptcp/protocol.c
··· 1638 1638 return ret; 1639 1639 } 1640 1640 1641 - bool mptcp_sk_is_subflow(const struct sock *sk) 1642 - { 1643 - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 1644 - 1645 - return subflow->mp_join == 1; 1646 - } 1647 - 1648 1641 static bool mptcp_memory_free(const struct sock *sk, int wake) 1649 1642 { 1650 1643 struct mptcp_sock *msk = mptcp_sk(sk);
+12 -6
net/mptcp/subflow.c
··· 470 470 if (child && *own_req) { 471 471 struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child); 472 472 473 + tcp_rsk(req)->drop_req = false; 474 + 473 475 /* we need to fallback on ctx allocation failure and on pre-reqs 474 476 * checking above. In the latter scenario we additionally need 475 477 * to reset the context to non MPTCP status. 476 478 */ 477 479 if (!ctx || fallback) { 478 480 if (fallback_is_fatal) 479 - goto close_child; 481 + goto dispose_child; 480 482 481 483 if (ctx) { 482 484 subflow_ulp_fallback(child, ctx); ··· 507 505 508 506 owner = mptcp_token_get_sock(ctx->token); 509 507 if (!owner) 510 - goto close_child; 508 + goto dispose_child; 511 509 512 510 ctx->conn = (struct sock *)owner; 513 511 if (!mptcp_finish_join(child)) 514 - goto close_child; 512 + goto dispose_child; 515 513 516 514 SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX); 515 + tcp_rsk(req)->drop_req = true; 517 516 } 518 517 } 519 518 ··· 531 528 !mptcp_subflow_ctx(child)->conn)); 532 529 return child; 533 530 534 - close_child: 531 + dispose_child: 532 + tcp_rsk(req)->drop_req = true; 535 533 tcp_send_active_reset(child, GFP_ATOMIC); 536 - inet_csk_prepare_forced_close(child); 534 + inet_csk_prepare_for_destroy_sock(child); 537 535 tcp_done(child); 538 - return NULL; 536 + 537 + /* The last child reference will be released by the caller */ 538 + return child; 539 539 } 540 540 541 541 static struct inet_connection_sock_af_ops subflow_specific;