Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mptcp-remove-msk-subflow'

Matthieu Baerts says:

====================
mptcp: get rid of msk->subflow

The MPTCP protocol maintains an additional struct socket per connection,
mainly to be able to easily use tcp-level struct socket operations.

This leads to several side effects, beyond the quite unfortunate /
confusing 'subflow' field name:

- active and passive sockets behaviour is inconsistent: only active ones
have a non-NULL msk->subflow, leading to different error handling and
different error codes returned to user-space in several places.

- active sockets use an unnecessarily larger amount of memory

- passive sockets can't successfully go through accept(), disconnect(),
accept() sequence, see [1] for more details.

The first 13 patches of this series are from Paolo and address all of the
above, finally getting rid of the blamed field:

- The first patch is a minor clean-up.

- In the next 11 patches, msk->subflow usage is systematically removed
from the MPTCP protocol, replacing it with direct msk->first usage,
eventually introducing new core helpers when needed.

- The 13th patch finally disposes the field, and it's the only patch in
the series intended to produce functional changes.

The 14th and last patch is from Kuniyuki and is not linked to the
previous ones: it is a small clean-up that removes an unnecessary check
in mptcp_init_sock().

[1] https://github.com/multipath-tcp/mptcp_net-next/issues/290
====================

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>

+186 -177
+2
include/net/inet_common.h
··· 40 40 int flags); 41 41 int inet_shutdown(struct socket *sock, int how); 42 42 int inet_listen(struct socket *sock, int backlog); 43 + int __inet_listen_sk(struct sock *sk, int backlog); 43 44 void inet_sock_destruct(struct sock *sk); 44 45 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); 46 + int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len); 45 47 /* Don't allocate port at this moment, defer to connect. */ 46 48 #define BIND_FORCE_ADDRESS_NO_PORT (1 << 0) 47 49 /* Grab and release socket lock. */
+1
include/net/ipv6.h
··· 1216 1216 void inet6_sock_destruct(struct sock *sk); 1217 1217 int inet6_release(struct socket *sock); 1218 1218 int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); 1219 + int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len); 1219 1220 int inet6_getname(struct socket *sock, struct sockaddr *uaddr, 1220 1221 int peer); 1221 1222 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+28 -18
net/ipv4/af_inet.c
··· 187 187 return 0; 188 188 } 189 189 190 - /* 191 - * Move a socket into listening state. 192 - */ 193 - int inet_listen(struct socket *sock, int backlog) 190 + int __inet_listen_sk(struct sock *sk, int backlog) 194 191 { 195 - struct sock *sk = sock->sk; 196 - unsigned char old_state; 192 + unsigned char old_state = sk->sk_state; 197 193 int err, tcp_fastopen; 198 194 199 - lock_sock(sk); 200 - 201 - err = -EINVAL; 202 - if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) 203 - goto out; 204 - 205 - old_state = sk->sk_state; 206 195 if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN))) 207 - goto out; 196 + return -EINVAL; 208 197 209 198 WRITE_ONCE(sk->sk_max_ack_backlog, backlog); 210 199 /* Really, if the socket is already in listen state ··· 216 227 217 228 err = inet_csk_listen_start(sk); 218 229 if (err) 219 - goto out; 230 + return err; 231 + 220 232 tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_LISTEN_CB, 0, NULL); 221 233 } 222 - err = 0; 234 + return 0; 235 + } 236 + 237 + /* 238 + * Move a socket into listening state. 
239 + */ 240 + int inet_listen(struct socket *sock, int backlog) 241 + { 242 + struct sock *sk = sock->sk; 243 + int err = -EINVAL; 244 + 245 + lock_sock(sk); 246 + 247 + if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) 248 + goto out; 249 + 250 + err = __inet_listen_sk(sk, backlog); 223 251 224 252 out: 225 253 release_sock(sk); ··· 437 431 } 438 432 EXPORT_SYMBOL(inet_release); 439 433 440 - int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 434 + int inet_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) 441 435 { 442 - struct sock *sk = sock->sk; 443 436 u32 flags = BIND_WITH_LOCK; 444 437 int err; 445 438 ··· 458 453 return err; 459 454 460 455 return __inet_bind(sk, uaddr, addr_len, flags); 456 + } 457 + 458 + int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 459 + { 460 + return inet_bind_sk(sock->sk, uaddr, addr_len); 461 461 } 462 462 EXPORT_SYMBOL(inet_bind); 463 463
+7 -3
net/ipv6/af_inet6.c
··· 435 435 goto out; 436 436 } 437 437 438 - /* bind for INET6 API */ 439 - int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 438 + int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) 440 439 { 441 - struct sock *sk = sock->sk; 442 440 u32 flags = BIND_WITH_LOCK; 443 441 const struct proto *prot; 444 442 int err = 0; ··· 459 461 return err; 460 462 461 463 return __inet6_bind(sk, uaddr, addr_len, flags); 464 + } 465 + 466 + /* bind for INET6 API */ 467 + int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 468 + { 469 + return inet6_bind_sk(sock->sk, uaddr, addr_len); 462 470 } 463 471 EXPORT_SYMBOL(inet6_bind); 464 472
+17 -13
net/mptcp/pm_netlink.c
··· 9 9 #include <linux/inet.h> 10 10 #include <linux/kernel.h> 11 11 #include <net/tcp.h> 12 + #include <net/inet_common.h> 12 13 #include <net/netns/generic.h> 13 14 #include <net/mptcp.h> 14 15 #include <net/genetlink.h> ··· 1006 1005 bool is_ipv6 = sk->sk_family == AF_INET6; 1007 1006 int addrlen = sizeof(struct sockaddr_in); 1008 1007 struct sockaddr_storage addr; 1009 - struct socket *ssock; 1010 - struct sock *newsk; 1008 + struct sock *newsk, *ssk; 1011 1009 int backlog = 1024; 1012 1010 int err; 1013 1011 ··· 1032 1032 &mptcp_keys[is_ipv6]); 1033 1033 1034 1034 lock_sock(newsk); 1035 - ssock = __mptcp_nmpc_socket(mptcp_sk(newsk)); 1035 + ssk = __mptcp_nmpc_sk(mptcp_sk(newsk)); 1036 1036 release_sock(newsk); 1037 - if (IS_ERR(ssock)) 1038 - return PTR_ERR(ssock); 1037 + if (IS_ERR(ssk)) 1038 + return PTR_ERR(ssk); 1039 1039 1040 1040 mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); 1041 1041 #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1042 1042 if (entry->addr.family == AF_INET6) 1043 1043 addrlen = sizeof(struct sockaddr_in6); 1044 1044 #endif 1045 - err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen); 1045 + if (ssk->sk_family == AF_INET) 1046 + err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 1047 + #if IS_ENABLED(CONFIG_MPTCP_IPV6) 1048 + else if (ssk->sk_family == AF_INET6) 1049 + err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 1050 + #endif 1046 1051 if (err) 1047 1052 return err; 1048 1053 1049 1054 inet_sk_state_store(newsk, TCP_LISTEN); 1050 - err = kernel_listen(ssock, backlog); 1051 - if (err) 1052 - return err; 1053 - 1054 - mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED); 1055 - 1056 - return 0; 1055 + lock_sock(ssk); 1056 + err = __inet_listen_sk(ssk, backlog); 1057 + if (!err) 1058 + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); 1059 + release_sock(ssk); 1060 + return err; 1057 1061 } 1058 1062 1059 1063 int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct 
mptcp_addr_info *skc)
+92 -102
net/mptcp/protocol.c
··· 92 92 93 93 msk->scaling_ratio = tcp_sk(ssock->sk)->scaling_ratio; 94 94 WRITE_ONCE(msk->first, ssock->sk); 95 - WRITE_ONCE(msk->subflow, ssock); 96 95 subflow = mptcp_subflow_ctx(ssock->sk); 97 96 list_add(&subflow->node, &msk->conn_list); 98 97 sock_hold(ssock->sk); ··· 101 102 /* This is the first subflow, always with id 0 */ 102 103 subflow->local_id_valid = 1; 103 104 mptcp_sock_graft(msk->first, sk->sk_socket); 105 + iput(SOCK_INODE(ssock)); 104 106 105 107 return 0; 106 108 } ··· 109 109 /* If the MPC handshake is not started, returns the first subflow, 110 110 * eventually allocating it. 111 111 */ 112 - struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk) 112 + struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk) 113 113 { 114 114 struct sock *sk = (struct sock *)msk; 115 115 int ret; ··· 117 117 if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) 118 118 return ERR_PTR(-EINVAL); 119 119 120 - if (!msk->subflow) { 121 - if (msk->first) 122 - return ERR_PTR(-EINVAL); 123 - 120 + if (!msk->first) { 124 121 ret = __mptcp_socket_create(msk); 125 122 if (ret) 126 123 return ERR_PTR(ret); ··· 125 128 mptcp_sockopt_sync(msk, msk->first); 126 129 } 127 130 128 - return msk->subflow; 131 + return msk->first; 129 132 } 130 133 131 134 static void mptcp_drop(struct sock *sk, struct sk_buff *skb) ··· 1640 1643 { 1641 1644 unsigned int saved_flags = msg->msg_flags; 1642 1645 struct mptcp_sock *msk = mptcp_sk(sk); 1643 - struct socket *ssock; 1644 1646 struct sock *ssk; 1645 1647 int ret; 1646 1648 ··· 1650 1654 * fastopen attempt, no need to check for additional subflow status. 1651 1655 */ 1652 1656 if (msg->msg_flags & MSG_FASTOPEN) { 1653 - ssock = __mptcp_nmpc_socket(msk); 1654 - if (IS_ERR(ssock)) 1655 - return PTR_ERR(ssock); 1657 + ssk = __mptcp_nmpc_sk(msk); 1658 + if (IS_ERR(ssk)) 1659 + return PTR_ERR(ssk); 1656 1660 } 1657 1661 if (!msk->first) 1658 1662 return -EINVAL; ··· 2238 2242 return min_stale_count > 1 ? 
backup : NULL; 2239 2243 } 2240 2244 2241 - static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk) 2242 - { 2243 - if (msk->subflow) { 2244 - iput(SOCK_INODE(msk->subflow)); 2245 - WRITE_ONCE(msk->subflow, NULL); 2246 - } 2247 - } 2248 - 2249 2245 bool __mptcp_retransmit_pending_data(struct sock *sk) 2250 2246 { 2251 2247 struct mptcp_data_frag *cur, *rtx_head; ··· 2316 2328 goto out_release; 2317 2329 } 2318 2330 2319 - dispose_it = !msk->subflow || ssk != msk->subflow->sk; 2331 + dispose_it = msk->free_first || ssk != msk->first; 2320 2332 if (dispose_it) 2321 2333 list_del(&subflow->node); 2322 2334 ··· 2337 2349 * disconnect should never fail 2338 2350 */ 2339 2351 WARN_ON_ONCE(tcp_disconnect(ssk, 0)); 2340 - msk->subflow->state = SS_UNCONNECTED; 2341 2352 mptcp_subflow_ctx_reset(subflow); 2342 2353 release_sock(ssk); 2343 2354 ··· 2649 2662 sock_put(sk); 2650 2663 } 2651 2664 2652 - static int __mptcp_init_sock(struct sock *sk) 2665 + static void __mptcp_init_sock(struct sock *sk) 2653 2666 { 2654 2667 struct mptcp_sock *msk = mptcp_sk(sk); 2655 2668 ··· 2676 2689 /* re-use the csk retrans timer for MPTCP-level retrans */ 2677 2690 timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); 2678 2691 timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0); 2679 - 2680 - return 0; 2681 2692 } 2682 2693 2683 2694 static void mptcp_ca_reset(struct sock *sk) ··· 2693 2708 static int mptcp_init_sock(struct sock *sk) 2694 2709 { 2695 2710 struct net *net = sock_net(sk); 2696 - int ret; 2697 2711 2698 - ret = __mptcp_init_sock(sk); 2699 - if (ret) 2700 - return ret; 2712 + __mptcp_init_sock(sk); 2701 2713 2702 2714 if (!mptcp_is_enabled(net)) 2703 2715 return -ENOPROTOOPT; ··· 3092 3110 msk = mptcp_sk(nsk); 3093 3111 msk->local_key = subflow_req->local_key; 3094 3112 msk->token = subflow_req->token; 3095 - WRITE_ONCE(msk->subflow, NULL); 3096 3113 msk->in_accept_queue = 1; 3097 3114 WRITE_ONCE(msk->fully_established, false); 3098 3115 if 
(mp_opt->suboptions & OPTION_MPTCP_CSUMREQD) ··· 3155 3174 WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd); 3156 3175 } 3157 3176 3158 - static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, 3177 + static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err, 3159 3178 bool kern) 3160 3179 { 3161 - struct mptcp_sock *msk = mptcp_sk(sk); 3162 - struct socket *listener; 3163 3180 struct sock *newsk; 3164 3181 3165 - listener = READ_ONCE(msk->subflow); 3166 - if (WARN_ON_ONCE(!listener)) { 3167 - *err = -EINVAL; 3168 - return NULL; 3169 - } 3170 - 3171 - pr_debug("msk=%p, listener=%p", msk, mptcp_subflow_ctx(listener->sk)); 3172 - newsk = inet_csk_accept(listener->sk, flags, err, kern); 3182 + pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); 3183 + newsk = inet_csk_accept(ssk, flags, err, kern); 3173 3184 if (!newsk) 3174 3185 return NULL; 3175 3186 3176 - pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk)); 3187 + pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); 3177 3188 if (sk_is_mptcp(newsk)) { 3178 3189 struct mptcp_subflow_context *subflow; 3179 3190 struct sock *new_mptcp_sock; ··· 3182 3209 } 3183 3210 3184 3211 newsk = new_mptcp_sock; 3185 - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK); 3212 + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); 3186 3213 } else { 3187 - MPTCP_INC_STATS(sock_net(sk), 3214 + MPTCP_INC_STATS(sock_net(ssk), 3188 3215 MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); 3189 3216 } 3190 3217 ··· 3225 3252 { 3226 3253 struct mptcp_sock *msk = mptcp_sk(sk); 3227 3254 3228 - /* clears msk->subflow, allowing the following to close 3229 - * even the initial subflow 3230 - */ 3231 - mptcp_dispose_initial_subflow(msk); 3255 + /* allow the following to close even the initial subflow */ 3256 + msk->free_first = 1; 3232 3257 mptcp_destroy_common(msk, 0); 3233 3258 sk_sockets_allocated_dec(sk); 3234 3259 } ··· 3376 3405 static int 
mptcp_get_port(struct sock *sk, unsigned short snum) 3377 3406 { 3378 3407 struct mptcp_sock *msk = mptcp_sk(sk); 3379 - struct socket *ssock; 3380 3408 3381 - ssock = msk->subflow; 3382 - pr_debug("msk=%p, subflow=%p", msk, ssock); 3383 - if (WARN_ON_ONCE(!ssock)) 3409 + pr_debug("msk=%p, ssk=%p", msk, msk->first); 3410 + if (WARN_ON_ONCE(!msk->first)) 3384 3411 return -EINVAL; 3385 3412 3386 - return inet_csk_get_port(ssock->sk, snum); 3413 + return inet_csk_get_port(msk->first, snum); 3387 3414 } 3388 3415 3389 3416 void mptcp_finish_connect(struct sock *ssk) ··· 3556 3587 { 3557 3588 struct mptcp_subflow_context *subflow; 3558 3589 struct mptcp_sock *msk = mptcp_sk(sk); 3559 - struct socket *ssock; 3560 3590 int err = -EINVAL; 3591 + struct sock *ssk; 3561 3592 3562 - ssock = __mptcp_nmpc_socket(msk); 3563 - if (IS_ERR(ssock)) 3564 - return PTR_ERR(ssock); 3593 + ssk = __mptcp_nmpc_sk(msk); 3594 + if (IS_ERR(ssk)) 3595 + return PTR_ERR(ssk); 3565 3596 3566 - mptcp_token_destroy(msk); 3567 3597 inet_sk_state_store(sk, TCP_SYN_SENT); 3568 - subflow = mptcp_subflow_ctx(ssock->sk); 3598 + subflow = mptcp_subflow_ctx(ssk); 3569 3599 #ifdef CONFIG_TCP_MD5SIG 3570 3600 /* no MPTCP if MD5SIG is enabled on this socket or we may run out of 3571 3601 * TCP option space. 
3572 3602 */ 3573 - if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info)) 3603 + if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info)) 3574 3604 mptcp_subflow_early_fallback(msk, subflow); 3575 3605 #endif 3576 - if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) { 3577 - MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT); 3606 + if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) { 3607 + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT); 3578 3608 mptcp_subflow_early_fallback(msk, subflow); 3579 3609 } 3580 3610 if (likely(!__mptcp_check_fallback(msk))) ··· 3582 3614 /* if reaching here via the fastopen/sendmsg path, the caller already 3583 3615 * acquired the subflow socket lock, too. 3584 3616 */ 3585 - if (msk->fastopening) 3586 - err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1); 3587 - else 3588 - err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK); 3589 - inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect; 3617 + if (!msk->fastopening) 3618 + lock_sock(ssk); 3619 + 3620 + /* the following mirrors closely a very small chunk of code from 3621 + * __inet_stream_connect() 3622 + */ 3623 + if (ssk->sk_state != TCP_CLOSE) 3624 + goto out; 3625 + 3626 + if (BPF_CGROUP_PRE_CONNECT_ENABLED(ssk)) { 3627 + err = ssk->sk_prot->pre_connect(ssk, uaddr, addr_len); 3628 + if (err) 3629 + goto out; 3630 + } 3631 + 3632 + err = ssk->sk_prot->connect(ssk, uaddr, addr_len); 3633 + if (err < 0) 3634 + goto out; 3635 + 3636 + inet_sk(sk)->defer_connect = inet_sk(ssk)->defer_connect; 3637 + 3638 + out: 3639 + if (!msk->fastopening) 3640 + release_sock(ssk); 3590 3641 3591 3642 /* on successful connect, the msk state will be moved to established by 3592 3643 * subflow_finish_connect() 3593 3644 */ 3594 - if (unlikely(err && err != -EINPROGRESS)) { 3595 - inet_sk_state_store(sk, inet_sk_state_load(ssock->sk)); 3645 + if (unlikely(err)) { 3646 + /* avoid leaving a dangling token in an unconnected 
socket */ 3647 + mptcp_token_destroy(msk); 3648 + inet_sk_state_store(sk, TCP_CLOSE); 3596 3649 return err; 3597 3650 } 3598 3651 3599 - mptcp_copy_inaddrs(sk, ssock->sk); 3600 - 3601 - /* silence EINPROGRESS and let the caller inet_stream_connect 3602 - * handle the connection in progress 3603 - */ 3652 + mptcp_copy_inaddrs(sk, ssk); 3604 3653 return 0; 3605 3654 } 3606 3655 ··· 3658 3673 static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 3659 3674 { 3660 3675 struct mptcp_sock *msk = mptcp_sk(sock->sk); 3661 - struct socket *ssock; 3662 - int err; 3676 + struct sock *ssk, *sk = sock->sk; 3677 + int err = -EINVAL; 3663 3678 3664 - lock_sock(sock->sk); 3665 - ssock = __mptcp_nmpc_socket(msk); 3666 - if (IS_ERR(ssock)) { 3667 - err = PTR_ERR(ssock); 3679 + lock_sock(sk); 3680 + ssk = __mptcp_nmpc_sk(msk); 3681 + if (IS_ERR(ssk)) { 3682 + err = PTR_ERR(ssk); 3668 3683 goto unlock; 3669 3684 } 3670 3685 3671 - err = READ_ONCE(ssock->ops)->bind(ssock, uaddr, addr_len); 3686 + if (sk->sk_family == AF_INET) 3687 + err = inet_bind_sk(ssk, uaddr, addr_len); 3688 + #if IS_ENABLED(CONFIG_MPTCP_IPV6) 3689 + else if (sk->sk_family == AF_INET6) 3690 + err = inet6_bind_sk(ssk, uaddr, addr_len); 3691 + #endif 3672 3692 if (!err) 3673 - mptcp_copy_inaddrs(sock->sk, ssock->sk); 3693 + mptcp_copy_inaddrs(sk, ssk); 3674 3694 3675 3695 unlock: 3676 - release_sock(sock->sk); 3696 + release_sock(sk); 3677 3697 return err; 3678 3698 } 3679 3699 ··· 3686 3696 { 3687 3697 struct mptcp_sock *msk = mptcp_sk(sock->sk); 3688 3698 struct sock *sk = sock->sk; 3689 - struct socket *ssock; 3699 + struct sock *ssk; 3690 3700 int err; 3691 3701 3692 3702 pr_debug("msk=%p", msk); ··· 3697 3707 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM) 3698 3708 goto unlock; 3699 3709 3700 - ssock = __mptcp_nmpc_socket(msk); 3701 - if (IS_ERR(ssock)) { 3702 - err = PTR_ERR(ssock); 3710 + ssk = __mptcp_nmpc_sk(msk); 3711 + if (IS_ERR(ssk)) { 3712 + err = PTR_ERR(ssk); 
3703 3713 goto unlock; 3704 3714 } 3705 3715 3706 - mptcp_token_destroy(msk); 3707 3716 inet_sk_state_store(sk, TCP_LISTEN); 3708 3717 sock_set_flag(sk, SOCK_RCU_FREE); 3709 3718 3710 - err = READ_ONCE(ssock->ops)->listen(ssock, backlog); 3711 - inet_sk_state_store(sk, inet_sk_state_load(ssock->sk)); 3719 + lock_sock(ssk); 3720 + err = __inet_listen_sk(ssk, backlog); 3721 + release_sock(ssk); 3722 + inet_sk_state_store(sk, inet_sk_state_load(ssk)); 3723 + 3712 3724 if (!err) { 3713 3725 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 3714 - mptcp_copy_inaddrs(sk, ssock->sk); 3715 - mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED); 3726 + mptcp_copy_inaddrs(sk, ssk); 3727 + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); 3716 3728 } 3717 3729 3718 3730 unlock: ··· 3726 3734 int flags, bool kern) 3727 3735 { 3728 3736 struct mptcp_sock *msk = mptcp_sk(sock->sk); 3729 - struct socket *ssock; 3730 - struct sock *newsk; 3737 + struct sock *ssk, *newsk; 3731 3738 int err; 3732 3739 3733 3740 pr_debug("msk=%p", msk); ··· 3734 3743 /* Buggy applications can call accept on socket states other then LISTEN 3735 3744 * but no need to allocate the first subflow just to error out. 3736 3745 */ 3737 - ssock = READ_ONCE(msk->subflow); 3738 - if (!ssock) 3746 + ssk = READ_ONCE(msk->first); 3747 + if (!ssk) 3739 3748 return -EINVAL; 3740 3749 3741 - newsk = mptcp_accept(sock->sk, flags, &err, kern); 3750 + newsk = mptcp_accept(ssk, flags, &err, kern); 3742 3751 if (!newsk) 3743 3752 return err; 3744 3753 ··· 3765 3774 /* Do late cleanup for the first subflow as necessary. Also 3766 3775 * deal with bad peers not doing a complete shutdown. 
3767 3776 */ 3768 - if (msk->first && 3769 - unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) { 3777 + if (unlikely(inet_sk_state_load(msk->first) == TCP_CLOSE)) { 3770 3778 __mptcp_close_ssk(newsk, msk->first, 3771 3779 mptcp_subflow_ctx(msk->first), 0); 3772 - if (unlikely(list_empty(&msk->conn_list))) 3780 + if (unlikely(list_is_singular(&msk->conn_list))) 3773 3781 inet_sk_state_store(newsk, TCP_CLOSE); 3774 3782 } 3775 3783 } ··· 3807 3817 state = inet_sk_state_load(sk); 3808 3818 pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags); 3809 3819 if (state == TCP_LISTEN) { 3810 - struct socket *ssock = READ_ONCE(msk->subflow); 3820 + struct sock *ssk = READ_ONCE(msk->first); 3811 3821 3812 - if (WARN_ON_ONCE(!ssock || !ssock->sk)) 3822 + if (WARN_ON_ONCE(!ssk)) 3813 3823 return 0; 3814 3824 3815 - return inet_csk_listen_poll(ssock->sk); 3825 + return inet_csk_listen_poll(ssk); 3816 3826 } 3817 3827 3818 3828 shutdown = READ_ONCE(sk->sk_shutdown);
+7 -8
net/mptcp/protocol.h
··· 299 299 cork:1, 300 300 nodelay:1, 301 301 fastopening:1, 302 - in_accept_queue:1; 302 + in_accept_queue:1, 303 + free_first:1; 303 304 struct work_struct work; 304 305 struct sk_buff *ooo_last_skb; 305 306 struct rb_root out_of_order_queue; ··· 309 308 struct list_head rtx_queue; 310 309 struct mptcp_data_frag *first_pending; 311 310 struct list_head join_list; 312 - struct socket *subflow; /* outgoing connect/listener/!mp_capable 313 - * The mptcp ops can safely dereference, using suitable 314 - * ONCE annotation, the subflow outside the socket 315 - * lock as such sock is freed after close(). 316 - */ 317 - struct sock *first; 311 + struct sock *first; /* The mptcp ops can safely dereference, using suitable 312 + * ONCE annotation, the subflow outside the socket 313 + * lock as such sock is freed after close(). 314 + */ 318 315 struct mptcp_pm_data pm; 319 316 struct { 320 317 u32 space; /* bytes copied in last measurement window */ ··· 639 640 void mptcp_subflow_reset(struct sock *ssk); 640 641 void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk); 641 642 void mptcp_sock_graft(struct sock *sk, struct socket *parent); 642 - struct socket *__mptcp_nmpc_socket(struct mptcp_sock *msk); 643 + struct sock *__mptcp_nmpc_sk(struct mptcp_sock *msk); 643 644 bool __mptcp_close(struct sock *sk, long timeout); 644 645 void mptcp_cancel_work(struct sock *sk); 645 646 void __mptcp_unaccepted_force_close(struct sock *sk);
+32 -33
net/mptcp/sockopt.c
··· 292 292 sockptr_t optval, unsigned int optlen) 293 293 { 294 294 struct sock *sk = (struct sock *)msk; 295 - struct socket *ssock; 295 + struct sock *ssk; 296 296 int ret; 297 297 298 298 switch (optname) { ··· 301 301 case SO_BINDTODEVICE: 302 302 case SO_BINDTOIFINDEX: 303 303 lock_sock(sk); 304 - ssock = __mptcp_nmpc_socket(msk); 305 - if (IS_ERR(ssock)) { 304 + ssk = __mptcp_nmpc_sk(msk); 305 + if (IS_ERR(ssk)) { 306 306 release_sock(sk); 307 - return PTR_ERR(ssock); 307 + return PTR_ERR(ssk); 308 308 } 309 309 310 - ret = sock_setsockopt(ssock, SOL_SOCKET, optname, optval, optlen); 310 + ret = sk_setsockopt(ssk, SOL_SOCKET, optname, optval, optlen); 311 311 if (ret == 0) { 312 312 if (optname == SO_REUSEPORT) 313 - sk->sk_reuseport = ssock->sk->sk_reuseport; 313 + sk->sk_reuseport = ssk->sk_reuseport; 314 314 else if (optname == SO_REUSEADDR) 315 - sk->sk_reuse = ssock->sk->sk_reuse; 315 + sk->sk_reuse = ssk->sk_reuse; 316 316 else if (optname == SO_BINDTODEVICE) 317 - sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; 317 + sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 318 318 else if (optname == SO_BINDTOIFINDEX) 319 - sk->sk_bound_dev_if = ssock->sk->sk_bound_dev_if; 319 + sk->sk_bound_dev_if = ssk->sk_bound_dev_if; 320 320 } 321 321 release_sock(sk); 322 322 return ret; ··· 390 390 { 391 391 struct sock *sk = (struct sock *)msk; 392 392 int ret = -EOPNOTSUPP; 393 - struct socket *ssock; 393 + struct sock *ssk; 394 394 395 395 switch (optname) { 396 396 case IPV6_V6ONLY: 397 397 case IPV6_TRANSPARENT: 398 398 case IPV6_FREEBIND: 399 399 lock_sock(sk); 400 - ssock = __mptcp_nmpc_socket(msk); 401 - if (IS_ERR(ssock)) { 400 + ssk = __mptcp_nmpc_sk(msk); 401 + if (IS_ERR(ssk)) { 402 402 release_sock(sk); 403 - return PTR_ERR(ssock); 403 + return PTR_ERR(ssk); 404 404 } 405 405 406 - ret = tcp_setsockopt(ssock->sk, SOL_IPV6, optname, optval, optlen); 406 + ret = tcp_setsockopt(ssk, SOL_IPV6, optname, optval, optlen); 407 407 if (ret != 0) { 408 408 
release_sock(sk); 409 409 return ret; ··· 413 413 414 414 switch (optname) { 415 415 case IPV6_V6ONLY: 416 - sk->sk_ipv6only = ssock->sk->sk_ipv6only; 416 + sk->sk_ipv6only = ssk->sk_ipv6only; 417 417 break; 418 418 case IPV6_TRANSPARENT: 419 - inet_sk(sk)->transparent = inet_sk(ssock->sk)->transparent; 419 + inet_sk(sk)->transparent = inet_sk(ssk)->transparent; 420 420 break; 421 421 case IPV6_FREEBIND: 422 - inet_sk(sk)->freebind = inet_sk(ssock->sk)->freebind; 422 + inet_sk(sk)->freebind = inet_sk(ssk)->freebind; 423 423 break; 424 424 } 425 425 ··· 685 685 { 686 686 struct sock *sk = (struct sock *)msk; 687 687 struct inet_sock *issk; 688 - struct socket *ssock; 688 + struct sock *ssk; 689 689 int err; 690 690 691 691 err = ip_setsockopt(sk, SOL_IP, optname, optval, optlen); ··· 694 694 695 695 lock_sock(sk); 696 696 697 - ssock = __mptcp_nmpc_socket(msk); 698 - if (IS_ERR(ssock)) { 697 + ssk = __mptcp_nmpc_sk(msk); 698 + if (IS_ERR(ssk)) { 699 699 release_sock(sk); 700 - return PTR_ERR(ssock); 700 + return PTR_ERR(ssk); 701 701 } 702 702 703 - issk = inet_sk(ssock->sk); 703 + issk = inet_sk(ssk); 704 704 705 705 switch (optname) { 706 706 case IP_FREEBIND: ··· 763 763 sockptr_t optval, unsigned int optlen) 764 764 { 765 765 struct sock *sk = (struct sock *)msk; 766 - struct socket *sock; 766 + struct sock *ssk; 767 767 int ret; 768 768 769 769 /* Limit to first subflow, before the connection establishment */ 770 770 lock_sock(sk); 771 - sock = __mptcp_nmpc_socket(msk); 772 - if (IS_ERR(sock)) { 773 - ret = PTR_ERR(sock); 771 + ssk = __mptcp_nmpc_sk(msk); 772 + if (IS_ERR(ssk)) { 773 + ret = PTR_ERR(ssk); 774 774 goto unlock; 775 775 } 776 776 777 - ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen); 777 + ret = tcp_setsockopt(ssk, level, optname, optval, optlen); 778 778 779 779 unlock: 780 780 release_sock(sk); ··· 864 864 char __user *optval, int __user *optlen) 865 865 { 866 866 struct sock *sk = (struct sock *)msk; 867 - struct socket *ssock; 
868 - int ret; 869 867 struct sock *ssk; 868 + int ret; 870 869 871 870 lock_sock(sk); 872 871 ssk = msk->first; ··· 874 875 goto out; 875 876 } 876 877 877 - ssock = __mptcp_nmpc_socket(msk); 878 - if (IS_ERR(ssock)) { 879 - ret = PTR_ERR(ssock); 878 + ssk = __mptcp_nmpc_sk(msk); 879 + if (IS_ERR(ssk)) { 880 + ret = PTR_ERR(ssk); 880 881 goto out; 881 882 } 882 883 883 - ret = tcp_getsockopt(ssock->sk, level, optname, optval, optlen); 884 + ret = tcp_getsockopt(ssk, level, optname, optval, optlen); 884 885 885 886 out: 886 887 release_sock(sk);