Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mptcp: enable JOIN requests even if cookies are in use

JOIN requests do not work in syncookie mode -- for HMAC validation, the
peer's nonce and the mptcp token (to obtain the desired connection socket
the join is for) are required, but this information is only present in the
initial syn.

So either we need to drop all JOIN requests once a listening socket enters
syncookie mode, or we need to store enough state to reconstruct the request
socket later.

This adds a state table (1024 entries) to store the data present in the
MP_JOIN syn request and the random nonce used for the cookie syn/ack.

When an MP_JOIN ACK passes cookie validation, the table is consulted
to rebuild the request socket from it.

An alternate approach would be to "cancel" syn-cookie mode and force
MP_JOIN to always use a syn queue entry.

However, doing so brings the backlog over the configured queue limit.

v2: use req->syncookie, not (removed) want_cookie arg

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Florian Westphal and committed by
David S. Miller
9466a1cc 6fc8c827

+174
+6
net/ipv4/syncookies.c
··· 212 212 refcount_set(&req->rsk_refcnt, 1); 213 213 tcp_sk(child)->tsoffset = tsoff; 214 214 sock_rps_save_rxhash(child, skb); 215 + 216 + if (tcp_rsk(req)->drop_req) { 217 + refcount_set(&req->rsk_refcnt, 2); 218 + return child; 219 + } 220 + 215 221 if (inet_csk_reqsk_queue_add(sk, req, child)) 216 222 return child; 217 223
+1
net/mptcp/Makefile
··· 4 4 mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ 5 5 mib.o pm_netlink.o 6 6 7 + obj-$(CONFIG_SYN_COOKIES) += syncookies.o 7 8 obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o 8 9 9 10 mptcp_crypto_test-objs := crypto_test.o
+1
net/mptcp/ctrl.c
··· 112 112 113 113 void __init mptcp_init(void) 114 114 { 115 + mptcp_join_cookie_init(); 115 116 mptcp_proto_init(); 116 117 117 118 if (register_pernet_subsys(&mptcp_pernet_ops) < 0)
+20
net/mptcp/protocol.h
··· 506 506 !subflow->conn_finished; 507 507 } 508 508 509 + #ifdef CONFIG_SYN_COOKIES 510 + void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, 511 + struct sk_buff *skb); 512 + bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, 513 + struct sk_buff *skb); 514 + void __init mptcp_join_cookie_init(void); 515 + #else 516 + static inline void 517 + subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, 518 + struct sk_buff *skb) {} 519 + static inline bool 520 + mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, 521 + struct sk_buff *skb) 522 + { 523 + return false; 524 + } 525 + 526 + static inline void mptcp_join_cookie_init(void) {} 527 + #endif 528 + 509 529 #endif /* __MPTCP_PROTOCOL_H */
+14
net/mptcp/subflow.c
··· 173 173 subflow_req->token = mp_opt.token; 174 174 subflow_req->remote_nonce = mp_opt.nonce; 175 175 subflow_req->msk = subflow_token_join_request(req, skb); 176 + 177 + if (unlikely(req->syncookie) && subflow_req->msk) { 178 + if (mptcp_can_accept_new_subflow(subflow_req->msk)) 179 + subflow_init_req_cookie_join_save(subflow_req, skb); 180 + } 181 + 176 182 pr_debug("token=%u, remote_nonce=%u msk=%p", subflow_req->token, 177 183 subflow_req->remote_nonce, subflow_req->msk); 178 184 } ··· 212 206 return err; 213 207 214 208 subflow_req->mp_capable = 1; 209 + subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1; 210 + } else if (mp_opt.mp_join && listener->request_mptcp) { 211 + if (!mptcp_token_join_cookie_init_state(subflow_req, skb)) 212 + return -EINVAL; 213 + 214 + if (mptcp_can_accept_new_subflow(subflow_req->msk)) 215 + subflow_req->mp_join = 1; 216 + 215 217 subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq - 1; 216 218 } 217 219
+132
net/mptcp/syncookies.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/skbuff.h> 3 + 4 + #include "protocol.h" 5 + 6 + /* Syncookies do not work for JOIN requests. 7 + * 8 + * Unlike MP_CAPABLE, where the ACK cookie contains the needed MPTCP 9 + * options to reconstruct the initial syn state, MP_JOIN does not contain 10 + * the token to obtain the mptcp socket nor the server-generated nonce 11 + * that was used in the cookie SYN/ACK response. 12 + * 13 + * Keep a small best effort state table to store the syn/synack data, 14 + * indexed by skb hash. 15 + * 16 + * A MP_JOIN SYN packet handled by syn cookies is only stored if the 32bit 17 + * token matches a known mptcp connection that can still accept more subflows. 18 + * 19 + * There is no timeout handling -- state is only re-constructed 20 + * when the TCP ACK passed the cookie validation check. 21 + */ 22 + 23 + struct join_entry { 24 + u32 token; 25 + u32 remote_nonce; 26 + u32 local_nonce; 27 + u8 join_id; 28 + u8 local_id; 29 + u8 backup; 30 + u8 valid; 31 + }; 32 + 33 + #define COOKIE_JOIN_SLOTS 1024 34 + 35 + static struct join_entry join_entries[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp; 36 + static spinlock_t join_entry_locks[COOKIE_JOIN_SLOTS] __cacheline_aligned_in_smp; 37 + 38 + static u32 mptcp_join_entry_hash(struct sk_buff *skb, struct net *net) 39 + { 40 + u32 i = skb_get_hash(skb) ^ net_hash_mix(net); 41 + 42 + return i % ARRAY_SIZE(join_entries); 43 + } 44 + 45 + static void mptcp_join_store_state(struct join_entry *entry, 46 + const struct mptcp_subflow_request_sock *subflow_req) 47 + { 48 + entry->token = subflow_req->token; 49 + entry->remote_nonce = subflow_req->remote_nonce; 50 + entry->local_nonce = subflow_req->local_nonce; 51 + entry->backup = subflow_req->backup; 52 + entry->join_id = subflow_req->remote_id; 53 + entry->local_id = subflow_req->local_id; 54 + entry->valid = 1; 55 + } 56 + 57 + void subflow_init_req_cookie_join_save(const struct mptcp_subflow_request_sock *subflow_req, 58 + 
struct sk_buff *skb) 59 + { 60 + struct net *net = read_pnet(&subflow_req->sk.req.ireq_net); 61 + u32 i = mptcp_join_entry_hash(skb, net); 62 + 63 + /* No use in waiting if other cpu is already using this slot -- 64 + * would overwrite the data that got stored. 65 + */ 66 + spin_lock_bh(&join_entry_locks[i]); 67 + mptcp_join_store_state(&join_entries[i], subflow_req); 68 + spin_unlock_bh(&join_entry_locks[i]); 69 + } 70 + 71 + /* Called for a cookie-ack with MP_JOIN option present. 72 + * Look up the saved state based on skb hash & check token matches msk 73 + * in same netns. 74 + * 75 + * Caller will check msk can still accept another subflow. The hmac 76 + * present in the cookie ACK mptcp option space will be checked later. 77 + */ 78 + bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subflow_req, 79 + struct sk_buff *skb) 80 + { 81 + struct net *net = read_pnet(&subflow_req->sk.req.ireq_net); 82 + u32 i = mptcp_join_entry_hash(skb, net); 83 + struct mptcp_sock *msk; 84 + struct join_entry *e; 85 + 86 + e = &join_entries[i]; 87 + 88 + spin_lock_bh(&join_entry_locks[i]); 89 + 90 + if (e->valid == 0) { 91 + spin_unlock_bh(&join_entry_locks[i]); 92 + return false; 93 + } 94 + 95 + e->valid = 0; 96 + 97 + msk = mptcp_token_get_sock(e->token); 98 + if (!msk) { 99 + spin_unlock_bh(&join_entry_locks[i]); 100 + return false; 101 + } 102 + 103 + /* If this fails, the token got re-used in the mean time by another 104 + * mptcp socket in a different netns, i.e. entry is outdated. 
105 + */ 106 + if (!net_eq(sock_net((struct sock *)msk), net)) 107 + goto err_put; 108 + 109 + subflow_req->remote_nonce = e->remote_nonce; 110 + subflow_req->local_nonce = e->local_nonce; 111 + subflow_req->backup = e->backup; 112 + subflow_req->remote_id = e->join_id; 113 + subflow_req->token = e->token; 114 + subflow_req->msk = msk; 115 + spin_unlock_bh(&join_entry_locks[i]); 116 + return true; 117 + 118 + err_put: 119 + spin_unlock_bh(&join_entry_locks[i]); 120 + sock_put((struct sock *)msk); 121 + return false; 122 + } 123 + 124 + void __init mptcp_join_cookie_init(void) 125 + { 126 + int i; 127 + 128 + for (i = 0; i < ARRAY_SIZE(join_entry_locks); i++) 129 + spin_lock_init(&join_entry_locks[i]); 130 + 131 + BUILD_BUG_ON(ARRAY_SIZE(join_entry_locks) != ARRAY_SIZE(join_entries)); 132 + }