Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mptcp-userspace-path-manager-prerequisites'

Mat Martineau says:

====================
mptcp: Userspace path manager prerequisites

This series builds upon the path manager mode selection changes merged
in 4994d4fa99ba ("Merge branch 'mptcp-path-manager-mode-selection'") to
further modify the path manager code in preparation for adding the new
netlink commands to announce/remove advertised addresses and
create/destroy subflows of an MPTCP connection. The third and final
patch series for the userspace path manager will implement those
commands as discussed in
https://lore.kernel.org/netdev/23ff3b49-2563-1874-fa35-3af55d3088e7@linux.intel.com/#r

Patches 1, 5, and 7 remove some internal constraints on path managers
(in general) without changing in-kernel PM behavior.

Patch 2 adds a selftest to validate MPTCP address advertisement ack
behavior.

Patches 3, 4, and 6 add new attributes to existing MPTCP netlink events
and track internal state for populating those attributes.
====================

Link: https://lore.kernel.org/r/20220502205237.129297-1-mathew.j.martineau@linux.intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+81 -20
+1
include/uapi/linux/mptcp.h
··· 188 188 MPTCP_ATTR_IF_IDX, /* s32 */ 189 189 MPTCP_ATTR_RESET_REASON,/* u32 */ 190 190 MPTCP_ATTR_RESET_FLAGS, /* u32 */ 191 + MPTCP_ATTR_SERVER_SIDE, /* u8 */ 191 192 192 193 __MPTCP_ATTR_AFTER_LAST 193 194 };
+2 -2
net/mptcp/options.c
··· 931 931 if (TCP_SKB_CB(skb)->seq == subflow->ssn_offset + 1 && 932 932 TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq && 933 933 subflow->mp_join && (mp_opt->suboptions & OPTIONS_MPTCP_MPJ) && 934 - READ_ONCE(msk->pm.server_side)) 934 + !subflow->request_join) 935 935 tcp_send_ack(ssk); 936 936 goto fully_established; 937 937 } ··· 1133 1133 if ((mp_opt.suboptions & OPTION_MPTCP_ADD_ADDR) && 1134 1134 add_addr_hmac_valid(msk, &mp_opt)) { 1135 1135 if (!mp_opt.echo) { 1136 - mptcp_pm_add_addr_received(msk, &mp_opt.addr); 1136 + mptcp_pm_add_addr_received(sk, &mp_opt.addr); 1137 1137 MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDR); 1138 1138 } else { 1139 1139 mptcp_pm_add_addr_echoed(msk, &mp_opt.addr);
+17 -4
net/mptcp/pm.c
··· 87 87 unsigned int subflows_max; 88 88 int ret = 0; 89 89 90 + if (mptcp_pm_is_userspace(msk)) 91 + return mptcp_userspace_pm_active(msk); 92 + 90 93 subflows_max = mptcp_pm_get_subflows_max(msk); 91 94 92 95 pr_debug("msk=%p subflows=%d max=%d allow=%d", msk, pm->subflows, ··· 182 179 bool update_subflows; 183 180 184 181 update_subflows = (ssk->sk_state == TCP_CLOSE) && 185 - (subflow->request_join || subflow->mp_join); 182 + (subflow->request_join || subflow->mp_join) && 183 + mptcp_pm_is_kernel(msk); 186 184 if (!READ_ONCE(pm->work_pending) && !update_subflows) 187 185 return; 188 186 ··· 200 196 spin_unlock_bh(&pm->lock); 201 197 } 202 198 203 - void mptcp_pm_add_addr_received(struct mptcp_sock *msk, 199 + void mptcp_pm_add_addr_received(const struct sock *ssk, 204 200 const struct mptcp_addr_info *addr) 205 201 { 202 + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 203 + struct mptcp_sock *msk = mptcp_sk(subflow->conn); 206 204 struct mptcp_pm_data *pm = &msk->pm; 207 205 208 206 pr_debug("msk=%p remote_id=%d accept=%d", msk, addr->id, 209 207 READ_ONCE(pm->accept_addr)); 210 208 211 - mptcp_event_addr_announced(msk, addr); 209 + mptcp_event_addr_announced(ssk, addr); 212 210 213 211 spin_lock_bh(&pm->lock); 214 212 215 - if (!READ_ONCE(pm->accept_addr) || mptcp_pm_is_userspace(msk)) { 213 + if (mptcp_pm_is_userspace(msk)) { 214 + if (mptcp_userspace_pm_active(msk)) { 215 + mptcp_pm_announce_addr(msk, addr, true); 216 + mptcp_pm_add_addr_send_ack(msk); 217 + } else { 218 + __MPTCP_INC_STATS(sock_net((struct sock *)msk), MPTCP_MIB_ADDADDRDROP); 219 + } 220 + } else if (!READ_ONCE(pm->accept_addr)) { 216 221 mptcp_pm_announce_addr(msk, addr, true); 217 222 mptcp_pm_add_addr_send_ack(msk); 218 223 } else if (mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_RECEIVED)) {
+31 -5
net/mptcp/pm_netlink.c
··· 369 369 370 370 lockdep_assert_held(&msk->pm.lock); 371 371 372 - if (mptcp_lookup_anno_list_by_saddr(msk, &entry->addr)) 373 - return false; 372 + add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr); 373 + 374 + if (add_entry) { 375 + if (mptcp_pm_is_kernel(msk)) 376 + return false; 377 + 378 + sk_reset_timer(sk, &add_entry->add_timer, 379 + jiffies + mptcp_get_add_addr_timeout(net)); 380 + return true; 381 + } 374 382 375 383 add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); 376 384 if (!add_entry) ··· 811 803 } 812 804 __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap); 813 805 if (!removed) 806 + continue; 807 + 808 + if (!mptcp_pm_is_kernel(msk)) 814 809 continue; 815 810 816 811 if (rm_type == MPTCP_MIB_RMADDR) { ··· 1866 1855 nlskb, 0, MPTCP_PM_EV_GRP_OFFSET, gfp); 1867 1856 } 1868 1857 1858 + bool mptcp_userspace_pm_active(const struct mptcp_sock *msk) 1859 + { 1860 + return genl_has_listeners(&mptcp_genl_family, 1861 + sock_net((const struct sock *)msk), 1862 + MPTCP_PM_EV_GRP_OFFSET); 1863 + } 1864 + 1869 1865 static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk) 1870 1866 { 1871 1867 const struct inet_sock *issk = inet_sk(ssk); ··· 1993 1975 if (err) 1994 1976 return err; 1995 1977 1978 + if (nla_put_u8(skb, MPTCP_ATTR_SERVER_SIDE, READ_ONCE(msk->pm.server_side))) 1979 + return -EMSGSIZE; 1980 + 1996 1981 return mptcp_event_add_subflow(skb, ssk); 1997 1982 } 1998 1983 ··· 2030 2009 kfree_skb(skb); 2031 2010 } 2032 2011 2033 - void mptcp_event_addr_announced(const struct mptcp_sock *msk, 2012 + void mptcp_event_addr_announced(const struct sock *ssk, 2034 2013 const struct mptcp_addr_info *info) 2035 2014 { 2036 - struct net *net = sock_net((const struct sock *)msk); 2015 + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); 2016 + struct mptcp_sock *msk = mptcp_sk(subflow->conn); 2017 + struct net *net = sock_net(ssk); 2037 2018 struct nlmsghdr *nlh; 2038 2019 struct sk_buff *skb; 2039 2020 ··· 2057 
2034 if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, info->id)) 2058 2035 goto nla_put_failure; 2059 2036 2060 - if (nla_put_be16(skb, MPTCP_ATTR_DPORT, info->port)) 2037 + if (nla_put_be16(skb, MPTCP_ATTR_DPORT, 2038 + info->port == 0 ? 2039 + inet_sk(ssk)->inet_dport : 2040 + info->port)) 2061 2041 goto nla_put_failure; 2062 2042 2063 2043 switch (info->family) {
+1 -4
net/mptcp/protocol.c
··· 3321 3321 return false; 3322 3322 } 3323 3323 3324 - if (!msk->pm.server_side) 3324 + if (!list_empty(&subflow->node)) 3325 3325 goto out; 3326 3326 3327 3327 if (!mptcp_pm_allow_new_subflow(msk)) 3328 - goto err_prohibited; 3329 - 3330 - if (WARN_ON_ONCE(!list_empty(&subflow->node))) 3331 3328 goto err_prohibited; 3332 3329 3333 3330 /* active connections are already on conn_list.
+14 -4
net/mptcp/protocol.h
··· 753 753 bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk); 754 754 void mptcp_pm_subflow_check_next(struct mptcp_sock *msk, const struct sock *ssk, 755 755 const struct mptcp_subflow_context *subflow); 756 - void mptcp_pm_add_addr_received(struct mptcp_sock *msk, 756 + void mptcp_pm_add_addr_received(const struct sock *ssk, 757 757 const struct mptcp_addr_info *addr); 758 758 void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, 759 759 const struct mptcp_addr_info *addr); ··· 782 782 783 783 void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, 784 784 const struct sock *ssk, gfp_t gfp); 785 - void mptcp_event_addr_announced(const struct mptcp_sock *msk, const struct mptcp_addr_info *info); 785 + void mptcp_event_addr_announced(const struct sock *ssk, const struct mptcp_addr_info *info); 786 786 void mptcp_event_addr_removed(const struct mptcp_sock *msk, u8 id); 787 + bool mptcp_userspace_pm_active(const struct mptcp_sock *msk); 787 788 788 789 static inline bool mptcp_pm_should_add_signal(struct mptcp_sock *msk) 789 790 { ··· 810 809 static inline bool mptcp_pm_is_userspace(const struct mptcp_sock *msk) 811 810 { 812 811 return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_USERSPACE; 812 + } 813 + 814 + static inline bool mptcp_pm_is_kernel(const struct mptcp_sock *msk) 815 + { 816 + return READ_ONCE(msk->pm.pm_type) == MPTCP_PM_TYPE_KERNEL; 813 817 } 814 818 815 819 static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) ··· 911 905 return false; 912 906 } 913 907 908 + static inline bool is_active_ssk(struct mptcp_subflow_context *subflow) 909 + { 910 + return (subflow->request_mptcp || subflow->request_join); 911 + } 912 + 914 913 static inline bool subflow_simultaneous_connect(struct sock *sk) 915 914 { 916 915 struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); 917 - struct sock *parent = subflow->conn; 918 916 919 917 return sk->sk_state == TCP_ESTABLISHED && 920 - 
!mptcp_sk(parent)->pm.server_side && 918 + is_active_ssk(subflow) && 921 919 !subflow->conn_finished; 922 920 } 923 921
+4 -1
net/mptcp/subflow.c
··· 62 62 static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk) 63 63 { 64 64 return mptcp_is_fully_established((void *)msk) && 65 - READ_ONCE(msk->pm.accept_subflow); 65 + ((mptcp_pm_is_userspace(msk) && 66 + mptcp_userspace_pm_active(msk)) || 67 + READ_ONCE(msk->pm.accept_subflow)); 66 68 } 67 69 68 70 /* validate received token and create truncated hmac and nonce for SYN-ACK */ ··· 443 441 subflow->backup = mp_opt.backup; 444 442 subflow->thmac = mp_opt.thmac; 445 443 subflow->remote_nonce = mp_opt.nonce; 444 + subflow->remote_id = mp_opt.join_id; 446 445 pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d", 447 446 subflow, subflow->thmac, subflow->remote_nonce, 448 447 subflow->backup);
+11
tools/testing/selftests/net/mptcp/mptcp_join.sh
··· 2719 2719 chk_add_nr 0 0 2720 2720 fi 2721 2721 2722 + # userspace pm type does not echo add_addr without daemon 2723 + if reset "userspace pm no echo w/o daemon"; then 2724 + set_userspace_pm $ns2 2725 + pm_nl_set_limits $ns1 0 2 2726 + pm_nl_set_limits $ns2 0 2 2727 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal 2728 + run_tests $ns1 $ns2 10.0.1.1 2729 + chk_join_nr 0 0 0 2730 + chk_add_nr 1 0 2731 + fi 2732 + 2722 2733 # userspace pm type rejects join 2723 2734 if reset "userspace pm type rejects join"; then 2724 2735 set_userspace_pm $ns1