Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

mptcp: faster active backup recovery

The msk can use backup subflows to transmit in-sequence data
only if there are no other active subflows. In an active backup
scenario, the MPTCP connection can make forward progress only
via MPTCP retransmissions - rtx can pick backup subflows.

This patch introduces a new flag for MPTCP subflows: if the
underlying TCP connection has made no progress for a long time,
and there are other less problematic subflows available, the
given subflow becomes stale.

Stale subflows are not considered active: if all non backup
subflows become stale, the MPTCP scheduler can pick backup
subflows for plain transmissions.

Stale subflows can return to the active state as soon as any reply
from the peer is observed.

Active backup scenarios can now leverage the available b/w
with no restriction.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/207
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Paolo Abeni and committed by
David S. Miller
ff5a0b42 6da14d74

+100 -5
+12
Documentation/networking/mptcp-sysctl.rst
··· 45 45 This is a per-namespace sysctl. 46 46 47 47 Default: 1 48 + 49 + stale_loss_cnt - INTEGER 50 + The number of MPTCP-level retransmission intervals with no traffic and 51 + pending outstanding data on a given subflow required to declare it stale. 52 + The packet scheduler ignores stale subflows. 53 + A low stale_loss_cnt value allows for fast active-backup switch-over, 54 + an high value maximize links utilization on edge scenarios e.g. lossy 55 + link with high BER or peer pausing the data processing. 56 + 57 + This is a per-namespace sysctl. 58 + 59 + Default: 4
+14
net/mptcp/ctrl.c
··· 22 22 #endif 23 23 24 24 unsigned int add_addr_timeout; 25 + unsigned int stale_loss_cnt; 25 26 u8 mptcp_enabled; 26 27 u8 checksum_enabled; 27 28 u8 allow_join_initial_addr_port; ··· 53 52 return mptcp_get_pernet(net)->allow_join_initial_addr_port; 54 53 } 55 54 55 + unsigned int mptcp_stale_loss_cnt(const struct net *net) 56 + { 57 + return mptcp_get_pernet(net)->stale_loss_cnt; 58 + } 59 + 56 60 static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) 57 61 { 58 62 pernet->mptcp_enabled = 1; 59 63 pernet->add_addr_timeout = TCP_RTO_MAX; 60 64 pernet->checksum_enabled = 0; 61 65 pernet->allow_join_initial_addr_port = 1; 66 + pernet->stale_loss_cnt = 4; 62 67 } 63 68 64 69 #ifdef CONFIG_SYSCTL ··· 102 95 .extra1 = SYSCTL_ZERO, 103 96 .extra2 = SYSCTL_ONE 104 97 }, 98 + { 99 + .procname = "stale_loss_cnt", 100 + .maxlen = sizeof(unsigned int), 101 + .mode = 0644, 102 + .proc_handler = proc_douintvec_minmax, 103 + }, 105 104 {} 106 105 }; 107 106 ··· 127 114 table[1].data = &pernet->add_addr_timeout; 128 115 table[2].data = &pernet->checksum_enabled; 129 116 table[3].data = &pernet->allow_join_initial_addr_port; 117 + table[4].data = &pernet->stale_loss_cnt; 130 118 131 119 hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); 132 120 if (!hdr)
+2
net/mptcp/pm.c
··· 320 320 } else if (subflow->stale_rcv_tstamp == rcv_tstamp) { 321 321 if (subflow->stale_count < U8_MAX) 322 322 subflow->stale_count++; 323 + mptcp_pm_nl_subflow_chk_stale(msk, ssk); 323 324 } else { 324 325 subflow->stale_count = 0; 326 + mptcp_subflow_set_active(subflow); 325 327 } 326 328 } 327 329
+38
net/mptcp/pm_netlink.c
··· 46 46 spinlock_t lock; 47 47 struct list_head local_addr_list; 48 48 unsigned int addrs; 49 + unsigned int stale_loss_cnt; 49 50 unsigned int add_addr_signal_max; 50 51 unsigned int add_addr_accept_max; 51 52 unsigned int local_addr_max; ··· 899 898 [MPTCP_PM_ATTR_RCV_ADD_ADDRS] = { .type = NLA_U32, }, 900 899 [MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, }, 901 900 }; 901 + 902 + void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) 903 + { 904 + struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); 905 + struct sock *sk = (struct sock *)msk; 906 + unsigned int active_max_loss_cnt; 907 + struct net *net = sock_net(sk); 908 + unsigned int stale_loss_cnt; 909 + bool slow; 910 + 911 + stale_loss_cnt = mptcp_stale_loss_cnt(net); 912 + if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt) 913 + return; 914 + 915 + /* look for another available subflow not in loss state */ 916 + active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1); 917 + mptcp_for_each_subflow(msk, iter) { 918 + if (iter != subflow && mptcp_subflow_active(iter) && 919 + iter->stale_count < active_max_loss_cnt) { 920 + /* we have some alternatives, try to mark this subflow as idle ...*/ 921 + slow = lock_sock_fast(ssk); 922 + if (!tcp_rtx_and_write_queues_empty(ssk)) { 923 + subflow->stale = 1; 924 + __mptcp_retransmit_pending_data(sk); 925 + } 926 + unlock_sock_fast(ssk, slow); 927 + 928 + /* always try to push the pending data regarless of re-injections: 929 + * we can possibly use backup subflows now, and subflow selection 930 + * is cheap under the msk socket lock 931 + */ 932 + __mptcp_push_pending(sk, 0); 933 + return; 934 + } 935 + } 936 + } 902 937 903 938 static int mptcp_pm_family_to_addr(int family) 904 939 { ··· 1959 1922 1960 1923 INIT_LIST_HEAD_RCU(&pernet->local_addr_list); 1961 1924 pernet->next_id = 1; 1925 + pernet->stale_loss_cnt = 4; 1962 1926 spin_lock_init(&pernet->lock); 1963 1927 1964 1928 /* No need 
to initialize other pernet fields, the struct is zeroed at
+24 -3
net/mptcp/protocol.c
··· 1391 1391 u64 ratio; 1392 1392 }; 1393 1393 1394 + void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow) 1395 + { 1396 + if (!subflow->stale) 1397 + return; 1398 + 1399 + subflow->stale = 0; 1400 + } 1401 + 1402 + bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) 1403 + { 1404 + if (unlikely(subflow->stale)) { 1405 + u32 rcv_tstamp = READ_ONCE(tcp_sk(mptcp_subflow_tcp_sock(subflow))->rcv_tstamp); 1406 + 1407 + if (subflow->stale_rcv_tstamp == rcv_tstamp) 1408 + return false; 1409 + 1410 + mptcp_subflow_set_active(subflow); 1411 + } 1412 + return __mptcp_subflow_active(subflow); 1413 + } 1414 + 1394 1415 /* implement the mptcp packet scheduler; 1395 1416 * returns the subflow that will transmit the next DSS 1396 1417 * additionally updates the rtx timeout ··· 1493 1472 release_sock(ssk); 1494 1473 } 1495 1474 1496 - static void __mptcp_push_pending(struct sock *sk, unsigned int flags) 1475 + void __mptcp_push_pending(struct sock *sk, unsigned int flags) 1497 1476 { 1498 1477 struct sock *prev_ssk = NULL, *ssk = NULL; 1499 1478 struct mptcp_sock *msk = mptcp_sk(sk); ··· 2136 2115 * 2137 2116 * A backup subflow is returned only if that is the only kind available. 2138 2117 */ 2139 - static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) 2118 + static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk) 2140 2119 { 2141 2120 struct sock *backup = NULL, *pick = NULL; 2142 2121 struct mptcp_subflow_context *subflow; ··· 2150 2129 mptcp_for_each_subflow(msk, subflow) { 2151 2130 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 2152 2131 2153 - if (!mptcp_subflow_active(subflow)) 2132 + if (!__mptcp_subflow_active(subflow)) 2154 2133 continue; 2155 2134 2156 2135 /* still data outstanding at TCP level? skip this */
+10 -2
net/mptcp/protocol.h
··· 432 432 send_mp_prio : 1, 433 433 rx_eof : 1, 434 434 can_ack : 1, /* only after processing the remote a key */ 435 - disposable : 1; /* ctx can be free at ulp release time */ 435 + disposable : 1, /* ctx can be free at ulp release time */ 436 + stale : 1; /* unable to snd/rcv data, do not use for xmit */ 436 437 enum mptcp_data_avail data_avail; 437 438 u32 remote_nonce; 438 439 u64 thmac; ··· 561 560 unsigned int mptcp_get_add_addr_timeout(const struct net *net); 562 561 int mptcp_is_checksum_enabled(const struct net *net); 563 562 int mptcp_allow_join_id0(const struct net *net); 563 + unsigned int mptcp_stale_loss_cnt(const struct net *net); 564 564 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, 565 565 struct mptcp_options_received *mp_opt); 566 566 bool __mptcp_retransmit_pending_data(struct sock *sk); 567 + void __mptcp_push_pending(struct sock *sk, unsigned int flags); 567 568 bool mptcp_subflow_data_available(struct sock *sk); 568 569 void __init mptcp_subflow_init(void); 569 570 void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); ··· 584 581 struct sockaddr_storage *addr, 585 582 unsigned short family); 586 583 587 - static inline bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) 584 + static inline bool __mptcp_subflow_active(struct mptcp_subflow_context *subflow) 588 585 { 589 586 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 590 587 ··· 595 592 /* only send if our side has not closed yet */ 596 593 return ((1 << ssk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)); 597 594 } 595 + 596 + void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow); 597 + 598 + bool mptcp_subflow_active(struct mptcp_subflow_context *subflow); 598 599 599 600 static inline void mptcp_subflow_tcp_fallback(struct sock *sk, 600 601 struct mptcp_subflow_context *ctx) ··· 706 699 void __init mptcp_pm_init(void); 707 700 void mptcp_pm_data_init(struct mptcp_sock *msk); 708 701 void 
mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); 702 + void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); 709 703 void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); 710 704 void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk, gfp_t gfp); 711 705 bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk);