Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mptcp-fix-endpoints-with-signal-and-subflow-flags'

Matthieu Baerts says:

====================
mptcp: fix endpoints with 'signal' and 'subflow' flags

When looking at improving the user experience around the MPTCP endpoints
setup, I noticed that setting an endpoint with both the 'signal' and the
'subflow' flags -- as it has been done in the past by users according to
bug reports we got -- was resulting in only announcing the endpoint, but
not using it to create subflows: the 'subflow' flag was then ignored.

My initial thought was to modify IPRoute2 to warn the user when the two
flags were set, but it doesn't sound normal to ignore one of them. I
then looked at modifying the kernel not to allow having the two flags
set, but when discussing that with Mat, we thought it was maybe
not ideal to do that, as there might be use-cases, and we might break some
configs. Then I saw it was working before v5.17. So instead, I fixed the
support on the kernel side (patch 5) using Paolo's suggestion. This also
includes a fix on the options side (patch 1: for v5.11+), an explicit
deny of some options combinations (patch 2: for v5.18+), and some
refactoring (patches 3 and 4) to ease the inclusion of the patch 5.

While at it, I added a new selftest (patch 7) to validate this case --
including a modification of the chk_add_nr helper to invert the sides
where the counters are checked (patch 6) -- and allowed ADD_ADDR echo
just after the MP_JOIN 3WHS.

The selftests modifications have the same Fixes tag as the previous
commit, but no 'Cc: Stable': if the backport can work, that's good --
but it still needs to be verified by running the selftests -- if not, no
need to worry, many CIs will use the selftests from the last stable
version to validate previous stable releases.
====================

Link: https://patch.msgid.link/20240731-upstream-net-20240731-mptcp-endp-subflow-signal-v1-0-c8a9b036493b@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+73 -32
+2 -1
net/mptcp/options.c
··· 958 958 959 959 if (subflow->remote_key_valid && 960 960 (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || 961 - ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) { 961 + ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && 962 + (!mp_opt->echo || subflow->mp_join)))) { 962 963 /* subflows are fully established as soon as we get any 963 964 * additional ack, including ADD_ADDR. 964 965 */
+30 -17
net/mptcp/pm_netlink.c
··· 348 348 add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); 349 349 350 350 if (add_entry) { 351 - if (mptcp_pm_is_kernel(msk)) 351 + if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) 352 352 return false; 353 353 354 354 sk_reset_timer(sk, &add_entry->add_timer, ··· 512 512 513 513 static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) 514 514 { 515 + struct mptcp_pm_addr_entry *local, *signal_and_subflow = NULL; 515 516 struct sock *sk = (struct sock *)msk; 516 - struct mptcp_pm_addr_entry *local; 517 517 unsigned int add_addr_signal_max; 518 518 unsigned int local_addr_max; 519 519 struct pm_nl_pernet *pernet; ··· 555 555 556 556 /* check first for announce */ 557 557 if (msk->pm.add_addr_signaled < add_addr_signal_max) { 558 - local = select_signal_address(pernet, msk); 559 - 560 558 /* due to racing events on both ends we can reach here while 561 559 * previous add address is still running: if we invoke now 562 560 * mptcp_pm_announce_addr(), that will fail and the ··· 565 567 if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) 566 568 return; 567 569 568 - if (local) { 569 - if (mptcp_pm_alloc_anno_list(msk, &local->addr)) { 570 - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); 571 - msk->pm.add_addr_signaled++; 572 - mptcp_pm_announce_addr(msk, &local->addr, false); 573 - mptcp_pm_nl_addr_send_ack(msk); 574 - } 575 - } 570 + local = select_signal_address(pernet, msk); 571 + if (!local) 572 + goto subflow; 573 + 574 + /* If the alloc fails, we are on memory pressure, not worth 575 + * continuing, and trying to create subflows. 
576 + */ 577 + if (!mptcp_pm_alloc_anno_list(msk, &local->addr)) 578 + return; 579 + 580 + __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); 581 + msk->pm.add_addr_signaled++; 582 + mptcp_pm_announce_addr(msk, &local->addr, false); 583 + mptcp_pm_nl_addr_send_ack(msk); 584 + 585 + if (local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) 586 + signal_and_subflow = local; 576 587 } 577 588 589 + subflow: 578 590 /* check if should create a new subflow */ 579 591 while (msk->pm.local_addr_used < local_addr_max && 580 592 msk->pm.subflows < subflows_max) { ··· 592 584 bool fullmesh; 593 585 int i, nr; 594 586 595 - local = select_local_address(pernet, msk); 596 - if (!local) 597 - break; 587 + if (signal_and_subflow) { 588 + local = signal_and_subflow; 589 + signal_and_subflow = NULL; 590 + } else { 591 + local = select_local_address(pernet, msk); 592 + if (!local) 593 + break; 594 + } 598 595 599 596 fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); 600 597 ··· 1341 1328 if (ret < 0) 1342 1329 return ret; 1343 1330 1344 - if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { 1345 - GENL_SET_ERR_MSG(info, "flags must have signal when using port"); 1331 + if (addr.addr.port && !address_use_port(&addr)) { 1332 + GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port"); 1346 1333 return -EINVAL; 1347 1334 } 1348 1335
+41 -14
tools/testing/selftests/net/mptcp/mptcp_join.sh
··· 1415 1415 local add_nr=$1 1416 1416 local echo_nr=$2 1417 1417 local port_nr=${3:-0} 1418 - local syn_nr=${4:-$port_nr} 1419 - local syn_ack_nr=${5:-$port_nr} 1420 - local ack_nr=${6:-$port_nr} 1421 - local mis_syn_nr=${7:-0} 1422 - local mis_ack_nr=${8:-0} 1418 + local ns_invert=${4:-""} 1419 + local syn_nr=$port_nr 1420 + local syn_ack_nr=$port_nr 1421 + local ack_nr=$port_nr 1422 + local mis_syn_nr=0 1423 + local mis_ack_nr=0 1424 + local ns_tx=$ns1 1425 + local ns_rx=$ns2 1426 + local extra_msg="" 1423 1427 local count 1424 1428 local timeout 1425 1429 1426 - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) 1430 + if [[ $ns_invert = "invert" ]]; then 1431 + ns_tx=$ns2 1432 + ns_rx=$ns1 1433 + extra_msg="invert" 1434 + fi 1435 + 1436 + timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) 1427 1437 1428 1438 print_check "add" 1429 - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") 1439 + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") 1430 1440 if [ -z "$count" ]; then 1431 1441 print_skip 1432 1442 # if the test configured a short timeout tolerate greater then expected ··· 1448 1438 fi 1449 1439 1450 1440 print_check "echo" 1451 - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") 1441 + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd") 1452 1442 if [ -z "$count" ]; then 1453 1443 print_skip 1454 1444 elif [ "$count" != "$echo_nr" ]; then ··· 1459 1449 1460 1450 if [ $port_nr -gt 0 ]; then 1461 1451 print_check "pt" 1462 - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") 1452 + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd") 1463 1453 if [ -z "$count" ]; then 1464 1454 print_skip 1465 1455 elif [ "$count" != "$port_nr" ]; then ··· 1469 1459 fi 1470 1460 1471 1461 print_check "syn" 1472 - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") 1462 + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx") 1473 1463 if [ -z "$count" ]; then 1474 1464 print_skip 
1475 1465 elif [ "$count" != "$syn_nr" ]; then ··· 1480 1470 fi 1481 1471 1482 1472 print_check "synack" 1483 - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") 1473 + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx") 1484 1474 if [ -z "$count" ]; then 1485 1475 print_skip 1486 1476 elif [ "$count" != "$syn_ack_nr" ]; then ··· 1491 1481 fi 1492 1482 1493 1483 print_check "ack" 1494 - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") 1484 + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx") 1495 1485 if [ -z "$count" ]; then 1496 1486 print_skip 1497 1487 elif [ "$count" != "$ack_nr" ]; then ··· 1502 1492 fi 1503 1493 1504 1494 print_check "syn" 1505 - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") 1495 + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx") 1506 1496 if [ -z "$count" ]; then 1507 1497 print_skip 1508 1498 elif [ "$count" != "$mis_syn_nr" ]; then ··· 1513 1503 fi 1514 1504 1515 1505 print_check "ack" 1516 - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") 1506 + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx") 1517 1507 if [ -z "$count" ]; then 1518 1508 print_skip 1519 1509 elif [ "$count" != "$mis_ack_nr" ]; then ··· 1523 1513 print_ok 1524 1514 fi 1525 1515 fi 1516 + 1517 + print_info "$extra_msg" 1526 1518 } 1527 1519 1528 1520 chk_add_tx_nr() ··· 1987 1975 run_tests $ns1 $ns2 10.0.1.1 1988 1976 chk_join_nr 2 2 2 1989 1977 chk_add_nr 1 1 1978 + fi 1979 + 1980 + # uncommon: subflow and signal flags on the same endpoint 1981 + # or because the user wrongly picked both, but still expects the client 1982 + # to create additional subflows 1983 + if reset "subflow and signal together"; then 1984 + pm_nl_set_limits $ns1 0 2 1985 + pm_nl_set_limits $ns2 0 2 1986 + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal,subflow 1987 + run_tests $ns1 $ns2 10.0.1.1 1988 + chk_join_nr 1 1 1 1989 + chk_add_nr 1 1 0 invert # only 
initiated by ns2 1990 + chk_add_nr 0 0 0 # none initiated by ns1 1991 + chk_rst_nr 0 0 invert # no RST sent by the client 1992 + chk_rst_nr 0 0 # no RST sent by the server 1990 1993 fi 1991 1994 1992 1995 # accept and use add_addr with additional subflows