Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mptcp-pm-in-kernel-fullmesh-endp-nb-bind-cases'

Matthieu Baerts says:

====================
mptcp: pm: in-kernel: fullmesh endp nb + bind cases

Here is a small optimisation for the in-kernel PM, joined by a small
behavioural change to avoid confusion, and followed by a few more
tests.

- Patch 1: record the number of fullmesh endpoints, to avoid iterating
  over all endpoints to check if one is marked as fullmesh.

- Patch 2: when at least one endpoint is marked as fullmesh, only use
these endpoints when reacting to an ADD_ADDR, even if there are no
endpoints for this IP family: this is less confusing.

- Patch 3: reduce duplicated code to prepare the next patch.

- Patch 4: extra "bind" cases: the listening socket restricts the bind to
  one IP address, not allowing MP_JOIN to extra IP addresses, except if
  another listening socket accepts them.
====================

Link: https://patch.msgid.link/20251101-net-next-mptcp-fm-endp-nb-bind-v1-0-b4166772d6bb@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+213 -26
+2 -1
include/uapi/linux/mptcp.h
··· 70 70 __u64 mptcpi_bytes_acked; 71 71 __u8 mptcpi_subflows_total; 72 72 __u8 mptcpi_endp_laminar_max; 73 - __u8 reserved[2]; 73 + __u8 mptcpi_endp_fullmesh_max; 74 + __u8 reserved; 74 75 __u32 mptcpi_last_data_sent; 75 76 __u32 mptcpi_last_data_recv; 76 77 __u32 mptcpi_last_ack_recv;
+32 -4
net/mptcp/pm_kernel.c
··· 22 22 u8 endp_signal_max; 23 23 u8 endp_subflow_max; 24 24 u8 endp_laminar_max; 25 + u8 endp_fullmesh_max; 25 26 u8 limit_add_addr_accepted; 26 27 u8 limit_extra_subflows; 27 28 u8 next_id; ··· 70 69 return READ_ONCE(pernet->endp_laminar_max); 71 70 } 72 71 EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_laminar_max); 72 + 73 + u8 mptcp_pm_get_endp_fullmesh_max(const struct mptcp_sock *msk) 74 + { 75 + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 76 + 77 + return READ_ONCE(pernet->endp_fullmesh_max); 78 + } 79 + EXPORT_SYMBOL_GPL(mptcp_pm_get_endp_fullmesh_max); 73 80 74 81 u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk) 75 82 { ··· 609 600 struct mptcp_pm_local *locals) 610 601 { 611 602 bool c_flag_case = remote->id && mptcp_pm_add_addr_c_flag_case(msk); 612 - int i; 613 603 614 604 /* If there is at least one MPTCP endpoint with a fullmesh flag */ 615 - i = fill_local_addresses_vec_fullmesh(msk, remote, locals, c_flag_case); 616 - if (i) 617 - return i; 605 + if (mptcp_pm_get_endp_fullmesh_max(msk)) 606 + return fill_local_addresses_vec_fullmesh(msk, remote, locals, 607 + c_flag_case); 618 608 619 609 /* If there is at least one MPTCP endpoint with a laminar flag */ 620 610 if (mptcp_pm_get_endp_laminar_max(msk)) ··· 797 789 if (entry->flags & MPTCP_PM_ADDR_FLAG_LAMINAR) { 798 790 addr_max = pernet->endp_laminar_max; 799 791 WRITE_ONCE(pernet->endp_laminar_max, addr_max + 1); 792 + } 793 + if (entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { 794 + addr_max = pernet->endp_fullmesh_max; 795 + WRITE_ONCE(pernet->endp_fullmesh_max, addr_max + 1); 800 796 } 801 797 802 798 pernet->endpoints++; ··· 1199 1187 addr_max = pernet->endp_laminar_max; 1200 1188 WRITE_ONCE(pernet->endp_laminar_max, addr_max - 1); 1201 1189 } 1190 + if (entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { 1191 + addr_max = pernet->endp_fullmesh_max; 1192 + WRITE_ONCE(pernet->endp_fullmesh_max, addr_max - 1); 1193 + } 1202 1194 1203 1195 pernet->endpoints--; 1204 1196 
list_del_rcu(&entry->list); ··· 1518 1502 changed = (local->flags ^ entry->flags) & mask; 1519 1503 entry->flags = (entry->flags & ~mask) | (local->flags & mask); 1520 1504 *local = *entry; 1505 + 1506 + if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH) { 1507 + u8 addr_max = pernet->endp_fullmesh_max; 1508 + 1509 + if (entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) 1510 + addr_max++; 1511 + else 1512 + addr_max--; 1513 + 1514 + WRITE_ONCE(pernet->endp_fullmesh_max, addr_max); 1515 + } 1516 + 1521 1517 spin_unlock_bh(&pernet->lock); 1522 1518 1523 1519 mptcp_pm_nl_set_flags_all(net, local, changed);
+1
net/mptcp/protocol.h
··· 1183 1183 u8 mptcp_pm_get_endp_signal_max(const struct mptcp_sock *msk); 1184 1184 u8 mptcp_pm_get_endp_subflow_max(const struct mptcp_sock *msk); 1185 1185 u8 mptcp_pm_get_endp_laminar_max(const struct mptcp_sock *msk); 1186 + u8 mptcp_pm_get_endp_fullmesh_max(const struct mptcp_sock *msk); 1186 1187 u8 mptcp_pm_get_limit_add_addr_accepted(const struct mptcp_sock *msk); 1187 1188 u8 mptcp_pm_get_limit_extra_subflows(const struct mptcp_sock *msk); 1188 1189
+2
net/mptcp/sockopt.c
··· 982 982 mptcp_pm_get_endp_subflow_max(msk); 983 983 info->mptcpi_endp_laminar_max = 984 984 mptcp_pm_get_endp_laminar_max(msk); 985 + info->mptcpi_endp_fullmesh_max = 986 + mptcp_pm_get_endp_fullmesh_max(msk); 985 987 } 986 988 987 989 if (__mptcp_check_fallback(msk))
+9 -1
tools/testing/selftests/net/mptcp/mptcp_connect.c
··· 1064 1064 socklen_t salen = sizeof(ss); 1065 1065 char a[INET6_ADDRSTRLEN]; 1066 1066 char b[INET6_ADDRSTRLEN]; 1067 + const char *iface; 1068 + size_t len; 1067 1069 1068 1070 if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) { 1069 1071 perror("getpeername"); ··· 1075 1073 xgetnameinfo((struct sockaddr *)&ss, salen, 1076 1074 a, sizeof(a), b, sizeof(b)); 1077 1075 1078 - if (strcmp(cfg_host, a) || strcmp(cfg_port, b)) 1076 + iface = strchr(cfg_host, '%'); 1077 + if (iface) 1078 + len = iface - cfg_host; 1079 + else 1080 + len = strlen(cfg_host) + 1; 1081 + 1082 + if (strncmp(cfg_host, a, len) || strcmp(cfg_port, b)) 1079 1083 fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__, 1080 1084 cfg_host, a, cfg_port, b); 1081 1085 }
+167 -20
tools/testing/selftests/net/mptcp/mptcp_join.sh
··· 62 62 unset fastclose 63 63 unset fullmesh 64 64 unset speed 65 + unset bind_addr 65 66 unset join_syn_rej 66 67 unset join_csum_ns1 67 68 unset join_csum_ns2 ··· 646 645 done 647 646 } 648 647 648 + wait_ll_ready() 649 + { 650 + local ns="${1}" 651 + 652 + local i 653 + for i in $(seq 50); do 654 + ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" | 655 + grep -qw "tentative" || break 656 + sleep 0.1 657 + done 658 + } 659 + 660 + get_ll_addr() 661 + { 662 + local ns="${1}" 663 + local iface="${2}" 664 + 665 + ip -n "${ns}" -6 addr show dev "${iface}" scope link | 666 + grep "inet6 fe80" | sed 's#.*\(fe80::.*\)/.*#\1#' 667 + } 668 + 649 669 kill_events_pids() 650 670 { 651 671 mptcp_lib_kill_wait $evts_ns1_pid ··· 973 951 local FAILING_LINKS=${FAILING_LINKS:-""} 974 952 local fastclose=${fastclose:-""} 975 953 local speed=${speed:-"fast"} 954 + local bind_addr=${bind_addr:-"::"} 955 + local listener_in="${sin}" 956 + local connector_in="${cin}" 976 957 port=$(get_port) 977 958 978 959 :> "$cout" ··· 1024 999 1025 1000 extra_srv_args="$extra_args $extra_srv_args" 1026 1001 if [ "$test_linkfail" -gt 1 ];then 1027 - timeout ${timeout_test} \ 1028 - ip netns exec ${listener_ns} \ 1029 - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ 1030 - $extra_srv_args "::" < "$sinfail" > "$sout" & 1031 - else 1032 - timeout ${timeout_test} \ 1033 - ip netns exec ${listener_ns} \ 1034 - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ 1035 - $extra_srv_args "::" < "$sin" > "$sout" & 1002 + listener_in="${sinfail}" 1036 1003 fi 1004 + timeout ${timeout_test} \ 1005 + ip netns exec ${listener_ns} \ 1006 + ./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \ 1007 + ${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" & 1037 1008 local spid=$! 
1038 1009 1039 1010 mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" ··· 1041 1020 ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ 1042 1021 $extra_cl_args $connect_addr < "$cin" > "$cout" & 1043 1022 elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then 1023 + connector_in="${cinsent}" 1044 1024 ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ 1045 1025 tee "$cinsent" | \ 1046 1026 timeout ${timeout_test} \ ··· 1049 1027 ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ 1050 1028 $extra_cl_args $connect_addr > "$cout" & 1051 1029 else 1030 + connector_in="${cinsent}" 1052 1031 tee "$cinsent" < "$cinfail" | \ 1053 1032 timeout ${timeout_test} \ 1054 1033 ip netns exec ${connector_ns} \ ··· 1080 1057 return 1 1081 1058 fi 1082 1059 1083 - if [ "$test_linkfail" -gt 1 ];then 1084 - check_transfer $sinfail $cout "file received by client" $trunc_size 1085 - else 1086 - check_transfer $sin $cout "file received by client" $trunc_size 1087 - fi 1060 + check_transfer $listener_in $cout "file received by client" $trunc_size 1088 1061 retc=$? 1089 - if [ "$test_linkfail" -eq 0 ];then 1090 - check_transfer $cin $sout "file received by server" $trunc_size 1091 - else 1092 - check_transfer $cinsent $sout "file received by server" $trunc_size 1093 - fi 1062 + check_transfer $connector_in $sout "file received by server" $trunc_size 1094 1063 rets=$? 
1095 1064 1096 1065 [ $retc -eq 0 ] && [ $rets -eq 0 ] ··· 2967 2952 pm_nl_add_endpoint $ns1 10.0.1.1 flags signal 2968 2953 speed=slow \ 2969 2954 run_tests $ns1 $ns2 dead:beef:2::1 2970 - chk_join_nr 1 1 1 2955 + if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_fullmesh_max$"; then 2956 + chk_join_nr 0 0 0 2957 + else 2958 + chk_join_nr 1 1 1 2959 + fi 2971 2960 fi 2972 2961 2973 2962 # fullmesh still tries to create all the possibly subflows with ··· 3249 3230 cond_stop_capture 3250 3231 3251 3232 chk_mpc_endp_attempt ${retl} 1 3233 + fi 3234 + } 3235 + 3236 + bind_tests() 3237 + { 3238 + # bind to one address should not allow extra subflows to other addresses 3239 + if reset "bind main address v4, no join v4"; then 3240 + pm_nl_set_limits $ns1 0 2 3241 + pm_nl_set_limits $ns2 2 2 3242 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal 3243 + bind_addr="10.0.1.1" \ 3244 + run_tests $ns1 $ns2 10.0.1.1 3245 + join_syn_tx=1 \ 3246 + chk_join_nr 0 0 0 3247 + chk_add_nr 1 1 3248 + fi 3249 + 3250 + # bind to one address should not allow extra subflows to other addresses 3251 + if reset "bind main address v6, no join v6"; then 3252 + pm_nl_set_limits $ns1 0 2 3253 + pm_nl_set_limits $ns2 2 2 3254 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal 3255 + bind_addr="dead:beef:1::1" \ 3256 + run_tests $ns1 $ns2 dead:beef:1::1 3257 + join_syn_tx=1 \ 3258 + chk_join_nr 0 0 0 3259 + chk_add_nr 1 1 3260 + fi 3261 + 3262 + # multiple binds to allow extra subflows to other addresses 3263 + if reset "multiple bind to allow joins v4"; then 3264 + local extra_bind 3265 + 3266 + pm_nl_set_limits $ns1 0 2 3267 + pm_nl_set_limits $ns2 2 2 3268 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal 3269 + 3270 + # Launching another app listening on a different address 3271 + # Note: it could be a totally different app, e.g. nc, socat, ... 3272 + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ 3273 + -s MPTCP 10.0.2.1 & 3274 + extra_bind=$! 
3275 + 3276 + bind_addr="10.0.1.1" \ 3277 + run_tests $ns1 $ns2 10.0.1.1 3278 + chk_join_nr 1 1 1 3279 + chk_add_nr 1 1 3280 + 3281 + kill ${extra_bind} 3282 + fi 3283 + 3284 + # multiple binds to allow extra subflows to other addresses 3285 + if reset "multiple bind to allow joins v6"; then 3286 + local extra_bind 3287 + 3288 + pm_nl_set_limits $ns1 0 2 3289 + pm_nl_set_limits $ns2 2 2 3290 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal 3291 + 3292 + # Launching another app listening on a different address 3293 + # Note: it could be a totally different app, e.g. nc, socat, ... 3294 + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ 3295 + -s MPTCP dead:beef:2::1 & 3296 + extra_bind=$! 3297 + 3298 + bind_addr="dead:beef:1::1" \ 3299 + run_tests $ns1 $ns2 dead:beef:1::1 3300 + chk_join_nr 1 1 1 3301 + chk_add_nr 1 1 3302 + 3303 + kill ${extra_bind} 3304 + fi 3305 + 3306 + # multiple binds to allow extra subflows to other addresses: v6 LL case 3307 + if reset "multiple bind to allow joins v6 link-local routing"; then 3308 + local extra_bind ns1ll1 ns1ll2 3309 + 3310 + ns1ll1="$(get_ll_addr $ns1 ns1eth1)" 3311 + ns1ll2="$(get_ll_addr $ns1 ns1eth2)" 3312 + 3313 + pm_nl_set_limits $ns1 0 2 3314 + pm_nl_set_limits $ns2 2 2 3315 + pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal 3316 + 3317 + wait_ll_ready $ns1 # to be able to bind 3318 + wait_ll_ready $ns2 # also needed to bind on the client side 3319 + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ 3320 + -s MPTCP "${ns1ll2}%ns1eth2" & 3321 + extra_bind=$! 3322 + 3323 + bind_addr="${ns1ll1}%ns1eth1" \ 3324 + run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1" 3325 + # it is not possible to connect to the announced LL addr without 3326 + # specifying the outgoing interface. 
3327 + join_connect_err=1 \ 3328 + chk_join_nr 0 0 0 3329 + chk_add_nr 1 1 3330 + 3331 + kill ${extra_bind} 3332 + fi 3333 + 3334 + # multiple binds to allow extra subflows to v6 LL addresses: laminar 3335 + if reset "multiple bind to allow joins v6 link-local laminar" && 3336 + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then 3337 + local extra_bind ns1ll1 ns1ll2 ns2ll2 3338 + 3339 + ns1ll1="$(get_ll_addr $ns1 ns1eth1)" 3340 + ns1ll2="$(get_ll_addr $ns1 ns1eth2)" 3341 + ns2ll2="$(get_ll_addr $ns2 ns2eth2)" 3342 + 3343 + pm_nl_set_limits $ns1 0 2 3344 + pm_nl_set_limits $ns2 2 2 3345 + pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal 3346 + pm_nl_add_endpoint $ns2 "${ns2ll2}" flags laminar dev ns2eth2 3347 + 3348 + wait_ll_ready $ns1 # to be able to bind 3349 + wait_ll_ready $ns2 # also needed to bind on the client side 3350 + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ 3351 + -s MPTCP "${ns1ll2}%ns1eth2" & 3352 + extra_bind=$! 3353 + 3354 + bind_addr="${ns1ll1}%ns1eth1" \ 3355 + run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1" 3356 + chk_join_nr 1 1 1 3357 + chk_add_nr 1 1 3358 + 3359 + kill ${extra_bind} 3252 3360 fi 3253 3361 } 3254 3362 ··· 4333 4187 M@mixed_tests 4334 4188 b@backup_tests 4335 4189 p@add_addr_ports_tests 4190 + B@bind_tests 4336 4191 k@syncookies_tests 4337 4192 S@checksum_tests 4338 4193 d@deny_join_id0_tests