Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mptcp-support-changes-to-initial-subflow-priority'

Mat Martineau says:

====================
mptcp: Support changes to initial subflow priority

This series updates the in-kernel MPTCP path manager to allow changes to
subflow priority for the first subflow created for each MPTCP connection
(the one created with the MP_CAPABLE handshake).

Patches 1 and 2 do some refactoring to simplify the new functionality.

Patch 3 introduces the new feature to change the initial subflow
priority and send the MP_PRIO header on that subflow.

Patch 4 cleans up code related to tracking endpoint ids on the initial
subflow.

Patch 5 adds a selftest to confirm that subflow priorities are updated
as expected.
====================

Link: https://lore.kernel.org/r/20220711191633.80826-1-mathew.j.martineau@linux.intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+105 -58
+73 -56
net/mptcp/pm_netlink.c
··· 413 413 int i; 414 414 415 415 for (i = 0; i < nr; i++) { 416 - if (mptcp_addresses_equal(&addrs[i], addr, addr->port)) 416 + if (addrs[i].id == addr->id) 417 417 return true; 418 418 } 419 419 ··· 449 449 mptcp_for_each_subflow(msk, subflow) { 450 450 ssk = mptcp_subflow_tcp_sock(subflow); 451 451 remote_address((struct sock_common *)ssk, &addrs[i]); 452 - if (deny_id0 && mptcp_addresses_equal(&addrs[i], &remote, false)) 452 + addrs[i].id = subflow->remote_id; 453 + if (deny_id0 && !addrs[i].id) 453 454 continue; 454 455 455 456 if (!lookup_address_in_vec(addrs, i, &addrs[i]) && ··· 462 461 } 463 462 464 463 return i; 464 + } 465 + 466 + static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, 467 + bool prio, bool backup) 468 + { 469 + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 470 + bool slow; 471 + 472 + pr_debug("send ack for %s", 473 + prio ? "mp_prio" : (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr")); 474 + 475 + slow = lock_sock_fast(ssk); 476 + if (prio) { 477 + if (subflow->backup != backup) 478 + msk->last_snd = NULL; 479 + 480 + subflow->send_mp_prio = 1; 481 + subflow->backup = backup; 482 + subflow->request_bkup = backup; 483 + } 484 + 485 + __mptcp_subflow_send_ack(ssk); 486 + unlock_sock_fast(ssk, slow); 487 + } 488 + 489 + static void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, 490 + bool prio, bool backup) 491 + { 492 + spin_unlock_bh(&msk->pm.lock); 493 + __mptcp_pm_send_ack(msk, subflow, prio, backup); 494 + spin_lock_bh(&msk->pm.lock); 465 495 } 466 496 467 497 static struct mptcp_pm_addr_entry * ··· 514 482 struct mptcp_pm_addr_entry *entry; 515 483 516 484 list_for_each_entry(entry, &pernet->local_addr_list, list) { 517 - if ((!lookup_by_id && mptcp_addresses_equal(&entry->addr, info, true)) || 485 + if ((!lookup_by_id && 486 + mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) || 518 487 (lookup_by_id && entry->addr.id == info->id)) 519 488 return entry; 520 489 } 521 490 return NULL; 522 - } 523 - 524 - static int 525 - lookup_id_by_addr(const struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr) 526 - { 527 - const struct mptcp_pm_addr_entry *entry; 528 - int ret = -1; 529 - 530 - rcu_read_lock(); 531 - list_for_each_entry(entry, &pernet->local_addr_list, list) { 532 - if (mptcp_addresses_equal(&entry->addr, addr, entry->addr.port)) { 533 - ret = entry->addr.id; 534 - break; 535 - } 536 - } 537 - rcu_read_unlock(); 538 - return ret; 539 491 } 540 492 541 493 static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) ··· 539 523 540 524 /* do lazy endpoint usage accounting for the MPC subflows */ 541 525 if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { 526 + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first); 527 + struct mptcp_pm_addr_entry *entry; 542 528 struct mptcp_addr_info mpc_addr; 543 - int mpc_id; 529 + bool backup = false; 544 530 545 531 local_address((struct sock_common *)msk->first, &mpc_addr); 546 - mpc_id = lookup_id_by_addr(pernet, &mpc_addr); 547 - if (mpc_id >= 0) 548 - __clear_bit(mpc_id, msk->pm.id_avail_bitmap); 532 + rcu_read_lock(); 533 + entry = __lookup_addr(pernet, &mpc_addr, false); 534 + if (entry) { 535 + __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); 536 + msk->mpc_endpoint_id = entry->addr.id; 537 + backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 538 + } 539 + rcu_read_unlock(); 540 + 541 + if (backup) 542 + mptcp_pm_send_ack(msk, subflow, true, backup); 549 543 550 544 msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); 551 545 } ··· 731 705 return; 732 706 733 707 subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); 734 - if (subflow) { 735 - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 736 - 737 - spin_unlock_bh(&msk->pm.lock); 738 - pr_debug("send ack for %s", 739 - mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr"); 740 - 741 - mptcp_subflow_send_ack(ssk); 742 - spin_lock_bh(&msk->pm.lock); 743 - } 708 + if (subflow) 709 + mptcp_pm_send_ack(msk, subflow, false, false); 744 710 } 745 711 746 712 int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, ··· 747 729 mptcp_for_each_subflow(msk, subflow) { 748 730 struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 749 731 struct mptcp_addr_info local, remote; 750 - bool slow; 751 732 752 733 local_address((struct sock_common *)ssk, &local); 753 734 if (!mptcp_addresses_equal(&local, addr, addr->port)) ··· 758 741 continue; 759 742 } 760 743 761 - slow = lock_sock_fast(ssk); 762 - if (subflow->backup != bkup) 763 - msk->last_snd = NULL; 764 - subflow->backup = bkup; 765 - subflow->send_mp_prio = 1; 766 - subflow->request_bkup = bkup; 767 - 768 - pr_debug("send ack for mp_prio"); 769 - __mptcp_subflow_send_ack(ssk); 770 - unlock_sock_fast(ssk, slow); 771 - 744 + __mptcp_pm_send_ack(msk, subflow, true, bkup); 772 745 return 0; 773 746 } 774 747 775 748 return -EINVAL; 749 + } 750 + 751 + static bool mptcp_local_id_match(const struct mptcp_sock *msk, u8 local_id, u8 id) 752 + { 753 + return local_id == id || (!local_id && msk->mpc_endpoint_id == id); 776 754 } 777 755 778 756 static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, ··· 793 781 return; 794 782 795 783 for (i = 0; i < rm_list->nr; i++) { 784 + u8 rm_id = rm_list->ids[i]; 796 785 bool removed = false; 797 786 798 787 list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { ··· 801 788 int how = RCV_SHUTDOWN | SEND_SHUTDOWN; 802 789 u8 id = subflow->local_id; 803 790 804 - if (rm_type == MPTCP_MIB_RMADDR) 805 - id = subflow->remote_id; 806 - 807 - if (rm_list->ids[i] != id) 791 + if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id) 792 + continue; 793 + if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id)) 808 794 continue; 809 795 810 - pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u", 796 + pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u", 811 797 rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", 812 - i, rm_list->ids[i], subflow->local_id, subflow->remote_id); 798 + i, rm_id, subflow->local_id, subflow->remote_id, 799 + msk->mpc_endpoint_id); 813 800 spin_unlock_bh(&msk->pm.lock); 814 801 mptcp_subflow_shutdown(sk, ssk, how); 815 802 ··· 821 808 __MPTCP_INC_STATS(sock_net(sk), rm_type); 822 809 } 823 810 if (rm_type == MPTCP_MIB_RMSUBFLOW) 824 - __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap); 811 + __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap); 825 812 if (!removed) 826 813 continue; 827 814 ··· 920 907 /* do not insert duplicate address, differentiate on port only 921 908 * singled addresses 922 909 */ 910 + if (!address_use_port(entry)) 911 + entry->addr.port = 0; 923 912 list_for_each_entry(cur, &pernet->local_addr_list, list) { 924 913 if (mptcp_addresses_equal(&cur->addr, &entry->addr, 925 - address_use_port(entry) && 926 - address_use_port(cur))) { 914 + cur->addr.port || entry->addr.port)) { 927 915 /* allow replacing the exiting endpoint only if such 928 916 * endpoint is an implicit one and the user-space 929 917 * did not provide an endpoint id ··· 970 956 } 971 957 972 958 pernet->addrs++; 973 - list_add_tail_rcu(&entry->list, &pernet->local_addr_list); 959 + if (!entry->addr.port) 960 + list_add_tail_rcu(&entry->list, &pernet->local_addr_list); 961 + else 962 + list_add_rcu(&entry->list, &pernet->local_addr_list); 974 963 ret = entry->addr.id; 975 964 976 965 out:
+1 -1
net/mptcp/protocol.c
··· 508 508 tcp_send_ack(ssk); 509 509 } 510 510 511 - void mptcp_subflow_send_ack(struct sock *ssk) 511 + static void mptcp_subflow_send_ack(struct sock *ssk) 512 512 { 513 513 bool slow; 514 514
+1 -1
net/mptcp/protocol.h
··· 282 282 bool use_64bit_ack; /* Set when we received a 64-bit DSN */ 283 283 bool csum_enabled; 284 284 bool allow_infinite_fallback; 285 + u8 mpc_endpoint_id; 285 286 u8 recvmsg_inq:1, 286 287 cork:1, 287 288 nodelay:1; ··· 608 607 void mptcp_close_ssk(struct sock *sk, struct sock *ssk, 609 608 struct mptcp_subflow_context *subflow); 610 609 void __mptcp_subflow_send_ack(struct sock *ssk); 611 - void mptcp_subflow_send_ack(struct sock *ssk); 612 610 void mptcp_subflow_reset(struct sock *ssk); 613 611 void mptcp_subflow_queue_clean(struct sock *ssk); 614 612 void mptcp_sock_graft(struct sock *sk, struct socket *parent);
+30
tools/testing/selftests/net/mptcp/mptcp_join.sh
··· 2428 2428 chk_add_nr 1 1 2429 2429 chk_prio_nr 1 1 2430 2430 fi 2431 + 2432 + if reset "mpc backup"; then 2433 + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup 2434 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow 2435 + chk_join_nr 0 0 0 2436 + chk_prio_nr 0 1 2437 + fi 2438 + 2439 + if reset "mpc backup both sides"; then 2440 + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup 2441 + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup 2442 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow 2443 + chk_join_nr 0 0 0 2444 + chk_prio_nr 1 1 2445 + fi 2446 + 2447 + if reset "mpc switch to backup"; then 2448 + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow 2449 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup 2450 + chk_join_nr 0 0 0 2451 + chk_prio_nr 0 1 2452 + fi 2453 + 2454 + if reset "mpc switch to backup both sides"; then 2455 + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow 2456 + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow 2457 + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup 2458 + chk_join_nr 0 0 0 2459 + chk_prio_nr 1 1 2460 + fi 2431 2461 } 2432 2462 2433 2463 add_addr_ports_tests()