Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mptcp: pm: move generic PM helpers to pm.c

Before this patch, the PM code was dispersed in different places:

- pm.c had common code for all PMs

- pm_netlink.c was supposed to be about the in-kernel PM, but also had
exported common helpers, callbacks used by the different PMs, NL
events for the PM userspace daemon, etc. — quite confusing.

- pm_userspace.c had userspace PM only code, but using specific
in-kernel PM helpers

To clarify the code, a reorganisation is suggested here, only by moving
code around, and (un)exporting functions:

- helpers used from both PMs and not linked to Netlink
- callbacks used by different PMs, e.g. ADD_ADDR management
- some helpers have been marked as 'static'
- protocol.h has been updated accordingly
- (while at it, a needless if before a kfree(), spotted by checkpatch in
mptcp_remove_anno_list_by_saddr(), has been removed)

The code around the PM is now less confusing, which should help for the
maintenance in the long term.

This will certainly impact future backports, but because other cleanups
have already been done recently, and more are coming to ease the addition of
a new path-manager controlled with BPF (struct_ops), doing that now
seems to be a good time. Also, many issues around the PM have been fixed
a few months ago while increasing the code coverage in the selftests, so
such big reorganisation can be done with more confidence now.

No behavioural changes intended.

Reviewed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20250307-net-next-mptcp-pm-reorg-v1-13-abef20ada03b@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Matthieu Baerts (NGI0) and committed by
Jakub Kicinski
e4c28e3d bcc32640

+467 -468
+460
net/mptcp/pm.c
··· 12 12 #include "mib.h" 13 13 #include "mptcp_pm_gen.h" 14 14 15 + #define ADD_ADDR_RETRANS_MAX 3 16 + 17 + struct mptcp_pm_add_entry { 18 + struct list_head list; 19 + struct mptcp_addr_info addr; 20 + u8 retrans_times; 21 + struct timer_list add_timer; 22 + struct mptcp_sock *sock; 23 + }; 24 + 15 25 /* path manager helpers */ 16 26 17 27 /* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses, ··· 47 37 #else 48 38 return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET; 49 39 #endif 40 + } 41 + 42 + bool mptcp_addresses_equal(const struct mptcp_addr_info *a, 43 + const struct mptcp_addr_info *b, bool use_port) 44 + { 45 + bool addr_equals = false; 46 + 47 + if (a->family == b->family) { 48 + if (a->family == AF_INET) 49 + addr_equals = a->addr.s_addr == b->addr.s_addr; 50 + #if IS_ENABLED(CONFIG_MPTCP_IPV6) 51 + else 52 + addr_equals = ipv6_addr_equal(&a->addr6, &b->addr6); 53 + } else if (a->family == AF_INET) { 54 + if (ipv6_addr_v4mapped(&b->addr6)) 55 + addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3]; 56 + } else if (b->family == AF_INET) { 57 + if (ipv6_addr_v4mapped(&a->addr6)) 58 + addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr; 59 + #endif 60 + } 61 + 62 + if (!addr_equals) 63 + return false; 64 + if (!use_port) 65 + return true; 66 + 67 + return a->port == b->port; 68 + } 69 + 70 + void mptcp_local_address(const struct sock_common *skc, 71 + struct mptcp_addr_info *addr) 72 + { 73 + addr->family = skc->skc_family; 74 + addr->port = htons(skc->skc_num); 75 + if (addr->family == AF_INET) 76 + addr->addr.s_addr = skc->skc_rcv_saddr; 77 + #if IS_ENABLED(CONFIG_MPTCP_IPV6) 78 + else if (addr->family == AF_INET6) 79 + addr->addr6 = skc->skc_v6_rcv_saddr; 80 + #endif 81 + } 82 + 83 + void mptcp_remote_address(const struct sock_common *skc, 84 + struct mptcp_addr_info *addr) 85 + { 86 + addr->family = skc->skc_family; 87 + addr->port = skc->skc_dport; 88 + if (addr->family == AF_INET) 89 + 
addr->addr.s_addr = skc->skc_daddr; 90 + #if IS_ENABLED(CONFIG_MPTCP_IPV6) 91 + else if (addr->family == AF_INET6) 92 + addr->addr6 = skc->skc_v6_daddr; 93 + #endif 94 + } 95 + 96 + static bool mptcp_pm_is_init_remote_addr(struct mptcp_sock *msk, 97 + const struct mptcp_addr_info *remote) 98 + { 99 + struct mptcp_addr_info mpc_remote; 100 + 101 + mptcp_remote_address((struct sock_common *)msk, &mpc_remote); 102 + return mptcp_addresses_equal(&mpc_remote, remote, remote->port); 103 + } 104 + 105 + bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, 106 + const struct mptcp_addr_info *saddr) 107 + { 108 + struct mptcp_subflow_context *subflow; 109 + struct mptcp_addr_info cur; 110 + struct sock_common *skc; 111 + 112 + list_for_each_entry(subflow, list, node) { 113 + skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); 114 + 115 + mptcp_local_address(skc, &cur); 116 + if (mptcp_addresses_equal(&cur, saddr, saddr->port)) 117 + return true; 118 + } 119 + 120 + return false; 121 + } 122 + 123 + static struct mptcp_pm_add_entry * 124 + mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, 125 + const struct mptcp_addr_info *addr) 126 + { 127 + struct mptcp_pm_add_entry *entry; 128 + 129 + lockdep_assert_held(&msk->pm.lock); 130 + 131 + list_for_each_entry(entry, &msk->pm.anno_list, list) { 132 + if (mptcp_addresses_equal(&entry->addr, addr, true)) 133 + return entry; 134 + } 135 + 136 + return NULL; 137 + } 138 + 139 + bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, 140 + const struct mptcp_addr_info *addr) 141 + { 142 + struct mptcp_pm_add_entry *entry; 143 + 144 + entry = mptcp_pm_del_add_timer(msk, addr, false); 145 + kfree(entry); 146 + return entry; 147 + } 148 + 149 + bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) 150 + { 151 + struct mptcp_pm_add_entry *entry; 152 + struct mptcp_addr_info saddr; 153 + bool ret = false; 154 + 155 + mptcp_local_address((struct sock_common *)sk, &saddr); 156 + 
157 + spin_lock_bh(&msk->pm.lock); 158 + list_for_each_entry(entry, &msk->pm.anno_list, list) { 159 + if (mptcp_addresses_equal(&entry->addr, &saddr, true)) { 160 + ret = true; 161 + goto out; 162 + } 163 + } 164 + 165 + out: 166 + spin_unlock_bh(&msk->pm.lock); 167 + return ret; 168 + } 169 + 170 + static void __mptcp_pm_send_ack(struct mptcp_sock *msk, 171 + struct mptcp_subflow_context *subflow, 172 + bool prio, bool backup) 173 + { 174 + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 175 + bool slow; 176 + 177 + pr_debug("send ack for %s\n", 178 + prio ? "mp_prio" : 179 + (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr")); 180 + 181 + slow = lock_sock_fast(ssk); 182 + if (prio) { 183 + subflow->send_mp_prio = 1; 184 + subflow->request_bkup = backup; 185 + } 186 + 187 + __mptcp_subflow_send_ack(ssk); 188 + unlock_sock_fast(ssk, slow); 189 + } 190 + 191 + void mptcp_pm_send_ack(struct mptcp_sock *msk, 192 + struct mptcp_subflow_context *subflow, 193 + bool prio, bool backup) 194 + { 195 + spin_unlock_bh(&msk->pm.lock); 196 + __mptcp_pm_send_ack(msk, subflow, prio, backup); 197 + spin_lock_bh(&msk->pm.lock); 198 + } 199 + 200 + void mptcp_pm_addr_send_ack(struct mptcp_sock *msk) 201 + { 202 + struct mptcp_subflow_context *subflow, *alt = NULL; 203 + 204 + msk_owned_by_me(msk); 205 + lockdep_assert_held(&msk->pm.lock); 206 + 207 + if (!mptcp_pm_should_add_signal(msk) && 208 + !mptcp_pm_should_rm_signal(msk)) 209 + return; 210 + 211 + mptcp_for_each_subflow(msk, subflow) { 212 + if (__mptcp_subflow_active(subflow)) { 213 + if (!subflow->stale) { 214 + mptcp_pm_send_ack(msk, subflow, false, false); 215 + return; 216 + } 217 + 218 + if (!alt) 219 + alt = subflow; 220 + } 221 + } 222 + 223 + if (alt) 224 + mptcp_pm_send_ack(msk, alt, false, false); 225 + } 226 + 227 + int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, 228 + struct mptcp_addr_info *addr, 229 + struct mptcp_addr_info *rem, 230 + u8 bkup) 231 + { 232 + struct mptcp_subflow_context 
*subflow; 233 + 234 + pr_debug("bkup=%d\n", bkup); 235 + 236 + mptcp_for_each_subflow(msk, subflow) { 237 + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 238 + struct mptcp_addr_info local, remote; 239 + 240 + mptcp_local_address((struct sock_common *)ssk, &local); 241 + if (!mptcp_addresses_equal(&local, addr, addr->port)) 242 + continue; 243 + 244 + if (rem && rem->family != AF_UNSPEC) { 245 + mptcp_remote_address((struct sock_common *)ssk, &remote); 246 + if (!mptcp_addresses_equal(&remote, rem, rem->port)) 247 + continue; 248 + } 249 + 250 + __mptcp_pm_send_ack(msk, subflow, true, bkup); 251 + return 0; 252 + } 253 + 254 + return -EINVAL; 255 + } 256 + 257 + static void mptcp_pm_add_timer(struct timer_list *timer) 258 + { 259 + struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); 260 + struct mptcp_sock *msk = entry->sock; 261 + struct sock *sk = (struct sock *)msk; 262 + 263 + pr_debug("msk=%p\n", msk); 264 + 265 + if (!msk) 266 + return; 267 + 268 + if (inet_sk_state_load(sk) == TCP_CLOSE) 269 + return; 270 + 271 + if (!entry->addr.id) 272 + return; 273 + 274 + if (mptcp_pm_should_add_signal_addr(msk)) { 275 + sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); 276 + goto out; 277 + } 278 + 279 + spin_lock_bh(&msk->pm.lock); 280 + 281 + if (!mptcp_pm_should_add_signal_addr(msk)) { 282 + pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); 283 + mptcp_pm_announce_addr(msk, &entry->addr, false); 284 + mptcp_pm_add_addr_send_ack(msk); 285 + entry->retrans_times++; 286 + } 287 + 288 + if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) 289 + sk_reset_timer(sk, timer, 290 + jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); 291 + 292 + spin_unlock_bh(&msk->pm.lock); 293 + 294 + if (entry->retrans_times == ADD_ADDR_RETRANS_MAX) 295 + mptcp_pm_subflow_established(msk); 296 + 297 + out: 298 + __sock_put(sk); 299 + } 300 + 301 + struct mptcp_pm_add_entry * 302 + mptcp_pm_del_add_timer(struct mptcp_sock *msk, 303 + const struct 
mptcp_addr_info *addr, bool check_id) 304 + { 305 + struct mptcp_pm_add_entry *entry; 306 + struct sock *sk = (struct sock *)msk; 307 + struct timer_list *add_timer = NULL; 308 + 309 + spin_lock_bh(&msk->pm.lock); 310 + entry = mptcp_lookup_anno_list_by_saddr(msk, addr); 311 + if (entry && (!check_id || entry->addr.id == addr->id)) { 312 + entry->retrans_times = ADD_ADDR_RETRANS_MAX; 313 + add_timer = &entry->add_timer; 314 + } 315 + if (!check_id && entry) 316 + list_del(&entry->list); 317 + spin_unlock_bh(&msk->pm.lock); 318 + 319 + /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */ 320 + if (add_timer) 321 + sk_stop_timer_sync(sk, add_timer); 322 + 323 + return entry; 324 + } 325 + 326 + bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, 327 + const struct mptcp_addr_info *addr) 328 + { 329 + struct mptcp_pm_add_entry *add_entry = NULL; 330 + struct sock *sk = (struct sock *)msk; 331 + struct net *net = sock_net(sk); 332 + 333 + lockdep_assert_held(&msk->pm.lock); 334 + 335 + add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); 336 + 337 + if (add_entry) { 338 + if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) 339 + return false; 340 + 341 + sk_reset_timer(sk, &add_entry->add_timer, 342 + jiffies + mptcp_get_add_addr_timeout(net)); 343 + return true; 344 + } 345 + 346 + add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); 347 + if (!add_entry) 348 + return false; 349 + 350 + list_add(&add_entry->list, &msk->pm.anno_list); 351 + 352 + add_entry->addr = *addr; 353 + add_entry->sock = msk; 354 + add_entry->retrans_times = 0; 355 + 356 + timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); 357 + sk_reset_timer(sk, &add_entry->add_timer, 358 + jiffies + mptcp_get_add_addr_timeout(net)); 359 + 360 + return true; 361 + } 362 + 363 + static void mptcp_pm_free_anno_list(struct mptcp_sock *msk) 364 + { 365 + struct mptcp_pm_add_entry *entry, *tmp; 366 + struct sock *sk = (struct sock *)msk; 367 + LIST_HEAD(free_list); 368 + 369 + 
pr_debug("msk=%p\n", msk); 370 + 371 + spin_lock_bh(&msk->pm.lock); 372 + list_splice_init(&msk->pm.anno_list, &free_list); 373 + spin_unlock_bh(&msk->pm.lock); 374 + 375 + list_for_each_entry_safe(entry, tmp, &free_list, list) { 376 + sk_stop_timer_sync(sk, &entry->add_timer); 377 + kfree(entry); 378 + } 50 379 } 51 380 52 381 /* path manager command handlers */ ··· 644 295 return; 645 296 646 297 mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); 298 + } 299 + 300 + static void mptcp_pm_rm_addr_or_subflow(struct mptcp_sock *msk, 301 + const struct mptcp_rm_list *rm_list, 302 + enum linux_mptcp_mib_field rm_type) 303 + { 304 + struct mptcp_subflow_context *subflow, *tmp; 305 + struct sock *sk = (struct sock *)msk; 306 + u8 i; 307 + 308 + pr_debug("%s rm_list_nr %d\n", 309 + rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow", rm_list->nr); 310 + 311 + msk_owned_by_me(msk); 312 + 313 + if (sk->sk_state == TCP_LISTEN) 314 + return; 315 + 316 + if (!rm_list->nr) 317 + return; 318 + 319 + if (list_empty(&msk->conn_list)) 320 + return; 321 + 322 + for (i = 0; i < rm_list->nr; i++) { 323 + u8 rm_id = rm_list->ids[i]; 324 + bool removed = false; 325 + 326 + mptcp_for_each_subflow_safe(msk, subflow, tmp) { 327 + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 328 + u8 remote_id = READ_ONCE(subflow->remote_id); 329 + int how = RCV_SHUTDOWN | SEND_SHUTDOWN; 330 + u8 id = subflow_get_local_id(subflow); 331 + 332 + if ((1 << inet_sk_state_load(ssk)) & 333 + (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE)) 334 + continue; 335 + if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) 336 + continue; 337 + if (rm_type == MPTCP_MIB_RMSUBFLOW && id != rm_id) 338 + continue; 339 + 340 + pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u\n", 341 + rm_type == MPTCP_MIB_RMADDR ? 
"address" : "subflow", 342 + i, rm_id, id, remote_id, msk->mpc_endpoint_id); 343 + spin_unlock_bh(&msk->pm.lock); 344 + mptcp_subflow_shutdown(sk, ssk, how); 345 + removed |= subflow->request_join; 346 + 347 + /* the following takes care of updating the subflows counter */ 348 + mptcp_close_ssk(sk, ssk, subflow); 349 + spin_lock_bh(&msk->pm.lock); 350 + 351 + if (rm_type == MPTCP_MIB_RMSUBFLOW) 352 + __MPTCP_INC_STATS(sock_net(sk), rm_type); 353 + } 354 + 355 + if (rm_type == MPTCP_MIB_RMADDR) { 356 + __MPTCP_INC_STATS(sock_net(sk), rm_type); 357 + if (removed && mptcp_pm_is_kernel(msk)) 358 + mptcp_pm_nl_rm_addr(msk, rm_id); 359 + } 360 + } 361 + } 362 + 363 + static void mptcp_pm_rm_addr_recv(struct mptcp_sock *msk) 364 + { 365 + mptcp_pm_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); 366 + } 367 + 368 + void mptcp_pm_rm_subflow(struct mptcp_sock *msk, 369 + const struct mptcp_rm_list *rm_list) 370 + { 371 + mptcp_pm_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); 647 372 } 648 373 649 374 void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, ··· 1001 578 int mptcp_pm_nl_set_flags_doit(struct sk_buff *skb, struct genl_info *info) 1002 579 { 1003 580 return mptcp_pm_set_flags(info); 581 + } 582 + 583 + static void mptcp_pm_subflows_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) 584 + { 585 + struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); 586 + struct sock *sk = (struct sock *)msk; 587 + unsigned int active_max_loss_cnt; 588 + struct net *net = sock_net(sk); 589 + unsigned int stale_loss_cnt; 590 + bool slow; 591 + 592 + stale_loss_cnt = mptcp_stale_loss_cnt(net); 593 + if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt) 594 + return; 595 + 596 + /* look for another available subflow not in loss state */ 597 + active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1); 598 + mptcp_for_each_subflow(msk, iter) { 599 + if (iter != subflow && mptcp_subflow_active(iter) && 600 + 
iter->stale_count < active_max_loss_cnt) { 601 + /* we have some alternatives, try to mark this subflow as idle ...*/ 602 + slow = lock_sock_fast(ssk); 603 + if (!tcp_rtx_and_write_queues_empty(ssk)) { 604 + subflow->stale = 1; 605 + __mptcp_retransmit_pending_data(sk); 606 + MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE); 607 + } 608 + unlock_sock_fast(ssk, slow); 609 + 610 + /* always try to push the pending data regardless of re-injections: 611 + * we can possibly use backup subflows now, and subflow selection 612 + * is cheap under the msk socket lock 613 + */ 614 + __mptcp_push_pending(sk, 0); 615 + return; 616 + } 617 + } 1004 618 } 1005 619 1006 620 void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
+1 -460
net/mptcp/pm_netlink.c
··· 18 18 19 19 static int pm_nl_pernet_id; 20 20 21 - struct mptcp_pm_add_entry { 22 - struct list_head list; 23 - struct mptcp_addr_info addr; 24 - u8 retrans_times; 25 - struct timer_list add_timer; 26 - struct mptcp_sock *sock; 27 - }; 28 - 29 21 struct pm_nl_pernet { 30 22 /* protects pernet updates */ 31 23 spinlock_t lock; ··· 33 41 }; 34 42 35 43 #define MPTCP_PM_ADDR_MAX 8 36 - #define ADD_ADDR_RETRANS_MAX 3 37 44 38 45 static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) 39 46 { ··· 43 52 pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) 44 53 { 45 54 return pm_nl_get_pernet(sock_net((struct sock *)msk)); 46 - } 47 - 48 - bool mptcp_addresses_equal(const struct mptcp_addr_info *a, 49 - const struct mptcp_addr_info *b, bool use_port) 50 - { 51 - bool addr_equals = false; 52 - 53 - if (a->family == b->family) { 54 - if (a->family == AF_INET) 55 - addr_equals = a->addr.s_addr == b->addr.s_addr; 56 - #if IS_ENABLED(CONFIG_MPTCP_IPV6) 57 - else 58 - addr_equals = ipv6_addr_equal(&a->addr6, &b->addr6); 59 - } else if (a->family == AF_INET) { 60 - if (ipv6_addr_v4mapped(&b->addr6)) 61 - addr_equals = a->addr.s_addr == b->addr6.s6_addr32[3]; 62 - } else if (b->family == AF_INET) { 63 - if (ipv6_addr_v4mapped(&a->addr6)) 64 - addr_equals = a->addr6.s6_addr32[3] == b->addr.s_addr; 65 - #endif 66 - } 67 - 68 - if (!addr_equals) 69 - return false; 70 - if (!use_port) 71 - return true; 72 - 73 - return a->port == b->port; 74 - } 75 - 76 - void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr) 77 - { 78 - addr->family = skc->skc_family; 79 - addr->port = htons(skc->skc_num); 80 - if (addr->family == AF_INET) 81 - addr->addr.s_addr = skc->skc_rcv_saddr; 82 - #if IS_ENABLED(CONFIG_MPTCP_IPV6) 83 - else if (addr->family == AF_INET6) 84 - addr->addr6 = skc->skc_v6_rcv_saddr; 85 - #endif 86 - } 87 - 88 - void mptcp_remote_address(const struct sock_common *skc, 89 - struct mptcp_addr_info *addr) 90 - { 91 - addr->family = 
skc->skc_family; 92 - addr->port = skc->skc_dport; 93 - if (addr->family == AF_INET) 94 - addr->addr.s_addr = skc->skc_daddr; 95 - #if IS_ENABLED(CONFIG_MPTCP_IPV6) 96 - else if (addr->family == AF_INET6) 97 - addr->addr6 = skc->skc_v6_daddr; 98 - #endif 99 - } 100 - 101 - bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, 102 - const struct mptcp_addr_info *saddr) 103 - { 104 - struct mptcp_subflow_context *subflow; 105 - struct mptcp_addr_info cur; 106 - struct sock_common *skc; 107 - 108 - list_for_each_entry(subflow, list, node) { 109 - skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow); 110 - 111 - mptcp_local_address(skc, &cur); 112 - if (mptcp_addresses_equal(&cur, saddr, saddr->port)) 113 - return true; 114 - } 115 - 116 - return false; 117 55 } 118 56 119 57 static bool lookup_subflow_by_daddr(const struct list_head *list, ··· 171 251 return true; 172 252 } 173 253 174 - struct mptcp_pm_add_entry * 175 - mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, 176 - const struct mptcp_addr_info *addr) 177 - { 178 - struct mptcp_pm_add_entry *entry; 179 - 180 - lockdep_assert_held(&msk->pm.lock); 181 - 182 - list_for_each_entry(entry, &msk->pm.anno_list, list) { 183 - if (mptcp_addresses_equal(&entry->addr, addr, true)) 184 - return entry; 185 - } 186 - 187 - return NULL; 188 - } 189 - 190 - bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) 191 - { 192 - struct mptcp_pm_add_entry *entry; 193 - struct mptcp_addr_info saddr; 194 - bool ret = false; 195 - 196 - mptcp_local_address((struct sock_common *)sk, &saddr); 197 - 198 - spin_lock_bh(&msk->pm.lock); 199 - list_for_each_entry(entry, &msk->pm.anno_list, list) { 200 - if (mptcp_addresses_equal(&entry->addr, &saddr, true)) { 201 - ret = true; 202 - goto out; 203 - } 204 - } 205 - 206 - out: 207 - spin_unlock_bh(&msk->pm.lock); 208 - return ret; 209 - } 210 - 211 - static void mptcp_pm_add_timer(struct timer_list *timer) 212 - { 213 - struct 
mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); 214 - struct mptcp_sock *msk = entry->sock; 215 - struct sock *sk = (struct sock *)msk; 216 - 217 - pr_debug("msk=%p\n", msk); 218 - 219 - if (!msk) 220 - return; 221 - 222 - if (inet_sk_state_load(sk) == TCP_CLOSE) 223 - return; 224 - 225 - if (!entry->addr.id) 226 - return; 227 - 228 - if (mptcp_pm_should_add_signal_addr(msk)) { 229 - sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8); 230 - goto out; 231 - } 232 - 233 - spin_lock_bh(&msk->pm.lock); 234 - 235 - if (!mptcp_pm_should_add_signal_addr(msk)) { 236 - pr_debug("retransmit ADD_ADDR id=%d\n", entry->addr.id); 237 - mptcp_pm_announce_addr(msk, &entry->addr, false); 238 - mptcp_pm_add_addr_send_ack(msk); 239 - entry->retrans_times++; 240 - } 241 - 242 - if (entry->retrans_times < ADD_ADDR_RETRANS_MAX) 243 - sk_reset_timer(sk, timer, 244 - jiffies + mptcp_get_add_addr_timeout(sock_net(sk))); 245 - 246 - spin_unlock_bh(&msk->pm.lock); 247 - 248 - if (entry->retrans_times == ADD_ADDR_RETRANS_MAX) 249 - mptcp_pm_subflow_established(msk); 250 - 251 - out: 252 - __sock_put(sk); 253 - } 254 - 255 - struct mptcp_pm_add_entry * 256 - mptcp_pm_del_add_timer(struct mptcp_sock *msk, 257 - const struct mptcp_addr_info *addr, bool check_id) 258 - { 259 - struct mptcp_pm_add_entry *entry; 260 - struct sock *sk = (struct sock *)msk; 261 - struct timer_list *add_timer = NULL; 262 - 263 - spin_lock_bh(&msk->pm.lock); 264 - entry = mptcp_lookup_anno_list_by_saddr(msk, addr); 265 - if (entry && (!check_id || entry->addr.id == addr->id)) { 266 - entry->retrans_times = ADD_ADDR_RETRANS_MAX; 267 - add_timer = &entry->add_timer; 268 - } 269 - if (!check_id && entry) 270 - list_del(&entry->list); 271 - spin_unlock_bh(&msk->pm.lock); 272 - 273 - /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */ 274 - if (add_timer) 275 - sk_stop_timer_sync(sk, add_timer); 276 - 277 - return entry; 278 - } 279 - 280 - bool mptcp_pm_alloc_anno_list(struct 
mptcp_sock *msk, 281 - const struct mptcp_addr_info *addr) 282 - { 283 - struct mptcp_pm_add_entry *add_entry = NULL; 284 - struct sock *sk = (struct sock *)msk; 285 - struct net *net = sock_net(sk); 286 - 287 - lockdep_assert_held(&msk->pm.lock); 288 - 289 - add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); 290 - 291 - if (add_entry) { 292 - if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) 293 - return false; 294 - 295 - sk_reset_timer(sk, &add_entry->add_timer, 296 - jiffies + mptcp_get_add_addr_timeout(net)); 297 - return true; 298 - } 299 - 300 - add_entry = kmalloc(sizeof(*add_entry), GFP_ATOMIC); 301 - if (!add_entry) 302 - return false; 303 - 304 - list_add(&add_entry->list, &msk->pm.anno_list); 305 - 306 - add_entry->addr = *addr; 307 - add_entry->sock = msk; 308 - add_entry->retrans_times = 0; 309 - 310 - timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0); 311 - sk_reset_timer(sk, &add_entry->add_timer, 312 - jiffies + mptcp_get_add_addr_timeout(net)); 313 - 314 - return true; 315 - } 316 - 317 - void mptcp_pm_free_anno_list(struct mptcp_sock *msk) 318 - { 319 - struct mptcp_pm_add_entry *entry, *tmp; 320 - struct sock *sk = (struct sock *)msk; 321 - LIST_HEAD(free_list); 322 - 323 - pr_debug("msk=%p\n", msk); 324 - 325 - spin_lock_bh(&msk->pm.lock); 326 - list_splice_init(&msk->pm.anno_list, &free_list); 327 - spin_unlock_bh(&msk->pm.lock); 328 - 329 - list_for_each_entry_safe(entry, tmp, &free_list, list) { 330 - sk_stop_timer_sync(sk, &entry->add_timer); 331 - kfree(entry); 332 - } 333 - } 334 - 335 254 /* Fill all the remote addresses into the array addrs[], 336 255 * and return the array size. 337 256 */ ··· 237 478 } 238 479 239 480 return i; 240 - } 241 - 242 - static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, 243 - bool prio, bool backup) 244 - { 245 - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 246 - bool slow; 247 - 248 - pr_debug("send ack for %s\n", 249 - prio ? 
"mp_prio" : (mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr")); 250 - 251 - slow = lock_sock_fast(ssk); 252 - if (prio) { 253 - subflow->send_mp_prio = 1; 254 - subflow->request_bkup = backup; 255 - } 256 - 257 - __mptcp_subflow_send_ack(ssk); 258 - unlock_sock_fast(ssk, slow); 259 - } 260 - 261 - static void mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow, 262 - bool prio, bool backup) 263 - { 264 - spin_unlock_bh(&msk->pm.lock); 265 - __mptcp_pm_send_ack(msk, subflow, prio, backup); 266 - spin_lock_bh(&msk->pm.lock); 267 481 } 268 482 269 483 static struct mptcp_pm_addr_entry * ··· 504 772 } 505 773 } 506 774 507 - bool mptcp_pm_is_init_remote_addr(struct mptcp_sock *msk, 508 - const struct mptcp_addr_info *remote) 509 - { 510 - struct mptcp_addr_info mpc_remote; 511 - 512 - mptcp_remote_address((struct sock_common *)msk, &mpc_remote); 513 - return mptcp_addresses_equal(&mpc_remote, remote, remote->port); 514 - } 515 - 516 - void mptcp_pm_addr_send_ack(struct mptcp_sock *msk) 517 - { 518 - struct mptcp_subflow_context *subflow, *alt = NULL; 519 - 520 - msk_owned_by_me(msk); 521 - lockdep_assert_held(&msk->pm.lock); 522 - 523 - if (!mptcp_pm_should_add_signal(msk) && 524 - !mptcp_pm_should_rm_signal(msk)) 525 - return; 526 - 527 - mptcp_for_each_subflow(msk, subflow) { 528 - if (__mptcp_subflow_active(subflow)) { 529 - if (!subflow->stale) { 530 - mptcp_pm_send_ack(msk, subflow, false, false); 531 - return; 532 - } 533 - 534 - if (!alt) 535 - alt = subflow; 536 - } 537 - } 538 - 539 - if (alt) 540 - mptcp_pm_send_ack(msk, alt, false, false); 541 - } 542 - 543 - int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, 544 - struct mptcp_addr_info *addr, 545 - struct mptcp_addr_info *rem, 546 - u8 bkup) 547 - { 548 - struct mptcp_subflow_context *subflow; 549 - 550 - pr_debug("bkup=%d\n", bkup); 551 - 552 - mptcp_for_each_subflow(msk, subflow) { 553 - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 554 - struct 
mptcp_addr_info local, remote; 555 - 556 - mptcp_local_address((struct sock_common *)ssk, &local); 557 - if (!mptcp_addresses_equal(&local, addr, addr->port)) 558 - continue; 559 - 560 - if (rem && rem->family != AF_UNSPEC) { 561 - mptcp_remote_address((struct sock_common *)ssk, &remote); 562 - if (!mptcp_addresses_equal(&remote, rem, rem->port)) 563 - continue; 564 - } 565 - 566 - __mptcp_pm_send_ack(msk, subflow, true, bkup); 567 - return 0; 568 - } 569 - 570 - return -EINVAL; 571 - } 572 - 573 - static void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id) 775 + void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id) 574 776 { 575 777 if (rm_id && WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { 576 778 /* Note: if the subflow has been closed before, this ··· 513 847 if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) 514 848 WRITE_ONCE(msk->pm.accept_addr, true); 515 849 } 516 - } 517 - 518 - static void mptcp_pm_rm_addr_or_subflow(struct mptcp_sock *msk, 519 - const struct mptcp_rm_list *rm_list, 520 - enum linux_mptcp_mib_field rm_type) 521 - { 522 - struct mptcp_subflow_context *subflow, *tmp; 523 - struct sock *sk = (struct sock *)msk; 524 - u8 i; 525 - 526 - pr_debug("%s rm_list_nr %d\n", 527 - rm_type == MPTCP_MIB_RMADDR ? 
"address" : "subflow", rm_list->nr); 528 - 529 - msk_owned_by_me(msk); 530 - 531 - if (sk->sk_state == TCP_LISTEN) 532 - return; 533 - 534 - if (!rm_list->nr) 535 - return; 536 - 537 - if (list_empty(&msk->conn_list)) 538 - return; 539 - 540 - for (i = 0; i < rm_list->nr; i++) { 541 - u8 rm_id = rm_list->ids[i]; 542 - bool removed = false; 543 - 544 - mptcp_for_each_subflow_safe(msk, subflow, tmp) { 545 - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 546 - u8 remote_id = READ_ONCE(subflow->remote_id); 547 - int how = RCV_SHUTDOWN | SEND_SHUTDOWN; 548 - u8 id = subflow_get_local_id(subflow); 549 - 550 - if ((1 << inet_sk_state_load(ssk)) & 551 - (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE)) 552 - continue; 553 - if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) 554 - continue; 555 - if (rm_type == MPTCP_MIB_RMSUBFLOW && id != rm_id) 556 - continue; 557 - 558 - pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u\n", 559 - rm_type == MPTCP_MIB_RMADDR ? 
"address" : "subflow", 560 - i, rm_id, id, remote_id, msk->mpc_endpoint_id); 561 - spin_unlock_bh(&msk->pm.lock); 562 - mptcp_subflow_shutdown(sk, ssk, how); 563 - removed |= subflow->request_join; 564 - 565 - /* the following takes care of updating the subflows counter */ 566 - mptcp_close_ssk(sk, ssk, subflow); 567 - spin_lock_bh(&msk->pm.lock); 568 - 569 - if (rm_type == MPTCP_MIB_RMSUBFLOW) 570 - __MPTCP_INC_STATS(sock_net(sk), rm_type); 571 - } 572 - 573 - if (rm_type == MPTCP_MIB_RMADDR) { 574 - __MPTCP_INC_STATS(sock_net(sk), rm_type); 575 - if (removed && mptcp_pm_is_kernel(msk)) 576 - mptcp_pm_nl_rm_addr(msk, rm_id); 577 - } 578 - } 579 - } 580 - 581 - void mptcp_pm_rm_addr_recv(struct mptcp_sock *msk) 582 - { 583 - mptcp_pm_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); 584 - } 585 - 586 - static void mptcp_pm_rm_subflow(struct mptcp_sock *msk, 587 - const struct mptcp_rm_list *rm_list) 588 - { 589 - mptcp_pm_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); 590 850 } 591 851 592 852 /* Called under PM lock */ ··· 778 1186 }, 779 1187 }; 780 1188 781 - void mptcp_pm_subflows_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) 782 - { 783 - struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk); 784 - struct sock *sk = (struct sock *)msk; 785 - unsigned int active_max_loss_cnt; 786 - struct net *net = sock_net(sk); 787 - unsigned int stale_loss_cnt; 788 - bool slow; 789 - 790 - stale_loss_cnt = mptcp_stale_loss_cnt(net); 791 - if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt) 792 - return; 793 - 794 - /* look for another available subflow not in loss state */ 795 - active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1); 796 - mptcp_for_each_subflow(msk, iter) { 797 - if (iter != subflow && mptcp_subflow_active(iter) && 798 - iter->stale_count < active_max_loss_cnt) { 799 - /* we have some alternatives, try to mark this subflow as idle ...*/ 800 - slow = lock_sock_fast(ssk); 801 - if 
(!tcp_rtx_and_write_queues_empty(ssk)) { 802 - subflow->stale = 1; 803 - __mptcp_retransmit_pending_data(sk); 804 - MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE); 805 - } 806 - unlock_sock_fast(ssk, slow); 807 - 808 - /* always try to push the pending data regardless of re-injections: 809 - * we can possibly use backup subflows now, and subflow selection 810 - * is cheap under the msk socket lock 811 - */ 812 - __mptcp_push_pending(sk, 0); 813 - return; 814 - } 815 - } 816 - } 817 - 818 1189 static int mptcp_pm_family_to_addr(int family) 819 1190 { 820 1191 #if IS_ENABLED(CONFIG_MPTCP_IPV6) ··· 998 1443 out_free: 999 1444 __mptcp_pm_release_addr_entry(entry); 1000 1445 return ret; 1001 - } 1002 - 1003 - bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, 1004 - const struct mptcp_addr_info *addr) 1005 - { 1006 - struct mptcp_pm_add_entry *entry; 1007 - 1008 - entry = mptcp_pm_del_add_timer(msk, addr, false); 1009 - if (entry) { 1010 - kfree(entry); 1011 - return true; 1012 - } 1013 - 1014 - return false; 1015 1446 } 1016 1447 1017 1448 static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk,
+6 -8
net/mptcp/protocol.h
··· 996 996 const struct mptcp_addr_info *loc, 997 997 const struct mptcp_addr_info *rem); 998 998 void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); 999 - void mptcp_pm_subflows_chk_stale(const struct mptcp_sock *msk, struct sock *ssk); 1000 999 void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side); 1001 1000 void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk); 1002 1001 bool mptcp_pm_allow_new_subflow(struct mptcp_sock *msk); ··· 1009 1010 void mptcp_pm_add_addr_echoed(struct mptcp_sock *msk, 1010 1011 const struct mptcp_addr_info *addr); 1011 1012 void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk); 1012 - bool mptcp_pm_is_init_remote_addr(struct mptcp_sock *msk, 1013 - const struct mptcp_addr_info *remote); 1013 + void mptcp_pm_send_ack(struct mptcp_sock *msk, 1014 + struct mptcp_subflow_context *subflow, 1015 + bool prio, bool backup); 1014 1016 void mptcp_pm_addr_send_ack(struct mptcp_sock *msk); 1015 - void mptcp_pm_rm_addr_recv(struct mptcp_sock *msk); 1017 + void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id); 1018 + void mptcp_pm_rm_subflow(struct mptcp_sock *msk, 1019 + const struct mptcp_rm_list *rm_list); 1016 1020 void mptcp_pm_rm_addr_received(struct mptcp_sock *msk, 1017 1021 const struct mptcp_rm_list *rm_list); 1018 1022 void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup); ··· 1026 1024 u8 bkup); 1027 1025 bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, 1028 1026 const struct mptcp_addr_info *addr); 1029 - void mptcp_pm_free_anno_list(struct mptcp_sock *msk); 1030 1027 bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk); 1031 1028 struct mptcp_pm_add_entry * 1032 1029 mptcp_pm_del_add_timer(struct mptcp_sock *msk, 1033 1030 const struct mptcp_addr_info *addr, bool check_id); 1034 - struct mptcp_pm_add_entry * 1035 - mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, 1036 - const struct 
mptcp_addr_info *addr); 1037 1031 bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, 1038 1032 const struct mptcp_addr_info *saddr); 1039 1033 bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk,