Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mptcp: pm: split in-kernel PM specific code

Before this patch, the PM code was dispersed in different places:

- pm.c had common code for all PMs

- pm_netlink.c was supposed to be about the in-kernel PM, but also had
exported common Netlink helpers, NL events for PM userspace daemons,
etc. quite confusing.

To clarify the code, a reorganisation is suggested here, only by moving
code around to avoid confusions:

- pm_netlink.c now only contains common PM Netlink code:
- PM events: this code was already there
- shared helpers around Netlink code that were already there as well
- more shared Netlink commands code from pm.c will come after

- pm_kernel.c now contains only code that is specific to the in-kernel
PM. Now all functions are either called from:
- pm.c: events coming from the core, when this PM is being used
- pm_netlink.c: for shared Netlink commands
- mptcp_pm_gen.c: for Netlink commands specific to the in-kernel PM
- sockopt.c: for the exported counters per netns
- (while at it, a useless 'return;' spot by checkpatch at the end of
mptcp_pm_nl_set_flags_all, has been removed)

The code around the PM is now less confusing, which should help for the
maintenance in the long term.

This will certainly impact future backports, but because other cleanups
have already done recently, and more are coming to ease the addition of
a new path-manager controlled with BPF (struct_ops), doing that now
seems to be a good time. Also, many issues around the PM have been fixed
a few months ago while increasing the code coverage in the selftests, so
such big reorganisation can be done with more confidence now.

No behavioural changes intended.

Reviewed-by: Geliang Tang <geliang@kernel.org>
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Link: https://patch.msgid.link/20250307-net-next-mptcp-pm-reorg-v1-14-abef20ada03b@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Matthieu Baerts (NGI0) and committed by
Jakub Kicinski
8617e85e e4c28e3d

+1411 -1405
+1 -1
net/mptcp/Makefile
··· 3 3 4 4 mptcp-y := protocol.o subflow.o options.o token.o crypto.o ctrl.o pm.o diag.o \ 5 5 mib.o pm_netlink.o sockopt.o pm_userspace.o fastopen.o sched.o \ 6 - mptcp_pm_gen.o 6 + mptcp_pm_gen.o pm_kernel.o 7 7 8 8 obj-$(CONFIG_SYN_COOKIES) += syncookies.o 9 9 obj-$(CONFIG_INET_MPTCP_DIAG) += mptcp_diag.o
+1410
net/mptcp/pm_kernel.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Multipath TCP 3 + * 4 + * Copyright (c) 2025, Matthieu Baerts. 5 + */ 6 + 7 + #define pr_fmt(fmt) "MPTCP: " fmt 8 + 9 + #include <net/netns/generic.h> 10 + 11 + #include "protocol.h" 12 + #include "mib.h" 13 + #include "mptcp_pm_gen.h" 14 + 15 + static int pm_nl_pernet_id; 16 + 17 + struct pm_nl_pernet { 18 + /* protects pernet updates */ 19 + spinlock_t lock; 20 + struct list_head local_addr_list; 21 + unsigned int addrs; 22 + unsigned int stale_loss_cnt; 23 + unsigned int add_addr_signal_max; 24 + unsigned int add_addr_accept_max; 25 + unsigned int local_addr_max; 26 + unsigned int subflows_max; 27 + unsigned int next_id; 28 + DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 29 + }; 30 + 31 + #define MPTCP_PM_ADDR_MAX 8 32 + 33 + static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) 34 + { 35 + return net_generic(net, pm_nl_pernet_id); 36 + } 37 + 38 + static struct pm_nl_pernet * 39 + pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) 40 + { 41 + return pm_nl_get_pernet(sock_net((struct sock *)msk)); 42 + } 43 + 44 + static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) 45 + { 46 + return pm_nl_get_pernet(genl_info_net(info)); 47 + } 48 + 49 + unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) 50 + { 51 + const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 52 + 53 + return READ_ONCE(pernet->add_addr_signal_max); 54 + } 55 + EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); 56 + 57 + unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) 58 + { 59 + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 60 + 61 + return READ_ONCE(pernet->add_addr_accept_max); 62 + } 63 + EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); 64 + 65 + unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) 66 + { 67 + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 68 + 69 + return READ_ONCE(pernet->subflows_max); 70 + } 71 + EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); 72 + 73 + unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) 74 + { 75 + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 76 + 77 + return READ_ONCE(pernet->local_addr_max); 78 + } 79 + EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); 80 + 81 + static bool lookup_subflow_by_daddr(const struct list_head *list, 82 + const struct mptcp_addr_info *daddr) 83 + { 84 + struct mptcp_subflow_context *subflow; 85 + struct mptcp_addr_info cur; 86 + 87 + list_for_each_entry(subflow, list, node) { 88 + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 89 + 90 + if (!((1 << inet_sk_state_load(ssk)) & 91 + (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV))) 92 + continue; 93 + 94 + mptcp_remote_address((struct sock_common *)ssk, &cur); 95 + if (mptcp_addresses_equal(&cur, daddr, daddr->port)) 96 + return true; 97 + } 98 + 99 + return false; 100 + } 101 + 102 + static bool 103 + select_local_address(const struct pm_nl_pernet *pernet, 104 + const struct mptcp_sock *msk, 105 + struct mptcp_pm_local *new_local) 106 + { 107 + struct mptcp_pm_addr_entry *entry; 108 + bool found = false; 109 + 110 + msk_owned_by_me(msk); 111 + 112 + rcu_read_lock(); 113 + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 114 + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) 115 + continue; 116 + 117 + if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 118 + continue; 119 + 120 + new_local->addr = entry->addr; 121 + new_local->flags = entry->flags; 122 + new_local->ifindex = entry->ifindex; 123 + found = true; 124 + break; 125 + } 126 + rcu_read_unlock(); 127 + 128 + return found; 129 + } 130 + 131 + static bool 132 + select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, 133 + struct mptcp_pm_local *new_local) 134 + { 135 + struct mptcp_pm_addr_entry *entry; 136 + bool found = false; 137 + 138 + rcu_read_lock(); 139 + /* do not keep any additional per socket state, just signal 140 + * the address list in order. 141 + * Note: removal from the local address list during the msk life-cycle 142 + * can lead to additional addresses not being announced. 143 + */ 144 + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 145 + if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 146 + continue; 147 + 148 + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) 149 + continue; 150 + 151 + new_local->addr = entry->addr; 152 + new_local->flags = entry->flags; 153 + new_local->ifindex = entry->ifindex; 154 + found = true; 155 + break; 156 + } 157 + rcu_read_unlock(); 158 + 159 + return found; 160 + } 161 + 162 + /* Fill all the remote addresses into the array addrs[], 163 + * and return the array size. 164 + */ 165 + static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, 166 + struct mptcp_addr_info *local, 167 + bool fullmesh, 168 + struct mptcp_addr_info *addrs) 169 + { 170 + bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); 171 + struct sock *sk = (struct sock *)msk, *ssk; 172 + struct mptcp_subflow_context *subflow; 173 + struct mptcp_addr_info remote = { 0 }; 174 + unsigned int subflows_max; 175 + int i = 0; 176 + 177 + subflows_max = mptcp_pm_get_subflows_max(msk); 178 + mptcp_remote_address((struct sock_common *)sk, &remote); 179 + 180 + /* Non-fullmesh endpoint, fill in the single entry 181 + * corresponding to the primary MPC subflow remote address 182 + */ 183 + if (!fullmesh) { 184 + if (deny_id0) 185 + return 0; 186 + 187 + if (!mptcp_pm_addr_families_match(sk, local, &remote)) 188 + return 0; 189 + 190 + msk->pm.subflows++; 191 + addrs[i++] = remote; 192 + } else { 193 + DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 194 + 195 + /* Forbid creation of new subflows matching existing 196 + * ones, possibly already created by incoming ADD_ADDR 197 + */ 198 + bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 199 + mptcp_for_each_subflow(msk, subflow) 200 + if (READ_ONCE(subflow->local_id) == local->id) 201 + __set_bit(subflow->remote_id, unavail_id); 202 + 203 + mptcp_for_each_subflow(msk, subflow) { 204 + ssk = mptcp_subflow_tcp_sock(subflow); 205 + mptcp_remote_address((struct sock_common *)ssk, &addrs[i]); 206 + addrs[i].id = READ_ONCE(subflow->remote_id); 207 + if (deny_id0 && !addrs[i].id) 208 + continue; 209 + 210 + if (test_bit(addrs[i].id, unavail_id)) 211 + continue; 212 + 213 + if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) 214 + continue; 215 + 216 + if (msk->pm.subflows < subflows_max) { 217 + /* forbid creating multiple address towards 218 + * this id 219 + */ 220 + __set_bit(addrs[i].id, unavail_id); 221 + msk->pm.subflows++; 222 + i++; 223 + } 224 + } 225 + } 226 + 227 + return i; 228 + } 229 + 230 + static struct mptcp_pm_addr_entry * 231 + __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) 232 + { 233 + struct mptcp_pm_addr_entry *entry; 234 + 235 + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, 236 + lockdep_is_held(&pernet->lock)) { 237 + if (entry->addr.id == id) 238 + return entry; 239 + } 240 + return NULL; 241 + } 242 + 243 + static struct mptcp_pm_addr_entry * 244 + __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) 245 + { 246 + struct mptcp_pm_addr_entry *entry; 247 + 248 + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, 249 + lockdep_is_held(&pernet->lock)) { 250 + if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) 251 + return entry; 252 + } 253 + return NULL; 254 + } 255 + 256 + static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) 257 + { 258 + struct sock *sk = (struct sock *)msk; 259 + unsigned int add_addr_signal_max; 260 + bool signal_and_subflow = false; 261 + unsigned int local_addr_max; 262 + struct pm_nl_pernet *pernet; 263 + struct mptcp_pm_local local; 264 + unsigned int subflows_max; 265 + 266 + pernet = pm_nl_get_pernet(sock_net(sk)); 267 + 268 + add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); 269 + local_addr_max = mptcp_pm_get_local_addr_max(msk); 270 + subflows_max = mptcp_pm_get_subflows_max(msk); 271 + 272 + /* do lazy endpoint usage accounting for the MPC subflows */ 273 + if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { 274 + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first); 275 + struct mptcp_pm_addr_entry *entry; 276 + struct mptcp_addr_info mpc_addr; 277 + bool backup = false; 278 + 279 + mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); 280 + rcu_read_lock(); 281 + entry = __lookup_addr(pernet, &mpc_addr); 282 + if (entry) { 283 + __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); 284 + msk->mpc_endpoint_id = entry->addr.id; 285 + backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 286 + } 287 + rcu_read_unlock(); 288 + 289 + if (backup) 290 + mptcp_pm_send_ack(msk, subflow, true, backup); 291 + 292 + msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); 293 + } 294 + 295 + pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", 296 + msk->pm.local_addr_used, local_addr_max, 297 + msk->pm.add_addr_signaled, add_addr_signal_max, 298 + msk->pm.subflows, subflows_max); 299 + 300 + /* check first for announce */ 301 + if (msk->pm.add_addr_signaled < add_addr_signal_max) { 302 + /* due to racing events on both ends we can reach here while 303 + * previous add address is still running: if we invoke now 304 + * mptcp_pm_announce_addr(), that will fail and the 305 + * corresponding id will be marked as used. 306 + * Instead let the PM machinery reschedule us when the 307 + * current address announce will be completed. 308 + */ 309 + if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) 310 + return; 311 + 312 + if (!select_signal_address(pernet, msk, &local)) 313 + goto subflow; 314 + 315 + /* If the alloc fails, we are on memory pressure, not worth 316 + * continuing, and trying to create subflows. 317 + */ 318 + if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) 319 + return; 320 + 321 + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); 322 + msk->pm.add_addr_signaled++; 323 + 324 + /* Special case for ID0: set the correct ID */ 325 + if (local.addr.id == msk->mpc_endpoint_id) 326 + local.addr.id = 0; 327 + 328 + mptcp_pm_announce_addr(msk, &local.addr, false); 329 + mptcp_pm_addr_send_ack(msk); 330 + 331 + if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) 332 + signal_and_subflow = true; 333 + } 334 + 335 + subflow: 336 + /* check if should create a new subflow */ 337 + while (msk->pm.local_addr_used < local_addr_max && 338 + msk->pm.subflows < subflows_max) { 339 + struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; 340 + bool fullmesh; 341 + int i, nr; 342 + 343 + if (signal_and_subflow) 344 + signal_and_subflow = false; 345 + else if (!select_local_address(pernet, msk, &local)) 346 + break; 347 + 348 + fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); 349 + 350 + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); 351 + 352 + /* Special case for ID0: set the correct ID */ 353 + if (local.addr.id == msk->mpc_endpoint_id) 354 + local.addr.id = 0; 355 + else /* local_addr_used is not decr for ID 0 */ 356 + msk->pm.local_addr_used++; 357 + 358 + nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); 359 + if (nr == 0) 360 + continue; 361 + 362 + spin_unlock_bh(&msk->pm.lock); 363 + for (i = 0; i < nr; i++) 364 + __mptcp_subflow_connect(sk, &local, &addrs[i]); 365 + spin_lock_bh(&msk->pm.lock); 366 + } 367 + mptcp_pm_nl_check_work_pending(msk); 368 + } 369 + 370 + static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) 371 + { 372 + mptcp_pm_create_subflow_or_signal_addr(msk); 373 + } 374 + 375 + static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) 376 + { 377 + mptcp_pm_create_subflow_or_signal_addr(msk); 378 + } 379 + 380 + /* Fill all the local addresses into the array addrs[], 381 + * and return the array size. 382 + */ 383 + static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, 384 + struct mptcp_addr_info *remote, 385 + struct mptcp_pm_local *locals) 386 + { 387 + struct sock *sk = (struct sock *)msk; 388 + struct mptcp_pm_addr_entry *entry; 389 + struct mptcp_addr_info mpc_addr; 390 + struct pm_nl_pernet *pernet; 391 + unsigned int subflows_max; 392 + int i = 0; 393 + 394 + pernet = pm_nl_get_pernet_from_msk(msk); 395 + subflows_max = mptcp_pm_get_subflows_max(msk); 396 + 397 + mptcp_local_address((struct sock_common *)msk, &mpc_addr); 398 + 399 + rcu_read_lock(); 400 + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 401 + if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) 402 + continue; 403 + 404 + if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) 405 + continue; 406 + 407 + if (msk->pm.subflows < subflows_max) { 408 + locals[i].addr = entry->addr; 409 + locals[i].flags = entry->flags; 410 + locals[i].ifindex = entry->ifindex; 411 + 412 + /* Special case for ID0: set the correct ID */ 413 + if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port)) 414 + locals[i].addr.id = 0; 415 + 416 + msk->pm.subflows++; 417 + i++; 418 + } 419 + } 420 + rcu_read_unlock(); 421 + 422 + /* If the array is empty, fill in the single 423 + * 'IPADDRANY' local address 424 + */ 425 + if (!i) { 426 + memset(&locals[i], 0, sizeof(locals[i])); 427 + locals[i].addr.family = 428 + #if IS_ENABLED(CONFIG_MPTCP_IPV6) 429 + remote->family == AF_INET6 && 430 + ipv6_addr_v4mapped(&remote->addr6) ? AF_INET : 431 + #endif 432 + remote->family; 433 + 434 + if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote)) 435 + return 0; 436 + 437 + msk->pm.subflows++; 438 + i++; 439 + } 440 + 441 + return i; 442 + } 443 + 444 + static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) 445 + { 446 + struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX]; 447 + struct sock *sk = (struct sock *)msk; 448 + unsigned int add_addr_accept_max; 449 + struct mptcp_addr_info remote; 450 + unsigned int subflows_max; 451 + bool sf_created = false; 452 + int i, nr; 453 + 454 + add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); 455 + subflows_max = mptcp_pm_get_subflows_max(msk); 456 + 457 + pr_debug("accepted %d:%d remote family %d\n", 458 + msk->pm.add_addr_accepted, add_addr_accept_max, 459 + msk->pm.remote.family); 460 + 461 + remote = msk->pm.remote; 462 + mptcp_pm_announce_addr(msk, &remote, true); 463 + mptcp_pm_addr_send_ack(msk); 464 + 465 + if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) 466 + return; 467 + 468 + /* pick id 0 port, if none is provided the remote address */ 469 + if (!remote.port) 470 + remote.port = sk->sk_dport; 471 + 472 + /* connect to the specified remote address, using whatever 473 + * local address the routing configuration will pick. 474 + */ 475 + nr = fill_local_addresses_vec(msk, &remote, locals); 476 + if (nr == 0) 477 + return; 478 + 479 + spin_unlock_bh(&msk->pm.lock); 480 + for (i = 0; i < nr; i++) 481 + if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0) 482 + sf_created = true; 483 + spin_lock_bh(&msk->pm.lock); 484 + 485 + if (sf_created) { 486 + /* add_addr_accepted is not decr for ID 0 */ 487 + if (remote.id) 488 + msk->pm.add_addr_accepted++; 489 + if (msk->pm.add_addr_accepted >= add_addr_accept_max || 490 + msk->pm.subflows >= subflows_max) 491 + WRITE_ONCE(msk->pm.accept_addr, false); 492 + } 493 + } 494 + 495 + void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id) 496 + { 497 + if (rm_id && WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { 498 + /* Note: if the subflow has been closed before, this 499 + * add_addr_accepted counter will not be decremented. 500 + */ 501 + if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) 502 + WRITE_ONCE(msk->pm.accept_addr, true); 503 + } 504 + } 505 + 506 + static bool address_use_port(struct mptcp_pm_addr_entry *entry) 507 + { 508 + return (entry->flags & 509 + (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == 510 + MPTCP_PM_ADDR_FLAG_SIGNAL; 511 + } 512 + 513 + /* caller must ensure the RCU grace period is already elapsed */ 514 + static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) 515 + { 516 + if (entry->lsk) 517 + sock_release(entry->lsk); 518 + kfree(entry); 519 + } 520 + 521 + static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, 522 + struct mptcp_pm_addr_entry *entry, 523 + bool needs_id, bool replace) 524 + { 525 + struct mptcp_pm_addr_entry *cur, *del_entry = NULL; 526 + unsigned int addr_max; 527 + int ret = -EINVAL; 528 + 529 + spin_lock_bh(&pernet->lock); 530 + /* to keep the code simple, don't do IDR-like allocation for address ID, 531 + * just bail when we exceed limits 532 + */ 533 + if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) 534 + pernet->next_id = 1; 535 + if (pernet->addrs >= MPTCP_PM_ADDR_MAX) { 536 + ret = -ERANGE; 537 + goto out; 538 + } 539 + if (test_bit(entry->addr.id, pernet->id_bitmap)) { 540 + ret = -EBUSY; 541 + goto out; 542 + } 543 + 544 + /* do not insert duplicate address, differentiate on port only 545 + * singled addresses 546 + */ 547 + if (!address_use_port(entry)) 548 + entry->addr.port = 0; 549 + list_for_each_entry(cur, &pernet->local_addr_list, list) { 550 + if (mptcp_addresses_equal(&cur->addr, &entry->addr, 551 + cur->addr.port || entry->addr.port)) { 552 + /* allow replacing the exiting endpoint only if such 553 + * endpoint is an implicit one and the user-space 554 + * did not provide an endpoint id 555 + */ 556 + if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) { 557 + ret = -EEXIST; 558 + goto out; 559 + } 560 + if (entry->addr.id) 561 + goto out; 562 + 563 + /* allow callers that only need to look up the local 564 + * addr's id to skip replacement. This allows them to 565 + * avoid calling synchronize_rcu in the packet recv 566 + * path. 567 + */ 568 + if (!replace) { 569 + kfree(entry); 570 + ret = cur->addr.id; 571 + goto out; 572 + } 573 + 574 + pernet->addrs--; 575 + entry->addr.id = cur->addr.id; 576 + list_del_rcu(&cur->list); 577 + del_entry = cur; 578 + break; 579 + } 580 + } 581 + 582 + if (!entry->addr.id && needs_id) { 583 + find_next: 584 + entry->addr.id = find_next_zero_bit(pernet->id_bitmap, 585 + MPTCP_PM_MAX_ADDR_ID + 1, 586 + pernet->next_id); 587 + if (!entry->addr.id && pernet->next_id != 1) { 588 + pernet->next_id = 1; 589 + goto find_next; 590 + } 591 + } 592 + 593 + if (!entry->addr.id && needs_id) 594 + goto out; 595 + 596 + __set_bit(entry->addr.id, pernet->id_bitmap); 597 + if (entry->addr.id > pernet->next_id) 598 + pernet->next_id = entry->addr.id; 599 + 600 + if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 601 + addr_max = pernet->add_addr_signal_max; 602 + WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); 603 + } 604 + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 605 + addr_max = pernet->local_addr_max; 606 + WRITE_ONCE(pernet->local_addr_max, addr_max + 1); 607 + } 608 + 609 + pernet->addrs++; 610 + if (!entry->addr.port) 611 + list_add_tail_rcu(&entry->list, &pernet->local_addr_list); 612 + else 613 + list_add_rcu(&entry->list, &pernet->local_addr_list); 614 + ret = entry->addr.id; 615 + 616 + out: 617 + spin_unlock_bh(&pernet->lock); 618 + 619 + /* just replaced an existing entry, free it */ 620 + if (del_entry) { 621 + synchronize_rcu(); 622 + __mptcp_pm_release_addr_entry(del_entry); 623 + } 624 + return ret; 625 + } 626 + 627 + static struct lock_class_key mptcp_slock_keys[2]; 628 + static struct lock_class_key mptcp_keys[2]; 629 + 630 + static int mptcp_pm_nl_create_listen_socket(struct sock *sk, 631 + struct mptcp_pm_addr_entry *entry) 632 + { 633 + bool is_ipv6 = sk->sk_family == AF_INET6; 634 + int addrlen = sizeof(struct sockaddr_in); 635 + struct sockaddr_storage addr; 636 + struct sock *newsk, *ssk; 637 + int backlog = 1024; 638 + int err; 639 + 640 + err = sock_create_kern(sock_net(sk), entry->addr.family, 641 + SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); 642 + if (err) 643 + return err; 644 + 645 + newsk = entry->lsk->sk; 646 + if (!newsk) 647 + return -EINVAL; 648 + 649 + /* The subflow socket lock is acquired in a nested to the msk one 650 + * in several places, even by the TCP stack, and this msk is a kernel 651 + * socket: lockdep complains. Instead of propagating the _nested 652 + * modifiers in several places, re-init the lock class for the msk 653 + * socket to an mptcp specific one. 654 + */ 655 + sock_lock_init_class_and_name(newsk, 656 + is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", 657 + &mptcp_slock_keys[is_ipv6], 658 + is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", 659 + &mptcp_keys[is_ipv6]); 660 + 661 + lock_sock(newsk); 662 + ssk = __mptcp_nmpc_sk(mptcp_sk(newsk)); 663 + release_sock(newsk); 664 + if (IS_ERR(ssk)) 665 + return PTR_ERR(ssk); 666 + 667 + mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); 668 + #if IS_ENABLED(CONFIG_MPTCP_IPV6) 669 + if (entry->addr.family == AF_INET6) 670 + addrlen = sizeof(struct sockaddr_in6); 671 + #endif 672 + if (ssk->sk_family == AF_INET) 673 + err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 674 + #if IS_ENABLED(CONFIG_MPTCP_IPV6) 675 + else if (ssk->sk_family == AF_INET6) 676 + err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 677 + #endif 678 + if (err) 679 + return err; 680 + 681 + /* We don't use mptcp_set_state() here because it needs to be called 682 + * under the msk socket lock. For the moment, that will not bring 683 + * anything more than only calling inet_sk_state_store(), because the 684 + * old status is known (TCP_CLOSE). 685 + */ 686 + inet_sk_state_store(newsk, TCP_LISTEN); 687 + lock_sock(ssk); 688 + WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); 689 + err = __inet_listen_sk(ssk, backlog); 690 + if (!err) 691 + mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); 692 + release_sock(ssk); 693 + return err; 694 + } 695 + 696 + int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, 697 + struct mptcp_pm_addr_entry *skc) 698 + { 699 + struct mptcp_pm_addr_entry *entry; 700 + struct pm_nl_pernet *pernet; 701 + int ret; 702 + 703 + pernet = pm_nl_get_pernet_from_msk(msk); 704 + 705 + rcu_read_lock(); 706 + entry = __lookup_addr(pernet, &skc->addr); 707 + ret = entry ? entry->addr.id : -1; 708 + rcu_read_unlock(); 709 + if (ret >= 0) 710 + return ret; 711 + 712 + /* address not found, add to local list */ 713 + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 714 + if (!entry) 715 + return -ENOMEM; 716 + 717 + *entry = *skc; 718 + entry->addr.port = 0; 719 + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); 720 + if (ret < 0) 721 + kfree(entry); 722 + 723 + return ret; 724 + } 725 + 726 + bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) 727 + { 728 + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 729 + struct mptcp_pm_addr_entry *entry; 730 + bool backup; 731 + 732 + rcu_read_lock(); 733 + entry = __lookup_addr(pernet, skc); 734 + backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 735 + rcu_read_unlock(); 736 + 737 + return backup; 738 + } 739 + 740 + static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, 741 + struct mptcp_addr_info *addr) 742 + { 743 + struct mptcp_sock *msk; 744 + long s_slot = 0, s_num = 0; 745 + 746 + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 747 + struct sock *sk = (struct sock *)msk; 748 + struct mptcp_addr_info mpc_addr; 749 + 750 + if (!READ_ONCE(msk->fully_established) || 751 + mptcp_pm_is_userspace(msk)) 752 + goto next; 753 + 754 + /* if the endp linked to the init sf is re-added with a != ID */ 755 + mptcp_local_address((struct sock_common *)msk, &mpc_addr); 756 + 757 + lock_sock(sk); 758 + spin_lock_bh(&msk->pm.lock); 759 + if (mptcp_addresses_equal(addr, &mpc_addr, addr->port)) 760 + msk->mpc_endpoint_id = addr->id; 761 + mptcp_pm_create_subflow_or_signal_addr(msk); 762 + spin_unlock_bh(&msk->pm.lock); 763 + release_sock(sk); 764 + 765 + next: 766 + sock_put(sk); 767 + cond_resched(); 768 + } 769 + 770 + return 0; 771 + } 772 + 773 + static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, 774 + struct genl_info *info) 775 + { 776 + struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; 777 + 778 + if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, 779 + mptcp_pm_address_nl_policy, info->extack) && 780 + tb[MPTCP_PM_ADDR_ATTR_ID]) 781 + return true; 782 + return false; 783 + } 784 + 785 + /* Add an MPTCP endpoint */ 786 + int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) 787 + { 788 + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 789 + struct mptcp_pm_addr_entry addr, *entry; 790 + struct nlattr *attr; 791 + int ret; 792 + 793 + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) 794 + return -EINVAL; 795 + 796 + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; 797 + ret = mptcp_pm_parse_entry(attr, info, true, &addr); 798 + if (ret < 0) 799 + return ret; 800 + 801 + if (addr.addr.port && !address_use_port(&addr)) { 802 + NL_SET_ERR_MSG_ATTR(info->extack, attr, 803 + "flags must have signal and not subflow when using port"); 804 + return -EINVAL; 805 + } 806 + 807 + if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && 808 + addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { 809 + NL_SET_ERR_MSG_ATTR(info->extack, attr, 810 + "flags mustn't have both signal and fullmesh"); 811 + return -EINVAL; 812 + } 813 + 814 + if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { 815 + NL_SET_ERR_MSG_ATTR(info->extack, attr, 816 + "can't create IMPLICIT endpoint"); 817 + return -EINVAL; 818 + } 819 + 820 + entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT); 821 + if (!entry) { 822 + GENL_SET_ERR_MSG(info, "can't allocate addr"); 823 + return -ENOMEM; 824 + } 825 + 826 + *entry = addr; 827 + if (entry->addr.port) { 828 + ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); 829 + if (ret) { 830 + GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret); 831 + goto out_free; 832 + } 833 + } 834 + ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, 835 + !mptcp_pm_has_addr_attr_id(attr, info), 836 + true); 837 + if (ret < 0) { 838 + GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); 839 + goto out_free; 840 + } 841 + 842 + mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr); 843 + return 0; 844 + 845 + out_free: 846 + __mptcp_pm_release_addr_entry(entry); 847 + return ret; 848 + } 849 + 850 + static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk, 851 + const struct mptcp_addr_info *addr) 852 + { 853 + return msk->mpc_endpoint_id == addr->id ? 0 : addr->id; 854 + } 855 + 856 + static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, 857 + const struct mptcp_addr_info *addr, 858 + bool force) 859 + { 860 + struct mptcp_rm_list list = { .nr = 0 }; 861 + bool ret; 862 + 863 + list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); 864 + 865 + ret = mptcp_remove_anno_list_by_saddr(msk, addr); 866 + if (ret || force) { 867 + spin_lock_bh(&msk->pm.lock); 868 + if (ret) { 869 + __set_bit(addr->id, msk->pm.id_avail_bitmap); 870 + msk->pm.add_addr_signaled--; 871 + } 872 + mptcp_pm_remove_addr(msk, &list); 873 + spin_unlock_bh(&msk->pm.lock); 874 + } 875 + return ret; 876 + } 877 + 878 + static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) 879 + { 880 + /* If it was marked as used, and not ID 0, decrement local_addr_used */ 881 + if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && 882 + id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) 883 + msk->pm.local_addr_used--; 884 + } 885 + 886 + static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, 887 + const struct mptcp_pm_addr_entry *entry) 888 + { 889 + const struct mptcp_addr_info *addr = &entry->addr; 890 + struct mptcp_rm_list list = { .nr = 1 }; 891 + long s_slot = 0, s_num = 0; 892 + struct mptcp_sock *msk; 893 + 894 + pr_debug("remove_id=%d\n", addr->id); 895 + 896 + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 897 + struct sock *sk = (struct sock *)msk; 898 + bool remove_subflow; 899 + 900 + if (mptcp_pm_is_userspace(msk)) 901 + goto next; 902 + 903 + lock_sock(sk); 904 + remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); 905 + mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && 906 + !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); 907 + 908 + list.ids[0] = mptcp_endp_get_local_id(msk, addr); 909 + if (remove_subflow) { 910 + spin_lock_bh(&msk->pm.lock); 911 + mptcp_pm_rm_subflow(msk, &list); 912 + spin_unlock_bh(&msk->pm.lock); 913 + } 914 + 915 + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 916 + spin_lock_bh(&msk->pm.lock); 917 + __mark_subflow_endp_available(msk, list.ids[0]); 918 + spin_unlock_bh(&msk->pm.lock); 919 + } 920 + 921 + if (msk->mpc_endpoint_id == entry->addr.id) 922 + msk->mpc_endpoint_id = 0; 923 + release_sock(sk); 924 + 925 + next: 926 + sock_put(sk); 927 + cond_resched(); 928 + } 929 + 930 + return 0; 931 + } 932 + 933 + static int mptcp_nl_remove_id_zero_address(struct net *net, 934 + struct mptcp_addr_info *addr) 935 + { 936 + struct mptcp_rm_list list = { .nr = 0 }; 937 + long s_slot = 0, s_num = 0; 938 + struct mptcp_sock *msk; 939 + 940 + list.ids[list.nr++] = 0; 941 + 942 + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 943 + struct sock *sk = (struct sock *)msk; 944 + struct mptcp_addr_info msk_local; 945 + 946 + if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 947 + goto next; 948 + 949 + mptcp_local_address((struct sock_common *)msk, &msk_local); 950 + if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) 951 + goto next; 952 + 953 + lock_sock(sk); 954 + spin_lock_bh(&msk->pm.lock); 955 + mptcp_pm_remove_addr(msk, &list); 956 + mptcp_pm_rm_subflow(msk, &list); 957 + __mark_subflow_endp_available(msk, 0); 958 + spin_unlock_bh(&msk->pm.lock); 959 + release_sock(sk); 960 + 961 + next: 962 + sock_put(sk); 963 + cond_resched(); 964 + } 965 + 966 + return 0; 967 + } 968 + 969 + /* Remove an MPTCP endpoint */ 970 + int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) 971 + { 972 + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 973 + struct mptcp_pm_addr_entry addr, *entry; 974 + unsigned int addr_max; 975 + struct nlattr *attr; 976 + int ret; 977 + 978 + if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) 979 + return -EINVAL; 980 + 981 + attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; 982 + ret = mptcp_pm_parse_entry(attr, info, false, &addr); 983 + if (ret < 0) 984 + return ret; 985 + 986 + /* the zero id address is special: the first address used by the msk 987 + * always gets such an id, so different subflows can have different zero 988 + * id addresses. Additionally zero id is not accounted for in id_bitmap. 989 + * Let's use an 'mptcp_rm_list' instead of the common remove code. 990 + */ 991 + if (addr.addr.id == 0) 992 + return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); 993 + 994 + spin_lock_bh(&pernet->lock); 995 + entry = __lookup_addr_by_id(pernet, addr.addr.id); 996 + if (!entry) { 997 + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); 998 + spin_unlock_bh(&pernet->lock); 999 + return -EINVAL; 1000 + } 1001 + if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 1002 + addr_max = pernet->add_addr_signal_max; 1003 + WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); 1004 + } 1005 + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 1006 + addr_max = pernet->local_addr_max; 1007 + WRITE_ONCE(pernet->local_addr_max, addr_max - 1); 1008 + } 1009 + 1010 + pernet->addrs--; 1011 + list_del_rcu(&entry->list); 1012 + __clear_bit(entry->addr.id, pernet->id_bitmap); 1013 + spin_unlock_bh(&pernet->lock); 1014 + 1015 + mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry); 1016 + synchronize_rcu(); 1017 + __mptcp_pm_release_addr_entry(entry); 1018 + 1019 + return ret; 1020 + } 1021 + 1022 + static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, 1023 + struct list_head *rm_list) 1024 + { 1025 + struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; 1026 + struct mptcp_pm_addr_entry *entry; 1027 + 1028 + list_for_each_entry(entry, rm_list, list) { 1029 + if (slist.nr < MPTCP_RM_IDS_MAX && 1030 + mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) 1031 + slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); 1032 + 1033 + if (alist.nr < MPTCP_RM_IDS_MAX && 1034 + mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) 1035 + alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); 1036 + } 1037 + 1038 + spin_lock_bh(&msk->pm.lock); 1039 + if (alist.nr) { 1040 + msk->pm.add_addr_signaled -= alist.nr; 1041 + mptcp_pm_remove_addr(msk, &alist); 1042 + } 1043 + if (slist.nr) 1044 + mptcp_pm_rm_subflow(msk, &slist); 1045 + /* Reset counters: maybe some subflows have been removed before */ 1046 + bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1047 + msk->pm.local_addr_used = 0; 1048 + spin_unlock_bh(&msk->pm.lock); 1049 + } 1050 + 1051 + static void mptcp_nl_flush_addrs_list(struct net *net, 1052 + struct list_head *rm_list) 1053 + { 1054 + long s_slot = 0, s_num = 0; 1055 + struct mptcp_sock *msk; 1056 + 1057 + if (list_empty(rm_list)) 1058 + return; 1059 + 1060 + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1061 + struct sock *sk = (struct sock *)msk; 1062 + 1063 + if (!mptcp_pm_is_userspace(msk)) { 1064 + lock_sock(sk); 1065 + mptcp_pm_flush_addrs_and_subflows(msk, rm_list); 1066 + release_sock(sk); 1067 + } 1068 + 1069 + sock_put(sk); 1070 + cond_resched(); 1071 + } 1072 + } 1073 + 1074 + /* caller must ensure the RCU grace period is already elapsed */ 1075 + static void __flush_addrs(struct list_head *list) 1076 + { 1077 + while (!list_empty(list)) { 1078 + struct mptcp_pm_addr_entry *cur; 1079 + 1080 + cur = list_entry(list->next, 1081 + struct mptcp_pm_addr_entry, list); 1082 + list_del_rcu(&cur->list); 1083 + __mptcp_pm_release_addr_entry(cur); 1084 + } 1085 + } 1086 + 1087 + static void __reset_counters(struct pm_nl_pernet *pernet) 1088 + { 1089 + WRITE_ONCE(pernet->add_addr_signal_max, 0); 1090 + WRITE_ONCE(pernet->add_addr_accept_max, 0); 1091 + WRITE_ONCE(pernet->local_addr_max, 0); 1092 + pernet->addrs = 0; 1093 + } 1094 + 1095 + int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) 1096 + { 1097 + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1098 + LIST_HEAD(free_list); 1099 + 1100 + spin_lock_bh(&pernet->lock); 1101 + list_splice_init(&pernet->local_addr_list, &free_list); 1102 + __reset_counters(pernet); 1103 + pernet->next_id = 1; 1104 + bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 1105 + spin_unlock_bh(&pernet->lock); 1106 + mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list); 1107 + synchronize_rcu(); 1108 + __flush_addrs(&free_list); 1109 + return 0; 1110 + } 1111 + 1112 + int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, 1113 + struct genl_info *info) 1114 + { 1115 + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1116 + struct mptcp_pm_addr_entry *entry; 1117 + int ret = -EINVAL; 1118 + 1119 + rcu_read_lock(); 1120 + entry = __lookup_addr_by_id(pernet, id); 1121 + if (entry) { 1122 + *addr = *entry; 1123 + ret = 0; 1124 + } 1125 + rcu_read_unlock(); 1126 + 1127 + return ret; 1128 + } 1129 + 1130 + int mptcp_pm_nl_dump_addr(struct sk_buff *msg, 1131 + struct netlink_callback *cb) 1132 + { 1133 + struct net *net = sock_net(msg->sk); 1134 + struct mptcp_pm_addr_entry *entry; 1135 + struct pm_nl_pernet *pernet; 1136 + int id = cb->args[0]; 1137 + int i; 1138 + 1139 + pernet = pm_nl_get_pernet(net); 1140 + 1141 + rcu_read_lock(); 1142 + for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { 1143 + if (test_bit(i, pernet->id_bitmap)) { 1144 + entry = __lookup_addr_by_id(pernet, i); 1145 + if (!entry) 1146 + break; 1147 + 1148 + if (entry->addr.id <= id) 1149 + continue; 1150 + 1151 + if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) 1152 + break; 1153 + 1154 + id = entry->addr.id; 1155 + } 1156 + } 1157 + rcu_read_unlock(); 1158 + 1159 + cb->args[0] = id; 1160 + return msg->len; 1161 + } 1162 + 1163 + static int parse_limit(struct genl_info *info, int id, unsigned int *limit) 1164 + { 1165 + struct nlattr *attr = info->attrs[id]; 1166 + 1167 + if (!attr) 1168 + return 0; 1169 + 1170 + *limit = nla_get_u32(attr); 1171 + if (*limit > MPTCP_PM_ADDR_MAX) { 1172 + NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, 1173 + "limit greater than maximum (%u)", 1174 + MPTCP_PM_ADDR_MAX); 1175 + return -EINVAL; 1176 + } 1177 + return 0; 1178 + } 1179 + 1180 + int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) 1181 + { 1182 + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1183 + unsigned int rcv_addrs, subflows; 1184 + int ret; 1185 + 1186 + spin_lock_bh(&pernet->lock); 1187 + rcv_addrs = pernet->add_addr_accept_max; 1188 + ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs); 1189 + if (ret) 1190 + goto unlock; 1191 + 1192 + subflows = pernet->subflows_max; 1193 + ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows); 1194 + if (ret) 1195 + goto unlock; 1196 + 1197 + WRITE_ONCE(pernet->add_addr_accept_max, rcv_addrs); 1198 + WRITE_ONCE(pernet->subflows_max, subflows); 1199 + 1200 + unlock: 1201 + spin_unlock_bh(&pernet->lock); 1202 + return ret; 1203 + } 1204 + 1205 + int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) 1206 + { 1207 + struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 1208 + struct sk_buff *msg; 1209 + void *reply; 1210 + 1211 + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1212 + if (!msg) 1213 + return -ENOMEM; 1214 + 1215 + reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, 1216 + MPTCP_PM_CMD_GET_LIMITS); 1217 + if (!reply) 1218 + goto fail; 1219 + 1220 + if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS, 1221 + READ_ONCE(pernet->add_addr_accept_max))) 1222 + goto fail; 1223 + 1224 + if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS, 1225 + READ_ONCE(pernet->subflows_max))) 1226 + goto fail; 1227 + 1228 + genlmsg_end(msg, reply); 1229 + return genlmsg_reply(msg, info); 1230 + 1231 + fail: 1232 + GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); 1233 + nlmsg_free(msg); 1234 + return -EMSGSIZE; 1235 + } 1236 + 1237 + static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, 1238 + struct mptcp_addr_info *addr) 1239 + { 1240 + struct mptcp_rm_list list = { .nr = 0 }; 1241 + 1242 + list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); 1243 + 1244 + spin_lock_bh(&msk->pm.lock); 1245 + mptcp_pm_rm_subflow(msk, &list); 1246 + __mark_subflow_endp_available(msk, list.ids[0]); 1247 + mptcp_pm_create_subflow_or_signal_addr(msk); 1248 + spin_unlock_bh(&msk->pm.lock); 1249 + } 1250 + 1251 + static void mptcp_pm_nl_set_flags_all(struct net *net, 1252 + struct mptcp_pm_addr_entry *local, 1253 + u8 changed) 1254 + { 1255 + u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW); 1256 + u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 1257 + long s_slot = 0, s_num = 0; 1258 + struct mptcp_sock *msk; 1259 + 1260 + if (changed == MPTCP_PM_ADDR_FLAG_FULLMESH && !is_subflow) 1261 + return; 1262 + 1263 + while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 1264 + struct sock *sk = (struct sock *)msk; 1265 + 1266 + if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 1267 + goto next; 1268 + 1269 + lock_sock(sk); 1270 + if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) 1271 + mptcp_pm_mp_prio_send_ack(msk, &local->addr, NULL, bkup); 1272 + /* Subflows will only be recreated if the SUBFLOW flag is set */ 1273 + if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)) 1274 + mptcp_pm_nl_fullmesh(msk, &local->addr); 1275 + release_sock(sk); 1276 + 1277 + next: 1278 + sock_put(sk); 1279 + cond_resched(); 1280 + } 1281 + } 1282 + 1283 + int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, 1284 + struct genl_info *info) 1285 + { 1286 + struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 1287 + u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | 1288 + MPTCP_PM_ADDR_FLAG_FULLMESH; 1289 + struct net *net = genl_info_net(info); 1290 + struct mptcp_pm_addr_entry *entry; 1291 + struct pm_nl_pernet *pernet; 1292 + u8 lookup_by_id = 0; 1293 + 1294 + pernet = pm_nl_get_pernet(net); 1295 + 1296 + if (local->addr.family == AF_UNSPEC) { 1297 + lookup_by_id = 1; 1298 + if (!local->addr.id) { 1299 + NL_SET_ERR_MSG_ATTR(info->extack, attr, 1300 + "missing address ID"); 1301 + return -EOPNOTSUPP; 1302 + } 1303 + } 1304 + 1305 + spin_lock_bh(&pernet->lock); 1306 + entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) : 1307 + __lookup_addr(pernet, &local->addr); 1308 + if (!entry) { 1309 + spin_unlock_bh(&pernet->lock); 1310 + NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); 1311 + return -EINVAL; 1312 + } 1313 + if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && 1314 + (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | 1315 + MPTCP_PM_ADDR_FLAG_IMPLICIT))) { 1316 + spin_unlock_bh(&pernet->lock); 1317 + NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags"); 1318 + return -EINVAL; 1319 + } 1320 + 1321 + changed = (local->flags ^ entry->flags) & mask; 1322 + entry->flags = (entry->flags & ~mask) | (local->flags & mask); 1323 + *local = *entry; 1324 + spin_unlock_bh(&pernet->lock); 1325 + 1326 + mptcp_pm_nl_set_flags_all(net, local, changed); 1327 + return 0; 1328 + } 1329 + 1330 + bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) 1331 + { 1332 + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 1333 + 1334 + if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) || 1335 + (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, 1336 + MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) { 1337 + WRITE_ONCE(msk->pm.work_pending, false); 1338 + return false; 1339 + } 1340 + return true; 1341 + } 1342 + 1343 + /* Called under PM lock */ 1344 + void __mptcp_pm_kernel_worker(struct mptcp_sock *msk) 1345 + { 1346 + struct mptcp_pm_data *pm = &msk->pm; 1347 + 1348 + if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { 1349 + pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); 1350 + mptcp_pm_nl_add_addr_received(msk); 1351 + } 1352 + if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { 1353 + pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); 1354 + mptcp_pm_nl_fully_established(msk); 1355 + } 1356 + if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { 1357 + pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); 1358 + mptcp_pm_nl_subflow_established(msk); 1359 + } 1360 + } 1361 + 1362 + static int __net_init pm_nl_init_net(struct net *net) 1363 + { 1364 + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 1365 + 1366 + INIT_LIST_HEAD_RCU(&pernet->local_addr_list); 1367 + 1368 + /* Cit. 2 subflows ought to be enough for anybody. */ 1369 + pernet->subflows_max = 2; 1370 + pernet->next_id = 1; 1371 + pernet->stale_loss_cnt = 4; 1372 + spin_lock_init(&pernet->lock); 1373 + 1374 + /* No need to initialize other pernet fields, the struct is zeroed at 1375 + * allocation time. 1376 + */ 1377 + 1378 + return 0; 1379 + } 1380 + 1381 + static void __net_exit pm_nl_exit_net(struct list_head *net_list) 1382 + { 1383 + struct net *net; 1384 + 1385 + list_for_each_entry(net, net_list, exit_list) { 1386 + struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 1387 + 1388 + /* net is removed from namespace list, can't race with 1389 + * other modifiers, also netns core already waited for a 1390 + * RCU grace period. 1391 + */ 1392 + __flush_addrs(&pernet->local_addr_list); 1393 + } 1394 + } 1395 + 1396 + static struct pernet_operations mptcp_pm_pernet_ops = { 1397 + .init = pm_nl_init_net, 1398 + .exit_batch = pm_nl_exit_net, 1399 + .id = &pm_nl_pernet_id, 1400 + .size = sizeof(struct pm_nl_pernet), 1401 + }; 1402 + 1403 + void __init mptcp_pm_nl_init(void) 1404 + { 1405 + if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) 1406 + panic("Failed to register MPTCP PM pernet subsystem.\n"); 1407 + 1408 + if (genl_register_family(&mptcp_genl_family)) 1409 + panic("Failed to register MPTCP PM netlink family\n"); 1410 + }
-1404
net/mptcp/pm_netlink.c
··· 6 6 7 7 #define pr_fmt(fmt) "MPTCP: " fmt 8 8 9 - #include <linux/inet.h> 10 - #include <linux/kernel.h> 11 - #include <net/inet_common.h> 12 - #include <net/netns/generic.h> 13 - #include <net/mptcp.h> 14 - 15 9 #include "protocol.h" 16 - #include "mib.h" 17 10 #include "mptcp_pm_gen.h" 18 - 19 - static int pm_nl_pernet_id; 20 - 21 - struct pm_nl_pernet { 22 - /* protects pernet updates */ 23 - spinlock_t lock; 24 - struct list_head local_addr_list; 25 - unsigned int addrs; 26 - unsigned int stale_loss_cnt; 27 - unsigned int add_addr_signal_max; 28 - unsigned int add_addr_accept_max; 29 - unsigned int local_addr_max; 30 - unsigned int subflows_max; 31 - unsigned int next_id; 32 - DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 33 - }; 34 - 35 - #define MPTCP_PM_ADDR_MAX 8 36 - 37 - static struct pm_nl_pernet *pm_nl_get_pernet(const struct net *net) 38 - { 39 - return net_generic(net, pm_nl_pernet_id); 40 - } 41 - 42 - static struct pm_nl_pernet * 43 - pm_nl_get_pernet_from_msk(const struct mptcp_sock *msk) 44 - { 45 - return pm_nl_get_pernet(sock_net((struct sock *)msk)); 46 - } 47 - 48 - static bool lookup_subflow_by_daddr(const struct list_head *list, 49 - const struct mptcp_addr_info *daddr) 50 - { 51 - struct mptcp_subflow_context *subflow; 52 - struct mptcp_addr_info cur; 53 - 54 - list_for_each_entry(subflow, list, node) { 55 - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); 56 - 57 - if (!((1 << inet_sk_state_load(ssk)) & 58 - (TCPF_ESTABLISHED | TCPF_SYN_SENT | TCPF_SYN_RECV))) 59 - continue; 60 - 61 - mptcp_remote_address((struct sock_common *)ssk, &cur); 62 - if (mptcp_addresses_equal(&cur, daddr, daddr->port)) 63 - return true; 64 - } 65 - 66 - return false; 67 - } 68 - 69 - static bool 70 - select_local_address(const struct pm_nl_pernet *pernet, 71 - const struct mptcp_sock *msk, 72 - struct mptcp_pm_local *new_local) 73 - { 74 - struct mptcp_pm_addr_entry *entry; 75 - bool found = false; 76 - 77 - msk_owned_by_me(msk); 78 - 79 - rcu_read_lock(); 80 - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 81 - if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) 82 - continue; 83 - 84 - if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 85 - continue; 86 - 87 - new_local->addr = entry->addr; 88 - new_local->flags = entry->flags; 89 - new_local->ifindex = entry->ifindex; 90 - found = true; 91 - break; 92 - } 93 - rcu_read_unlock(); 94 - 95 - return found; 96 - } 97 - 98 - static bool 99 - select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, 100 - struct mptcp_pm_local *new_local) 101 - { 102 - struct mptcp_pm_addr_entry *entry; 103 - bool found = false; 104 - 105 - rcu_read_lock(); 106 - /* do not keep any additional per socket state, just signal 107 - * the address list in order. 108 - * Note: removal from the local address list during the msk life-cycle 109 - * can lead to additional addresses not being announced. 110 - */ 111 - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 112 - if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) 113 - continue; 114 - 115 - if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) 116 - continue; 117 - 118 - new_local->addr = entry->addr; 119 - new_local->flags = entry->flags; 120 - new_local->ifindex = entry->ifindex; 121 - found = true; 122 - break; 123 - } 124 - rcu_read_unlock(); 125 - 126 - return found; 127 - } 128 - 129 - unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) 130 - { 131 - const struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 132 - 133 - return READ_ONCE(pernet->add_addr_signal_max); 134 - } 135 - EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_signal_max); 136 - 137 - unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk) 138 - { 139 - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 140 - 141 - return READ_ONCE(pernet->add_addr_accept_max); 142 - } 143 - EXPORT_SYMBOL_GPL(mptcp_pm_get_add_addr_accept_max); 144 - 145 - unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk) 146 - { 147 - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 148 - 149 - return READ_ONCE(pernet->subflows_max); 150 - } 151 - EXPORT_SYMBOL_GPL(mptcp_pm_get_subflows_max); 152 - 153 - unsigned int mptcp_pm_get_local_addr_max(const struct mptcp_sock *msk) 154 - { 155 - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 156 - 157 - return READ_ONCE(pernet->local_addr_max); 158 - } 159 - EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max); 160 - 161 - bool mptcp_pm_nl_check_work_pending(struct mptcp_sock *msk) 162 - { 163 - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 164 - 165 - if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) || 166 - (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap, 167 - MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1)) { 168 - WRITE_ONCE(msk->pm.work_pending, false); 169 - return false; 170 - } 171 - return true; 172 - } 173 - 174 - /* Fill all the remote addresses into the array addrs[], 175 - * and return the array size. 176 - */ 177 - static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, 178 - struct mptcp_addr_info *local, 179 - bool fullmesh, 180 - struct mptcp_addr_info *addrs) 181 - { 182 - bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0); 183 - struct sock *sk = (struct sock *)msk, *ssk; 184 - struct mptcp_subflow_context *subflow; 185 - struct mptcp_addr_info remote = { 0 }; 186 - unsigned int subflows_max; 187 - int i = 0; 188 - 189 - subflows_max = mptcp_pm_get_subflows_max(msk); 190 - mptcp_remote_address((struct sock_common *)sk, &remote); 191 - 192 - /* Non-fullmesh endpoint, fill in the single entry 193 - * corresponding to the primary MPC subflow remote address 194 - */ 195 - if (!fullmesh) { 196 - if (deny_id0) 197 - return 0; 198 - 199 - if (!mptcp_pm_addr_families_match(sk, local, &remote)) 200 - return 0; 201 - 202 - msk->pm.subflows++; 203 - addrs[i++] = remote; 204 - } else { 205 - DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 206 - 207 - /* Forbid creation of new subflows matching existing 208 - * ones, possibly already created by incoming ADD_ADDR 209 - */ 210 - bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1); 211 - mptcp_for_each_subflow(msk, subflow) 212 - if (READ_ONCE(subflow->local_id) == local->id) 213 - __set_bit(subflow->remote_id, unavail_id); 214 - 215 - mptcp_for_each_subflow(msk, subflow) { 216 - ssk = mptcp_subflow_tcp_sock(subflow); 217 - mptcp_remote_address((struct sock_common *)ssk, &addrs[i]); 218 - addrs[i].id = READ_ONCE(subflow->remote_id); 219 - if (deny_id0 && !addrs[i].id) 220 - continue; 221 - 222 - if (test_bit(addrs[i].id, unavail_id)) 223 - continue; 224 - 225 - if (!mptcp_pm_addr_families_match(sk, local, &addrs[i])) 226 - continue; 227 - 228 - if (msk->pm.subflows < subflows_max) { 229 - /* forbid creating multiple address towards 230 - * this id 231 - */ 232 - __set_bit(addrs[i].id, unavail_id); 233 - msk->pm.subflows++; 234 - i++; 235 - } 236 - } 237 - } 238 - 239 - return i; 240 - } 241 - 242 - static struct mptcp_pm_addr_entry * 243 - __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) 244 - { 245 - struct mptcp_pm_addr_entry *entry; 246 - 247 - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, 248 - lockdep_is_held(&pernet->lock)) { 249 - if (entry->addr.id == id) 250 - return entry; 251 - } 252 - return NULL; 253 - } 254 - 255 - static struct mptcp_pm_addr_entry * 256 - __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) 257 - { 258 - struct mptcp_pm_addr_entry *entry; 259 - 260 - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list, 261 - lockdep_is_held(&pernet->lock)) { 262 - if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port)) 263 - return entry; 264 - } 265 - return NULL; 266 - } 267 - 268 - static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) 269 - { 270 - struct sock *sk = (struct sock *)msk; 271 - unsigned int add_addr_signal_max; 272 - bool signal_and_subflow = false; 273 - unsigned int local_addr_max; 274 - struct pm_nl_pernet *pernet; 275 - struct mptcp_pm_local local; 276 - unsigned int subflows_max; 277 - 278 - pernet = pm_nl_get_pernet(sock_net(sk)); 279 - 280 - add_addr_signal_max = mptcp_pm_get_add_addr_signal_max(msk); 281 - local_addr_max = mptcp_pm_get_local_addr_max(msk); 282 - subflows_max = mptcp_pm_get_subflows_max(msk); 283 - 284 - /* do lazy endpoint usage accounting for the MPC subflows */ 285 - if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) { 286 - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(msk->first); 287 - struct mptcp_pm_addr_entry *entry; 288 - struct mptcp_addr_info mpc_addr; 289 - bool backup = false; 290 - 291 - mptcp_local_address((struct sock_common *)msk->first, &mpc_addr); 292 - rcu_read_lock(); 293 - entry = __lookup_addr(pernet, &mpc_addr); 294 - if (entry) { 295 - __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); 296 - msk->mpc_endpoint_id = entry->addr.id; 297 - backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 298 - } 299 - rcu_read_unlock(); 300 - 301 - if (backup) 302 - mptcp_pm_send_ack(msk, subflow, true, backup); 303 - 304 - msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED); 305 - } 306 - 307 - pr_debug("local %d:%d signal %d:%d subflows %d:%d\n", 308 - msk->pm.local_addr_used, local_addr_max, 309 - msk->pm.add_addr_signaled, add_addr_signal_max, 310 - msk->pm.subflows, subflows_max); 311 - 312 - /* check first for announce */ 313 - if (msk->pm.add_addr_signaled < add_addr_signal_max) { 314 - /* due to racing events on both ends we can reach here while 315 - * previous add address is still running: if we invoke now 316 - * mptcp_pm_announce_addr(), that will fail and the 317 - * corresponding id will be marked as used. 318 - * Instead let the PM machinery reschedule us when the 319 - * current address announce will be completed. 320 - */ 321 - if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) 322 - return; 323 - 324 - if (!select_signal_address(pernet, msk, &local)) 325 - goto subflow; 326 - 327 - /* If the alloc fails, we are on memory pressure, not worth 328 - * continuing, and trying to create subflows. 329 - */ 330 - if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) 331 - return; 332 - 333 - __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); 334 - msk->pm.add_addr_signaled++; 335 - 336 - /* Special case for ID0: set the correct ID */ 337 - if (local.addr.id == msk->mpc_endpoint_id) 338 - local.addr.id = 0; 339 - 340 - mptcp_pm_announce_addr(msk, &local.addr, false); 341 - mptcp_pm_addr_send_ack(msk); 342 - 343 - if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) 344 - signal_and_subflow = true; 345 - } 346 - 347 - subflow: 348 - /* check if should create a new subflow */ 349 - while (msk->pm.local_addr_used < local_addr_max && 350 - msk->pm.subflows < subflows_max) { 351 - struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; 352 - bool fullmesh; 353 - int i, nr; 354 - 355 - if (signal_and_subflow) 356 - signal_and_subflow = false; 357 - else if (!select_local_address(pernet, msk, &local)) 358 - break; 359 - 360 - fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); 361 - 362 - __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); 363 - 364 - /* Special case for ID0: set the correct ID */ 365 - if (local.addr.id == msk->mpc_endpoint_id) 366 - local.addr.id = 0; 367 - else /* local_addr_used is not decr for ID 0 */ 368 - msk->pm.local_addr_used++; 369 - 370 - nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); 371 - if (nr == 0) 372 - continue; 373 - 374 - spin_unlock_bh(&msk->pm.lock); 375 - for (i = 0; i < nr; i++) 376 - __mptcp_subflow_connect(sk, &local, &addrs[i]); 377 - spin_lock_bh(&msk->pm.lock); 378 - } 379 - mptcp_pm_nl_check_work_pending(msk); 380 - } 381 - 382 - static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk) 383 - { 384 - mptcp_pm_create_subflow_or_signal_addr(msk); 385 - } 386 - 387 - static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk) 388 - { 389 - mptcp_pm_create_subflow_or_signal_addr(msk); 390 - } 391 - 392 - /* Fill all the local addresses into the array addrs[], 393 - * and return the array size. 394 - */ 395 - static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, 396 - struct mptcp_addr_info *remote, 397 - struct mptcp_pm_local *locals) 398 - { 399 - struct sock *sk = (struct sock *)msk; 400 - struct mptcp_pm_addr_entry *entry; 401 - struct mptcp_addr_info mpc_addr; 402 - struct pm_nl_pernet *pernet; 403 - unsigned int subflows_max; 404 - int i = 0; 405 - 406 - pernet = pm_nl_get_pernet_from_msk(msk); 407 - subflows_max = mptcp_pm_get_subflows_max(msk); 408 - 409 - mptcp_local_address((struct sock_common *)msk, &mpc_addr); 410 - 411 - rcu_read_lock(); 412 - list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { 413 - if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) 414 - continue; 415 - 416 - if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote)) 417 - continue; 418 - 419 - if (msk->pm.subflows < subflows_max) { 420 - locals[i].addr = entry->addr; 421 - locals[i].flags = entry->flags; 422 - locals[i].ifindex = entry->ifindex; 423 - 424 - /* Special case for ID0: set the correct ID */ 425 - if (mptcp_addresses_equal(&locals[i].addr, &mpc_addr, locals[i].addr.port)) 426 - locals[i].addr.id = 0; 427 - 428 - msk->pm.subflows++; 429 - i++; 430 - } 431 - } 432 - rcu_read_unlock(); 433 - 434 - /* If the array is empty, fill in the single 435 - * 'IPADDRANY' local address 436 - */ 437 - if (!i) { 438 - memset(&locals[i], 0, sizeof(locals[i])); 439 - locals[i].addr.family = 440 - #if IS_ENABLED(CONFIG_MPTCP_IPV6) 441 - remote->family == AF_INET6 && 442 - ipv6_addr_v4mapped(&remote->addr6) ? AF_INET : 443 - #endif 444 - remote->family; 445 - 446 - if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote)) 447 - return 0; 448 - 449 - msk->pm.subflows++; 450 - i++; 451 - } 452 - 453 - return i; 454 - } 455 - 456 - static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) 457 - { 458 - struct mptcp_pm_local locals[MPTCP_PM_ADDR_MAX]; 459 - struct sock *sk = (struct sock *)msk; 460 - unsigned int add_addr_accept_max; 461 - struct mptcp_addr_info remote; 462 - unsigned int subflows_max; 463 - bool sf_created = false; 464 - int i, nr; 465 - 466 - add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); 467 - subflows_max = mptcp_pm_get_subflows_max(msk); 468 - 469 - pr_debug("accepted %d:%d remote family %d\n", 470 - msk->pm.add_addr_accepted, add_addr_accept_max, 471 - msk->pm.remote.family); 472 - 473 - remote = msk->pm.remote; 474 - mptcp_pm_announce_addr(msk, &remote, true); 475 - mptcp_pm_addr_send_ack(msk); 476 - 477 - if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) 478 - return; 479 - 480 - /* pick id 0 port, if none is provided the remote address */ 481 - if (!remote.port) 482 - remote.port = sk->sk_dport; 483 - 484 - /* connect to the specified remote address, using whatever 485 - * local address the routing configuration will pick. 486 - */ 487 - nr = fill_local_addresses_vec(msk, &remote, locals); 488 - if (nr == 0) 489 - return; 490 - 491 - spin_unlock_bh(&msk->pm.lock); 492 - for (i = 0; i < nr; i++) 493 - if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0) 494 - sf_created = true; 495 - spin_lock_bh(&msk->pm.lock); 496 - 497 - if (sf_created) { 498 - /* add_addr_accepted is not decr for ID 0 */ 499 - if (remote.id) 500 - msk->pm.add_addr_accepted++; 501 - if (msk->pm.add_addr_accepted >= add_addr_accept_max || 502 - msk->pm.subflows >= subflows_max) 503 - WRITE_ONCE(msk->pm.accept_addr, false); 504 - } 505 - } 506 - 507 - void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id) 508 - { 509 - if (rm_id && WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { 510 - /* Note: if the subflow has been closed before, this 511 - * add_addr_accepted counter will not be decremented. 512 - */ 513 - if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) 514 - WRITE_ONCE(msk->pm.accept_addr, true); 515 - } 516 - } 517 - 518 - /* Called under PM lock */ 519 - void __mptcp_pm_kernel_worker(struct mptcp_sock *msk) 520 - { 521 - struct mptcp_pm_data *pm = &msk->pm; 522 - 523 - if (pm->status & BIT(MPTCP_PM_ADD_ADDR_RECEIVED)) { 524 - pm->status &= ~BIT(MPTCP_PM_ADD_ADDR_RECEIVED); 525 - mptcp_pm_nl_add_addr_received(msk); 526 - } 527 - if (pm->status & BIT(MPTCP_PM_ESTABLISHED)) { 528 - pm->status &= ~BIT(MPTCP_PM_ESTABLISHED); 529 - mptcp_pm_nl_fully_established(msk); 530 - } 531 - if (pm->status & BIT(MPTCP_PM_SUBFLOW_ESTABLISHED)) { 532 - pm->status &= ~BIT(MPTCP_PM_SUBFLOW_ESTABLISHED); 533 - mptcp_pm_nl_subflow_established(msk); 534 - } 535 - } 536 - 537 - static bool address_use_port(struct mptcp_pm_addr_entry *entry) 538 - { 539 - return (entry->flags & 540 - (MPTCP_PM_ADDR_FLAG_SIGNAL | MPTCP_PM_ADDR_FLAG_SUBFLOW)) == 541 - MPTCP_PM_ADDR_FLAG_SIGNAL; 542 - } 543 - 544 - /* caller must ensure the RCU grace period is already elapsed */ 545 - static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry) 546 - { 547 - if (entry->lsk) 548 - sock_release(entry->lsk); 549 - kfree(entry); 550 - } 551 - 552 - static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet, 553 - struct mptcp_pm_addr_entry *entry, 554 - bool needs_id, bool replace) 555 - { 556 - struct mptcp_pm_addr_entry *cur, *del_entry = NULL; 557 - unsigned int addr_max; 558 - int ret = -EINVAL; 559 - 560 - spin_lock_bh(&pernet->lock); 561 - /* to keep the code simple, don't do IDR-like allocation for address ID, 562 - * just bail when we exceed limits 563 - */ 564 - if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID) 565 - pernet->next_id = 1; 566 - if (pernet->addrs >= MPTCP_PM_ADDR_MAX) { 567 - ret = -ERANGE; 568 - goto out; 569 - } 570 - if (test_bit(entry->addr.id, pernet->id_bitmap)) { 571 - ret = -EBUSY; 572 - goto out; 573 - } 574 - 575 - /* do not insert duplicate address, differentiate on port only 576 - * singled addresses 577 - */ 578 - if (!address_use_port(entry)) 579 - entry->addr.port = 0; 580 - list_for_each_entry(cur, &pernet->local_addr_list, list) { 581 - if (mptcp_addresses_equal(&cur->addr, &entry->addr, 582 - cur->addr.port || entry->addr.port)) { 583 - /* allow replacing the exiting endpoint only if such 584 - * endpoint is an implicit one and the user-space 585 - * did not provide an endpoint id 586 - */ 587 - if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) { 588 - ret = -EEXIST; 589 - goto out; 590 - } 591 - if (entry->addr.id) 592 - goto out; 593 - 594 - /* allow callers that only need to look up the local 595 - * addr's id to skip replacement. This allows them to 596 - * avoid calling synchronize_rcu in the packet recv 597 - * path. 598 - */ 599 - if (!replace) { 600 - kfree(entry); 601 - ret = cur->addr.id; 602 - goto out; 603 - } 604 - 605 - pernet->addrs--; 606 - entry->addr.id = cur->addr.id; 607 - list_del_rcu(&cur->list); 608 - del_entry = cur; 609 - break; 610 - } 611 - } 612 - 613 - if (!entry->addr.id && needs_id) { 614 - find_next: 615 - entry->addr.id = find_next_zero_bit(pernet->id_bitmap, 616 - MPTCP_PM_MAX_ADDR_ID + 1, 617 - pernet->next_id); 618 - if (!entry->addr.id && pernet->next_id != 1) { 619 - pernet->next_id = 1; 620 - goto find_next; 621 - } 622 - } 623 - 624 - if (!entry->addr.id && needs_id) 625 - goto out; 626 - 627 - __set_bit(entry->addr.id, pernet->id_bitmap); 628 - if (entry->addr.id > pernet->next_id) 629 - pernet->next_id = entry->addr.id; 630 - 631 - if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 632 - addr_max = pernet->add_addr_signal_max; 633 - WRITE_ONCE(pernet->add_addr_signal_max, addr_max + 1); 634 - } 635 - if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 636 - addr_max = pernet->local_addr_max; 637 - WRITE_ONCE(pernet->local_addr_max, addr_max + 1); 638 - } 639 - 640 - pernet->addrs++; 641 - if (!entry->addr.port) 642 - list_add_tail_rcu(&entry->list, &pernet->local_addr_list); 643 - else 644 - list_add_rcu(&entry->list, &pernet->local_addr_list); 645 - ret = entry->addr.id; 646 - 647 - out: 648 - spin_unlock_bh(&pernet->lock); 649 - 650 - /* just replaced an existing entry, free it */ 651 - if (del_entry) { 652 - synchronize_rcu(); 653 - __mptcp_pm_release_addr_entry(del_entry); 654 - } 655 - return ret; 656 - } 657 - 658 - static struct lock_class_key mptcp_slock_keys[2]; 659 - static struct lock_class_key mptcp_keys[2]; 660 - 661 - static int mptcp_pm_nl_create_listen_socket(struct sock *sk, 662 - struct mptcp_pm_addr_entry *entry) 663 - { 664 - bool is_ipv6 = sk->sk_family == AF_INET6; 665 - int addrlen = sizeof(struct sockaddr_in); 666 - struct sockaddr_storage addr; 667 - struct sock *newsk, *ssk; 668 - int backlog = 1024; 669 - int err; 670 - 671 - err = sock_create_kern(sock_net(sk), entry->addr.family, 672 - SOCK_STREAM, IPPROTO_MPTCP, &entry->lsk); 673 - if (err) 674 - return err; 675 - 676 - newsk = entry->lsk->sk; 677 - if (!newsk) 678 - return -EINVAL; 679 - 680 - /* The subflow socket lock is acquired in a nested to the msk one 681 - * in several places, even by the TCP stack, and this msk is a kernel 682 - * socket: lockdep complains. Instead of propagating the _nested 683 - * modifiers in several places, re-init the lock class for the msk 684 - * socket to an mptcp specific one. 685 - */ 686 - sock_lock_init_class_and_name(newsk, 687 - is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET", 688 - &mptcp_slock_keys[is_ipv6], 689 - is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET", 690 - &mptcp_keys[is_ipv6]); 691 - 692 - lock_sock(newsk); 693 - ssk = __mptcp_nmpc_sk(mptcp_sk(newsk)); 694 - release_sock(newsk); 695 - if (IS_ERR(ssk)) 696 - return PTR_ERR(ssk); 697 - 698 - mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); 699 - #if IS_ENABLED(CONFIG_MPTCP_IPV6) 700 - if (entry->addr.family == AF_INET6) 701 - addrlen = sizeof(struct sockaddr_in6); 702 - #endif 703 - if (ssk->sk_family == AF_INET) 704 - err = inet_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 705 - #if IS_ENABLED(CONFIG_MPTCP_IPV6) 706 - else if (ssk->sk_family == AF_INET6) 707 - err = inet6_bind_sk(ssk, (struct sockaddr *)&addr, addrlen); 708 - #endif 709 - if (err) 710 - return err; 711 - 712 - /* We don't use mptcp_set_state() here because it needs to be called 713 - * under the msk socket lock. For the moment, that will not bring 714 - * anything more than only calling inet_sk_state_store(), because the 715 - * old status is known (TCP_CLOSE). 716 - */ 717 - inet_sk_state_store(newsk, TCP_LISTEN); 718 - lock_sock(ssk); 719 - WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true); 720 - err = __inet_listen_sk(ssk, backlog); 721 - if (!err) 722 - mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED); 723 - release_sock(ssk); 724 - return err; 725 - } 726 - 727 - int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, 728 - struct mptcp_pm_addr_entry *skc) 729 - { 730 - struct mptcp_pm_addr_entry *entry; 731 - struct pm_nl_pernet *pernet; 732 - int ret; 733 - 734 - pernet = pm_nl_get_pernet_from_msk(msk); 735 - 736 - rcu_read_lock(); 737 - entry = __lookup_addr(pernet, &skc->addr); 738 - ret = entry ? entry->addr.id : -1; 739 - rcu_read_unlock(); 740 - if (ret >= 0) 741 - return ret; 742 - 743 - /* address not found, add to local list */ 744 - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 745 - if (!entry) 746 - return -ENOMEM; 747 - 748 - *entry = *skc; 749 - entry->addr.port = 0; 750 - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); 751 - if (ret < 0) 752 - kfree(entry); 753 - 754 - return ret; 755 - } 756 - 757 - bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) 758 - { 759 - struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); 760 - struct mptcp_pm_addr_entry *entry; 761 - bool backup; 762 - 763 - rcu_read_lock(); 764 - entry = __lookup_addr(pernet, skc); 765 - backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 766 - rcu_read_unlock(); 767 - 768 - return backup; 769 - } 770 11 771 12 #define MPTCP_PM_CMD_GRP_OFFSET 0 772 13 #define MPTCP_PM_EV_GRP_OFFSET 1 ··· 127 886 return 0; 128 887 } 129 888 130 - static struct pm_nl_pernet *genl_info_pm_nl(struct genl_info *info) 131 - { 132 - return pm_nl_get_pernet(genl_info_net(info)); 133 - } 134 - 135 - static int mptcp_nl_add_subflow_or_signal_addr(struct net *net, 136 - struct mptcp_addr_info *addr) 137 - { 138 - struct mptcp_sock *msk; 139 - long s_slot = 0, s_num = 0; 140 - 141 - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 142 - struct sock *sk = (struct sock *)msk; 143 - struct mptcp_addr_info mpc_addr; 144 - 145 - if (!READ_ONCE(msk->fully_established) || 146 - mptcp_pm_is_userspace(msk)) 147 - goto next; 148 - 149 - /* if the endp linked to the init sf is re-added with a != ID */ 150 - mptcp_local_address((struct sock_common *)msk, &mpc_addr); 151 - 152 - lock_sock(sk); 153 - spin_lock_bh(&msk->pm.lock); 154 - if (mptcp_addresses_equal(addr, &mpc_addr, addr->port)) 155 - msk->mpc_endpoint_id = addr->id; 156 - mptcp_pm_create_subflow_or_signal_addr(msk); 157 - spin_unlock_bh(&msk->pm.lock); 158 - release_sock(sk); 159 - 160 - next: 161 - sock_put(sk); 162 - cond_resched(); 163 - } 164 - 165 - return 0; 166 - } 167 - 168 - static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr, 169 - struct genl_info *info) 170 - { 171 - struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1]; 172 - 173 - if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr, 174 - mptcp_pm_address_nl_policy, info->extack) && 175 - tb[MPTCP_PM_ADDR_ATTR_ID]) 176 - return true; 177 - return false; 178 - } 179 - 180 - int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) 181 - { 182 - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 183 - struct mptcp_pm_addr_entry addr, *entry; 184 - struct nlattr *attr; 185 - int ret; 186 - 187 - if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) 188 - return -EINVAL; 189 - 190 - attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; 191 - ret = mptcp_pm_parse_entry(attr, info, true, &addr); 192 - if (ret < 0) 193 - return ret; 194 - 195 - if (addr.addr.port && !address_use_port(&addr)) { 196 - NL_SET_ERR_MSG_ATTR(info->extack, attr, 197 - "flags must have signal and not subflow when using port"); 198 - return -EINVAL; 199 - } 200 - 201 - if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL && 202 - addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) { 203 - NL_SET_ERR_MSG_ATTR(info->extack, attr, 204 - "flags mustn't have both signal and fullmesh"); 205 - return -EINVAL; 206 - } 207 - 208 - if (addr.flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) { 209 - NL_SET_ERR_MSG_ATTR(info->extack, attr, 210 - "can't create IMPLICIT endpoint"); 211 - return -EINVAL; 212 - } 213 - 214 - entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT); 215 - if (!entry) { 216 - GENL_SET_ERR_MSG(info, "can't allocate addr"); 217 - return -ENOMEM; 218 - } 219 - 220 - *entry = addr; 221 - if (entry->addr.port) { 222 - ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); 223 - if (ret) { 224 - GENL_SET_ERR_MSG_FMT(info, "create listen socket error: %d", ret); 225 - goto out_free; 226 - } 227 - } 228 - ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, 229 - !mptcp_pm_has_addr_attr_id(attr, info), 230 - true); 231 - if (ret < 0) { 232 - GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret); 233 - goto out_free; 234 - } 235 - 236 - mptcp_nl_add_subflow_or_signal_addr(sock_net(skb->sk), &entry->addr); 237 - return 0; 238 - 239 - out_free: 240 - __mptcp_pm_release_addr_entry(entry); 241 - return ret; 242 - } 243 - 244 - static u8 mptcp_endp_get_local_id(struct mptcp_sock *msk, 245 - const struct mptcp_addr_info *addr) 246 - { 247 - return msk->mpc_endpoint_id == addr->id ? 0 : addr->id; 248 - } 249 - 250 - static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, 251 - const struct mptcp_addr_info *addr, 252 - bool force) 253 - { 254 - struct mptcp_rm_list list = { .nr = 0 }; 255 - bool ret; 256 - 257 - list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); 258 - 259 - ret = mptcp_remove_anno_list_by_saddr(msk, addr); 260 - if (ret || force) { 261 - spin_lock_bh(&msk->pm.lock); 262 - if (ret) { 263 - __set_bit(addr->id, msk->pm.id_avail_bitmap); 264 - msk->pm.add_addr_signaled--; 265 - } 266 - mptcp_pm_remove_addr(msk, &list); 267 - spin_unlock_bh(&msk->pm.lock); 268 - } 269 - return ret; 270 - } 271 - 272 - static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) 273 - { 274 - /* If it was marked as used, and not ID 0, decrement local_addr_used */ 275 - if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && 276 - id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) 277 - msk->pm.local_addr_used--; 278 - } 279 - 280 - static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, 281 - const struct mptcp_pm_addr_entry *entry) 282 - { 283 - const struct mptcp_addr_info *addr = &entry->addr; 284 - struct mptcp_rm_list list = { .nr = 1 }; 285 - long s_slot = 0, s_num = 0; 286 - struct mptcp_sock *msk; 287 - 288 - pr_debug("remove_id=%d\n", addr->id); 289 - 290 - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 291 - struct sock *sk = (struct sock *)msk; 292 - bool remove_subflow; 293 - 294 - if (mptcp_pm_is_userspace(msk)) 295 - goto next; 296 - 297 - lock_sock(sk); 298 - remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); 299 - mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && 300 - !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); 301 - 302 - list.ids[0] = mptcp_endp_get_local_id(msk, addr); 303 - if (remove_subflow) { 304 - spin_lock_bh(&msk->pm.lock); 305 - mptcp_pm_rm_subflow(msk, &list); 306 - spin_unlock_bh(&msk->pm.lock); 307 - } 308 - 309 - if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 310 - spin_lock_bh(&msk->pm.lock); 311 - __mark_subflow_endp_available(msk, list.ids[0]); 312 - spin_unlock_bh(&msk->pm.lock); 313 - } 314 - 315 - if (msk->mpc_endpoint_id == entry->addr.id) 316 - msk->mpc_endpoint_id = 0; 317 - release_sock(sk); 318 - 319 - next: 320 - sock_put(sk); 321 - cond_resched(); 322 - } 323 - 324 - return 0; 325 - } 326 - 327 - static int mptcp_nl_remove_id_zero_address(struct net *net, 328 - struct mptcp_addr_info *addr) 329 - { 330 - struct mptcp_rm_list list = { .nr = 0 }; 331 - long s_slot = 0, s_num = 0; 332 - struct mptcp_sock *msk; 333 - 334 - list.ids[list.nr++] = 0; 335 - 336 - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 337 - struct sock *sk = (struct sock *)msk; 338 - struct mptcp_addr_info msk_local; 339 - 340 - if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 341 - goto next; 342 - 343 - mptcp_local_address((struct sock_common *)msk, &msk_local); 344 - if (!mptcp_addresses_equal(&msk_local, addr, addr->port)) 345 - goto next; 346 - 347 - lock_sock(sk); 348 - spin_lock_bh(&msk->pm.lock); 349 - mptcp_pm_remove_addr(msk, &list); 350 - mptcp_pm_rm_subflow(msk, &list); 351 - __mark_subflow_endp_available(msk, 0); 352 - spin_unlock_bh(&msk->pm.lock); 353 - release_sock(sk); 354 - 355 - next: 356 - sock_put(sk); 357 - cond_resched(); 358 - } 359 - 360 - return 0; 361 - } 362 - 363 - int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) 364 - { 365 - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 366 - struct mptcp_pm_addr_entry addr, *entry; 367 - unsigned int addr_max; 368 - struct nlattr *attr; 369 - int ret; 370 - 371 - if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) 372 - return -EINVAL; 373 - 374 - attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; 375 - ret = mptcp_pm_parse_entry(attr, info, false, &addr); 376 - if (ret < 0) 377 - return ret; 378 - 379 - /* the zero id address is special: the first address used by the msk 380 - * always gets such an id, so different subflows can have different zero 381 - * id addresses. Additionally zero id is not accounted for in id_bitmap. 382 - * Let's use an 'mptcp_rm_list' instead of the common remove code. 383 - */ 384 - if (addr.addr.id == 0) 385 - return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr); 386 - 387 - spin_lock_bh(&pernet->lock); 388 - entry = __lookup_addr_by_id(pernet, addr.addr.id); 389 - if (!entry) { 390 - NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); 391 - spin_unlock_bh(&pernet->lock); 392 - return -EINVAL; 393 - } 394 - if (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { 395 - addr_max = pernet->add_addr_signal_max; 396 - WRITE_ONCE(pernet->add_addr_signal_max, addr_max - 1); 397 - } 398 - if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { 399 - addr_max = pernet->local_addr_max; 400 - WRITE_ONCE(pernet->local_addr_max, addr_max - 1); 401 - } 402 - 403 - pernet->addrs--; 404 - list_del_rcu(&entry->list); 405 - __clear_bit(entry->addr.id, pernet->id_bitmap); 406 - spin_unlock_bh(&pernet->lock); 407 - 408 - mptcp_nl_remove_subflow_and_signal_addr(sock_net(skb->sk), entry); 409 - synchronize_rcu(); 410 - __mptcp_pm_release_addr_entry(entry); 411 - 412 - return ret; 413 - } 414 - 415 - static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, 416 - struct list_head *rm_list) 417 - { 418 - struct mptcp_rm_list alist = { .nr = 0 }, slist = { .nr = 0 }; 419 - struct mptcp_pm_addr_entry *entry; 420 - 421 - list_for_each_entry(entry, rm_list, list) { 422 - if (slist.nr < MPTCP_RM_IDS_MAX && 423 - mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) 424 - slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); 425 - 426 - if (alist.nr < MPTCP_RM_IDS_MAX && 427 - mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) 428 - alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); 429 - } 430 - 431 - spin_lock_bh(&msk->pm.lock); 432 - if (alist.nr) { 433 - msk->pm.add_addr_signaled -= alist.nr; 434 - mptcp_pm_remove_addr(msk, &alist); 435 - } 436 - if (slist.nr) 437 - mptcp_pm_rm_subflow(msk, &slist); 438 - /* Reset counters: maybe some subflows have been removed before */ 439 - bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 440 - msk->pm.local_addr_used = 0; 441 - spin_unlock_bh(&msk->pm.lock); 442 - } 443 - 444 - static void mptcp_nl_flush_addrs_list(struct net *net, 445 - struct list_head *rm_list) 446 - { 447 - long s_slot = 0, s_num = 0; 448 - struct mptcp_sock *msk; 449 - 450 - if (list_empty(rm_list)) 451 - return; 452 - 453 - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 454 - struct sock *sk = (struct sock *)msk; 455 - 456 - if (!mptcp_pm_is_userspace(msk)) { 457 - lock_sock(sk); 458 - mptcp_pm_flush_addrs_and_subflows(msk, rm_list); 459 - release_sock(sk); 460 - } 461 - 462 - sock_put(sk); 463 - cond_resched(); 464 - } 465 - } 466 - 467 - /* caller must ensure the RCU grace period is already elapsed */ 468 - static void __flush_addrs(struct list_head *list) 469 - { 470 - while (!list_empty(list)) { 471 - struct mptcp_pm_addr_entry *cur; 472 - 473 - cur = list_entry(list->next, 474 - struct mptcp_pm_addr_entry, list); 475 - list_del_rcu(&cur->list); 476 - __mptcp_pm_release_addr_entry(cur); 477 - } 478 - } 479 - 480 - static void __reset_counters(struct pm_nl_pernet *pernet) 481 - { 482 - WRITE_ONCE(pernet->add_addr_signal_max, 0); 483 - WRITE_ONCE(pernet->add_addr_accept_max, 0); 484 - WRITE_ONCE(pernet->local_addr_max, 0); 485 - pernet->addrs = 0; 486 - } 487 - 488 - int mptcp_pm_nl_flush_addrs_doit(struct sk_buff *skb, struct genl_info *info) 489 - { 490 - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 491 - LIST_HEAD(free_list); 492 - 493 - spin_lock_bh(&pernet->lock); 494 - list_splice_init(&pernet->local_addr_list, &free_list); 495 - __reset_counters(pernet); 496 - pernet->next_id = 1; 497 - bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); 498 - spin_unlock_bh(&pernet->lock); 499 - mptcp_nl_flush_addrs_list(sock_net(skb->sk), &free_list); 500 - synchronize_rcu(); 501 - __flush_addrs(&free_list); 502 - return 0; 503 - } 504 - 505 889 int mptcp_nl_fill_addr(struct sk_buff *skb, 506 890 struct mptcp_pm_addr_entry *entry) 507 891 { ··· 164 1298 nla_put_failure: 165 1299 nla_nest_cancel(skb, attr); 166 1300 return -EMSGSIZE; 167 - } 168 - 169 - int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, 170 - struct genl_info *info) 171 - { 172 - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 173 - struct mptcp_pm_addr_entry *entry; 174 - int ret = -EINVAL; 175 - 176 - rcu_read_lock(); 177 - entry = __lookup_addr_by_id(pernet, id); 178 - if (entry) { 179 - *addr = *entry; 180 - ret = 0; 181 - } 182 - rcu_read_unlock(); 183 - 184 - return ret; 185 - } 186 - 187 - int mptcp_pm_nl_dump_addr(struct sk_buff *msg, 188 - struct netlink_callback *cb) 189 - { 190 - struct net *net = sock_net(msg->sk); 191 - struct mptcp_pm_addr_entry *entry; 192 - struct pm_nl_pernet *pernet; 193 - int id = cb->args[0]; 194 - int i; 195 - 196 - pernet = pm_nl_get_pernet(net); 197 - 198 - rcu_read_lock(); 199 - for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { 200 - if (test_bit(i, pernet->id_bitmap)) { 201 - entry = __lookup_addr_by_id(pernet, i); 202 - if (!entry) 203 - break; 204 - 205 - if (entry->addr.id <= id) 206 - continue; 207 - 208 - if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) 209 - break; 210 - 211 - id = entry->addr.id; 212 - } 213 - } 214 - rcu_read_unlock(); 215 - 216 - cb->args[0] = id; 217 - return msg->len; 218 - } 219 - 220 - static int parse_limit(struct genl_info *info, int id, unsigned int *limit) 221 - { 222 - struct nlattr *attr = info->attrs[id]; 223 - 224 - if (!attr) 225 - return 0; 226 - 227 - *limit = nla_get_u32(attr); 228 - if (*limit > MPTCP_PM_ADDR_MAX) { 229 - NL_SET_ERR_MSG_ATTR_FMT(info->extack, attr, 230 - "limit greater than maximum (%u)", 231 - MPTCP_PM_ADDR_MAX); 232 - return -EINVAL; 233 - } 234 - return 0; 235 - } 236 - 237 - int mptcp_pm_nl_set_limits_doit(struct sk_buff *skb, struct genl_info *info) 238 - { 239 - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 240 - unsigned int rcv_addrs, subflows; 241 - int ret; 242 - 243 - spin_lock_bh(&pernet->lock); 244 - rcv_addrs = pernet->add_addr_accept_max; 245 - ret = parse_limit(info, MPTCP_PM_ATTR_RCV_ADD_ADDRS, &rcv_addrs); 246 - if (ret) 247 - goto unlock; 248 - 249 - subflows = pernet->subflows_max; 250 - ret = parse_limit(info, MPTCP_PM_ATTR_SUBFLOWS, &subflows); 251 - if (ret) 252 - goto unlock; 253 - 254 - WRITE_ONCE(pernet->add_addr_accept_max, rcv_addrs); 255 - WRITE_ONCE(pernet->subflows_max, subflows); 256 - 257 - unlock: 258 - spin_unlock_bh(&pernet->lock); 259 - return ret; 260 - } 261 - 262 - int mptcp_pm_nl_get_limits_doit(struct sk_buff *skb, struct genl_info *info) 263 - { 264 - struct pm_nl_pernet *pernet = genl_info_pm_nl(info); 265 - struct sk_buff *msg; 266 - void *reply; 267 - 268 - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 269 - if (!msg) 270 - return -ENOMEM; 271 - 272 - reply = genlmsg_put_reply(msg, info, &mptcp_genl_family, 0, 273 - MPTCP_PM_CMD_GET_LIMITS); 274 - if (!reply) 275 - goto fail; 276 - 277 - if (nla_put_u32(msg, MPTCP_PM_ATTR_RCV_ADD_ADDRS, 278 - READ_ONCE(pernet->add_addr_accept_max))) 279 - goto fail; 280 - 281 - if (nla_put_u32(msg, MPTCP_PM_ATTR_SUBFLOWS, 282 - READ_ONCE(pernet->subflows_max))) 283 - goto fail; 284 - 285 - genlmsg_end(msg, reply); 286 - return genlmsg_reply(msg, info); 287 - 288 - fail: 289 - GENL_SET_ERR_MSG(info, "not enough space in Netlink message"); 290 - nlmsg_free(msg); 291 - return -EMSGSIZE; 292 - } 293 - 294 - static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, 295 - struct mptcp_addr_info *addr) 296 - { 297 - struct mptcp_rm_list list = { .nr = 0 }; 298 - 299 - list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); 300 - 301 - spin_lock_bh(&msk->pm.lock); 302 - mptcp_pm_rm_subflow(msk, &list); 303 - __mark_subflow_endp_available(msk, list.ids[0]); 304 - mptcp_pm_create_subflow_or_signal_addr(msk); 305 - spin_unlock_bh(&msk->pm.lock); 306 - } 307 - 308 - static void mptcp_pm_nl_set_flags_all(struct net *net, 309 - struct mptcp_pm_addr_entry *local, 310 - u8 changed) 311 - { 312 - u8 is_subflow = !!(local->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW); 313 - u8 bkup = !!(local->flags & MPTCP_PM_ADDR_FLAG_BACKUP); 314 - long s_slot = 0, s_num = 0; 315 - struct mptcp_sock *msk; 316 - 317 - if (changed == MPTCP_PM_ADDR_FLAG_FULLMESH && !is_subflow) 318 - return; 319 - 320 - while ((msk = mptcp_token_iter_next(net, &s_slot, &s_num)) != NULL) { 321 - struct sock *sk = (struct sock *)msk; 322 - 323 - if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) 324 - goto next; 325 - 326 - lock_sock(sk); 327 - if (changed & MPTCP_PM_ADDR_FLAG_BACKUP) 328 - mptcp_pm_mp_prio_send_ack(msk, &local->addr, NULL, bkup); 329 - /* Subflows will only be recreated if the SUBFLOW flag is set */ 330 - if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)) 331 - mptcp_pm_nl_fullmesh(msk, &local->addr); 332 - release_sock(sk); 333 - 334 - next: 335 - sock_put(sk); 336 - cond_resched(); 337 - } 338 - 339 - return; 340 - } 341 - 342 - int mptcp_pm_nl_set_flags(struct mptcp_pm_addr_entry *local, 343 - struct genl_info *info) 344 - { 345 - struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; 346 - u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP | 347 - MPTCP_PM_ADDR_FLAG_FULLMESH; 348 - struct net *net = genl_info_net(info); 349 - struct mptcp_pm_addr_entry *entry; 350 - struct pm_nl_pernet *pernet; 351 - u8 lookup_by_id = 0; 352 - 353 - pernet = pm_nl_get_pernet(net); 354 - 355 - if (local->addr.family == AF_UNSPEC) { 356 - lookup_by_id = 1; 357 - if (!local->addr.id) { 358 - NL_SET_ERR_MSG_ATTR(info->extack, attr, 359 - "missing address ID"); 360 - return -EOPNOTSUPP; 361 - } 362 - } 363 - 364 - spin_lock_bh(&pernet->lock); 365 - entry = lookup_by_id ? __lookup_addr_by_id(pernet, local->addr.id) : 366 - __lookup_addr(pernet, &local->addr); 367 - if (!entry) { 368 - spin_unlock_bh(&pernet->lock); 369 - NL_SET_ERR_MSG_ATTR(info->extack, attr, "address not found"); 370 - return -EINVAL; 371 - } 372 - if ((local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) && 373 - (entry->flags & (MPTCP_PM_ADDR_FLAG_SIGNAL | 374 - MPTCP_PM_ADDR_FLAG_IMPLICIT))) { 375 - spin_unlock_bh(&pernet->lock); 376 - NL_SET_ERR_MSG_ATTR(info->extack, attr, "invalid addr flags"); 377 - return -EINVAL; 378 - } 379 - 380 - changed = (local->flags ^ entry->flags) & mask; 381 - entry->flags = (entry->flags & ~mask) | (local->flags & mask); 382 - *local = *entry; 383 - spin_unlock_bh(&pernet->lock); 384 - 385 - mptcp_pm_nl_set_flags_all(net, local, changed); 386 - return 0; 387 1301 } 388 1302 389 1303 static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp) ··· 510 1864 .mcgrps = mptcp_pm_mcgrps, 511 1865 .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), 512 1866 }; 513 - 514 - static int __net_init pm_nl_init_net(struct net *net) 515 - { 516 - struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 517 - 518 - INIT_LIST_HEAD_RCU(&pernet->local_addr_list); 519 - 520 - /* Cit. 2 subflows ought to be enough for anybody. */ 521 - pernet->subflows_max = 2; 522 - pernet->next_id = 1; 523 - pernet->stale_loss_cnt = 4; 524 - spin_lock_init(&pernet->lock); 525 - 526 - /* No need to initialize other pernet fields, the struct is zeroed at 527 - * allocation time. 528 - */ 529 - 530 - return 0; 531 - } 532 - 533 - static void __net_exit pm_nl_exit_net(struct list_head *net_list) 534 - { 535 - struct net *net; 536 - 537 - list_for_each_entry(net, net_list, exit_list) { 538 - struct pm_nl_pernet *pernet = pm_nl_get_pernet(net); 539 - 540 - /* net is removed from namespace list, can't race with 541 - * other modifiers, also netns core already waited for a 542 - * RCU grace period. 543 - */ 544 - __flush_addrs(&pernet->local_addr_list); 545 - } 546 - } 547 - 548 - static struct pernet_operations mptcp_pm_pernet_ops = { 549 - .init = pm_nl_init_net, 550 - .exit_batch = pm_nl_exit_net, 551 - .id = &pm_nl_pernet_id, 552 - .size = sizeof(struct pm_nl_pernet), 553 - }; 554 - 555 - void __init mptcp_pm_nl_init(void) 556 - { 557 - if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) 558 - panic("Failed to register MPTCP PM pernet subsystem.\n"); 559 - 560 - if (genl_register_family(&mptcp_genl_family)) 561 - panic("Failed to register MPTCP PM netlink family\n"); 562 - }