at v6.14 469 lines 12 kB view raw
1// SPDX-License-Identifier: GPL-2.0 2/* Multipath TCP 3 * 4 * Copyright (c) 2019, Tessares SA. 5 */ 6 7#ifdef CONFIG_SYSCTL 8#include <linux/sysctl.h> 9#endif 10 11#include <net/net_namespace.h> 12#include <net/netns/generic.h> 13 14#include "protocol.h" 15#include "mib.h" 16 17#define MPTCP_SYSCTL_PATH "net/mptcp" 18 19static int mptcp_pernet_id; 20 21#ifdef CONFIG_SYSCTL 22static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX; 23#endif 24 25struct mptcp_pernet { 26#ifdef CONFIG_SYSCTL 27 struct ctl_table_header *ctl_table_hdr; 28#endif 29 30 unsigned int add_addr_timeout; 31 unsigned int blackhole_timeout; 32 unsigned int close_timeout; 33 unsigned int stale_loss_cnt; 34 atomic_t active_disable_times; 35 u8 syn_retrans_before_tcp_fallback; 36 unsigned long active_disable_stamp; 37 u8 mptcp_enabled; 38 u8 checksum_enabled; 39 u8 allow_join_initial_addr_port; 40 u8 pm_type; 41 char scheduler[MPTCP_SCHED_NAME_MAX]; 42}; 43 44static struct mptcp_pernet *mptcp_get_pernet(const struct net *net) 45{ 46 return net_generic(net, mptcp_pernet_id); 47} 48 49int mptcp_is_enabled(const struct net *net) 50{ 51 return mptcp_get_pernet(net)->mptcp_enabled; 52} 53 54unsigned int mptcp_get_add_addr_timeout(const struct net *net) 55{ 56 return mptcp_get_pernet(net)->add_addr_timeout; 57} 58 59int mptcp_is_checksum_enabled(const struct net *net) 60{ 61 return mptcp_get_pernet(net)->checksum_enabled; 62} 63 64int mptcp_allow_join_id0(const struct net *net) 65{ 66 return mptcp_get_pernet(net)->allow_join_initial_addr_port; 67} 68 69unsigned int mptcp_stale_loss_cnt(const struct net *net) 70{ 71 return mptcp_get_pernet(net)->stale_loss_cnt; 72} 73 74unsigned int mptcp_close_timeout(const struct sock *sk) 75{ 76 if (sock_flag(sk, SOCK_DEAD)) 77 return TCP_TIMEWAIT_LEN; 78 return mptcp_get_pernet(sock_net(sk))->close_timeout; 79} 80 81int mptcp_get_pm_type(const struct net *net) 82{ 83 return mptcp_get_pernet(net)->pm_type; 84} 85 86const char *mptcp_get_scheduler(const struct net *net) 87{ 88 return mptcp_get_pernet(net)->scheduler; 89} 90 91static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) 92{ 93 pernet->mptcp_enabled = 1; 94 pernet->add_addr_timeout = TCP_RTO_MAX; 95 pernet->blackhole_timeout = 3600; 96 pernet->syn_retrans_before_tcp_fallback = 2; 97 atomic_set(&pernet->active_disable_times, 0); 98 pernet->close_timeout = TCP_TIMEWAIT_LEN; 99 pernet->checksum_enabled = 0; 100 pernet->allow_join_initial_addr_port = 1; 101 pernet->stale_loss_cnt = 4; 102 pernet->pm_type = MPTCP_PM_TYPE_KERNEL; 103 strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler)); 104} 105 106#ifdef CONFIG_SYSCTL 107static int mptcp_set_scheduler(char *scheduler, const char *name) 108{ 109 struct mptcp_sched_ops *sched; 110 int ret = 0; 111 112 rcu_read_lock(); 113 sched = mptcp_sched_find(name); 114 if (sched) 115 strscpy(scheduler, name, MPTCP_SCHED_NAME_MAX); 116 else 117 ret = -ENOENT; 118 rcu_read_unlock(); 119 120 return ret; 121} 122 123static int proc_scheduler(const struct ctl_table *ctl, int write, 124 void *buffer, size_t *lenp, loff_t *ppos) 125{ 126 char (*scheduler)[MPTCP_SCHED_NAME_MAX] = ctl->data; 127 char val[MPTCP_SCHED_NAME_MAX]; 128 struct ctl_table tbl = { 129 .data = val, 130 .maxlen = MPTCP_SCHED_NAME_MAX, 131 }; 132 int ret; 133 134 strscpy(val, *scheduler, MPTCP_SCHED_NAME_MAX); 135 136 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 137 if (write && ret == 0) 138 ret = mptcp_set_scheduler(*scheduler, val); 139 140 return ret; 141} 142 143static int proc_available_schedulers(const struct ctl_table *ctl, 144 int write, void *buffer, 145 size_t *lenp, loff_t *ppos) 146{ 147 struct ctl_table tbl = { .maxlen = MPTCP_SCHED_BUF_MAX, }; 148 int ret; 149 150 tbl.data = kmalloc(tbl.maxlen, GFP_USER); 151 if (!tbl.data) 152 return -ENOMEM; 153 154 mptcp_get_available_schedulers(tbl.data, MPTCP_SCHED_BUF_MAX); 155 ret = proc_dostring(&tbl, write, buffer, lenp, ppos); 156 kfree(tbl.data); 157 158 return ret; 159} 160 161static int proc_blackhole_detect_timeout(const struct ctl_table *table, 162 int write, void *buffer, size_t *lenp, 163 loff_t *ppos) 164{ 165 struct mptcp_pernet *pernet = container_of(table->data, 166 struct mptcp_pernet, 167 blackhole_timeout); 168 int ret; 169 170 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 171 if (write && ret == 0) 172 atomic_set(&pernet->active_disable_times, 0); 173 174 return ret; 175} 176 177static struct ctl_table mptcp_sysctl_table[] = { 178 { 179 .procname = "enabled", 180 .maxlen = sizeof(u8), 181 .mode = 0644, 182 /* users with CAP_NET_ADMIN or root (not and) can change this 183 * value, same as other sysctl or the 'net' tree. 184 */ 185 .proc_handler = proc_dou8vec_minmax, 186 .extra1 = SYSCTL_ZERO, 187 .extra2 = SYSCTL_ONE 188 }, 189 { 190 .procname = "add_addr_timeout", 191 .maxlen = sizeof(unsigned int), 192 .mode = 0644, 193 .proc_handler = proc_dointvec_jiffies, 194 }, 195 { 196 .procname = "checksum_enabled", 197 .maxlen = sizeof(u8), 198 .mode = 0644, 199 .proc_handler = proc_dou8vec_minmax, 200 .extra1 = SYSCTL_ZERO, 201 .extra2 = SYSCTL_ONE 202 }, 203 { 204 .procname = "allow_join_initial_addr_port", 205 .maxlen = sizeof(u8), 206 .mode = 0644, 207 .proc_handler = proc_dou8vec_minmax, 208 .extra1 = SYSCTL_ZERO, 209 .extra2 = SYSCTL_ONE 210 }, 211 { 212 .procname = "stale_loss_cnt", 213 .maxlen = sizeof(unsigned int), 214 .mode = 0644, 215 .proc_handler = proc_douintvec_minmax, 216 }, 217 { 218 .procname = "pm_type", 219 .maxlen = sizeof(u8), 220 .mode = 0644, 221 .proc_handler = proc_dou8vec_minmax, 222 .extra1 = SYSCTL_ZERO, 223 .extra2 = &mptcp_pm_type_max 224 }, 225 { 226 .procname = "scheduler", 227 .maxlen = MPTCP_SCHED_NAME_MAX, 228 .mode = 0644, 229 .proc_handler = proc_scheduler, 230 }, 231 { 232 .procname = "available_schedulers", 233 .maxlen = MPTCP_SCHED_BUF_MAX, 234 .mode = 0444, 235 .proc_handler = proc_available_schedulers, 236 }, 237 { 238 .procname = "close_timeout", 239 .maxlen = sizeof(unsigned int), 240 .mode = 0644, 241 .proc_handler = proc_dointvec_jiffies, 242 }, 243 { 244 .procname = "blackhole_timeout", 245 .maxlen = sizeof(unsigned int), 246 .mode = 0644, 247 .proc_handler = proc_blackhole_detect_timeout, 248 .extra1 = SYSCTL_ZERO, 249 }, 250 { 251 .procname = "syn_retrans_before_tcp_fallback", 252 .maxlen = sizeof(u8), 253 .mode = 0644, 254 .proc_handler = proc_dou8vec_minmax, 255 }, 256}; 257 258static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) 259{ 260 struct ctl_table_header *hdr; 261 struct ctl_table *table; 262 263 table = mptcp_sysctl_table; 264 if (!net_eq(net, &init_net)) { 265 table = kmemdup(table, sizeof(mptcp_sysctl_table), GFP_KERNEL); 266 if (!table) 267 goto err_alloc; 268 } 269 270 table[0].data = &pernet->mptcp_enabled; 271 table[1].data = &pernet->add_addr_timeout; 272 table[2].data = &pernet->checksum_enabled; 273 table[3].data = &pernet->allow_join_initial_addr_port; 274 table[4].data = &pernet->stale_loss_cnt; 275 table[5].data = &pernet->pm_type; 276 table[6].data = &pernet->scheduler; 277 /* table[7] is for available_schedulers which is read-only info */ 278 table[8].data = &pernet->close_timeout; 279 table[9].data = &pernet->blackhole_timeout; 280 table[10].data = &pernet->syn_retrans_before_tcp_fallback; 281 282 hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, 283 ARRAY_SIZE(mptcp_sysctl_table)); 284 if (!hdr) 285 goto err_reg; 286 287 pernet->ctl_table_hdr = hdr; 288 289 return 0; 290 291err_reg: 292 if (!net_eq(net, &init_net)) 293 kfree(table); 294err_alloc: 295 return -ENOMEM; 296} 297 298static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) 299{ 300 const struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg; 301 302 unregister_net_sysctl_table(pernet->ctl_table_hdr); 303 304 kfree(table); 305} 306 307#else 308 309static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) 310{ 311 return 0; 312} 313 314static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {} 315 316#endif /* CONFIG_SYSCTL */ 317 318/* The following code block is to deal with middle box issues with MPTCP, 319 * similar to what is done with TFO. 320 * The proposed solution is to disable active MPTCP globally when SYN+MPC are 321 * dropped, while SYN without MPC aren't. In this case, active side MPTCP is 322 * disabled globally for 1hr at first. Then if it happens again, it is disabled 323 * for 2h, then 4h, 8h, ... 324 * The timeout is reset back to 1hr when a successful active MPTCP connection is 325 * fully established. 326 */ 327 328/* Disable active MPTCP and record current jiffies and active_disable_times */ 329void mptcp_active_disable(struct sock *sk) 330{ 331 struct net *net = sock_net(sk); 332 struct mptcp_pernet *pernet; 333 334 pernet = mptcp_get_pernet(net); 335 336 if (!READ_ONCE(pernet->blackhole_timeout)) 337 return; 338 339 /* Paired with READ_ONCE() in mptcp_active_should_disable() */ 340 WRITE_ONCE(pernet->active_disable_stamp, jiffies); 341 342 /* Paired with smp_rmb() in mptcp_active_should_disable(). 343 * We want pernet->active_disable_stamp to be updated first. 344 */ 345 smp_mb__before_atomic(); 346 atomic_inc(&pernet->active_disable_times); 347 348 MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE); 349} 350 351/* Calculate timeout for MPTCP active disable 352 * Return true if we are still in the active MPTCP disable period 353 * Return false if timeout already expired and we should use active MPTCP 354 */ 355bool mptcp_active_should_disable(struct sock *ssk) 356{ 357 struct net *net = sock_net(ssk); 358 unsigned int blackhole_timeout; 359 struct mptcp_pernet *pernet; 360 unsigned long timeout; 361 int disable_times; 362 int multiplier; 363 364 pernet = mptcp_get_pernet(net); 365 blackhole_timeout = READ_ONCE(pernet->blackhole_timeout); 366 367 if (!blackhole_timeout) 368 return false; 369 370 disable_times = atomic_read(&pernet->active_disable_times); 371 if (!disable_times) 372 return false; 373 374 /* Paired with smp_mb__before_atomic() in mptcp_active_disable() */ 375 smp_rmb(); 376 377 /* Limit timeout to max: 2^6 * initial timeout */ 378 multiplier = 1 << min(disable_times - 1, 6); 379 380 /* Paired with the WRITE_ONCE() in mptcp_active_disable(). */ 381 timeout = READ_ONCE(pernet->active_disable_stamp) + 382 multiplier * blackhole_timeout * HZ; 383 384 return time_before(jiffies, timeout); 385} 386 387/* Enable active MPTCP and reset active_disable_times if needed */ 388void mptcp_active_enable(struct sock *sk) 389{ 390 struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk)); 391 392 if (atomic_read(&pernet->active_disable_times)) { 393 struct dst_entry *dst = sk_dst_get(sk); 394 395 if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)) 396 atomic_set(&pernet->active_disable_times, 0); 397 } 398} 399 400/* Check the number of retransmissions, and fallback to TCP if needed */ 401void mptcp_active_detect_blackhole(struct sock *ssk, bool expired) 402{ 403 struct mptcp_subflow_context *subflow; 404 405 if (!sk_is_mptcp(ssk)) 406 return; 407 408 subflow = mptcp_subflow_ctx(ssk); 409 410 if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) { 411 struct net *net = sock_net(ssk); 412 u8 timeouts, to_max; 413 414 timeouts = inet_csk(ssk)->icsk_retransmits; 415 to_max = mptcp_get_pernet(net)->syn_retrans_before_tcp_fallback; 416 417 if (timeouts == to_max || (timeouts < to_max && expired)) { 418 MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP); 419 subflow->mpc_drop = 1; 420 mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); 421 } 422 } else if (ssk->sk_state == TCP_SYN_SENT) { 423 subflow->mpc_drop = 0; 424 } 425} 426 427static int __net_init mptcp_net_init(struct net *net) 428{ 429 struct mptcp_pernet *pernet = mptcp_get_pernet(net); 430 431 mptcp_pernet_set_defaults(pernet); 432 433 return mptcp_pernet_new_table(net, pernet); 434} 435 436/* Note: the callback will only be called per extra netns */ 437static void __net_exit mptcp_net_exit(struct net *net) 438{ 439 struct mptcp_pernet *pernet = mptcp_get_pernet(net); 440 441 mptcp_pernet_del_table(pernet); 442} 443 444static struct pernet_operations mptcp_pernet_ops = { 445 .init = mptcp_net_init, 446 .exit = mptcp_net_exit, 447 .id = &mptcp_pernet_id, 448 .size = sizeof(struct mptcp_pernet), 449}; 450 451void __init mptcp_init(void) 452{ 453 mptcp_join_cookie_init(); 454 mptcp_proto_init(); 455 456 if (register_pernet_subsys(&mptcp_pernet_ops) < 0) 457 panic("Failed to register MPTCP pernet subsystem.\n"); 458} 459 460#if IS_ENABLED(CONFIG_MPTCP_IPV6) 461int __init mptcpv6_init(void) 462{ 463 int err; 464 465 err = mptcp_proto_v6_init(); 466 467 return err; 468} 469#endif