net/core/sysctl_net_core.c at v6.12-rc4
// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
 * sysctl_net_core.c: sysctl interface to net core subsystem.
 *
 * Begun April 1, 1996, Mike Shaver.
 * Added /proc/sys/net/core directory entry (empty =) ). [MS]
 */

#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/sched/isolation.h>

#include <net/ip.h>
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
#include <net/hotdata.h>
#include <net/proto_memory.h>
#include <net/rps.h>

#include "dev.h"

static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;

static int net_msg_warn;	/* Unused, but still a sysctl */

int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);

/* 0 - Keep current behavior:
 *     IPv4: inherit all current settings from init_net
 *     IPv6: reset all settings to default
 * 1 - Both inherit all current settings from init_net
 * 2 - Both reset all settings to default
 * 3 - Both inherit all settings from current netns
 */
int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);

#if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS)
static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
			 struct cpumask *mask)
{
	char kbuf[128];
	int len;

	if (*ppos || !*lenp) {
		*lenp = 0;
		return;
	}

	len = min(sizeof(kbuf) - 1, *lenp);
	len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
	if (!len) {
		*lenp = 0;
		return;
	}

	if (len < *lenp)
		kbuf[len++] = '\n';
	memcpy(buffer, kbuf, len);
	*lenp = len;
	*ppos += len;
}
#endif

#ifdef CONFIG_RPS

static struct cpumask *rps_default_mask_cow_alloc(struct net *net)
{
	struct cpumask *rps_default_mask;

	if (net->core.rps_default_mask)
		return net->core.rps_default_mask;

	rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL);
	if (!rps_default_mask)
		return NULL;

	/* pairs with READ_ONCE in rx_queue_default_mask() */
	WRITE_ONCE(net->core.rps_default_mask, rps_default_mask);
	return rps_default_mask;
}

static int rps_default_mask_sysctl(const struct ctl_table *table, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)table->data;
	int err = 0;

	rtnl_lock();
	if (write) {
		struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net);

		err = -ENOMEM;
		if (!rps_default_mask)
			goto done;

		err = cpumask_parse(buffer, rps_default_mask);
		if (err)
			goto done;

		err = rps_cpumask_housekeeping(rps_default_mask);
		if (err)
			goto done;
	} else {
		dump_cpumask(buffer, lenp, ppos,
			     net->core.rps_default_mask ? : cpu_none_mask);
	}

done:
	rtnl_unlock();
	return err;
}
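
/*
 * rps_sock_flow_sysctl() resizes the global RFS socket flow table:
 * the requested entry count is rounded up to a power of two (values
 * above 1 << 29 are rejected before rounding), the new table is
 * swapped in under RCU, and the rps_needed/rfs_needed static branches
 * are bumped so the fast path only pays for RFS while a table is
 * installed.  Writing 0 drops the table and disables RFS again.
 * Illustrative usage from userspace:
 *
 *	echo 32768 > /proc/sys/net/core/rps_sock_flow_entries
 */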
static int rps_sock_flow_sysctl(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(
					net_hotdata.rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1<<29) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}
				net_hotdata.rps_cpu_mask =
					roundup_pow_of_two(nr_cpu_ids) - 1;
				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;

		if (sock_table != orig_sock_table) {
			rcu_assign_pointer(net_hotdata.rps_sock_flow_table,
					   sock_table);
			if (sock_table) {
				static_branch_inc(&rps_needed);
				static_branch_inc(&rfs_needed);
			}
			if (orig_sock_table) {
				static_branch_dec(&rps_needed);
				static_branch_dec(&rfs_needed);
				kvfree_rcu_mightsleep(orig_sock_table);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */

#ifdef CONFIG_NET_FLOW_LIMIT
static DEFINE_MUTEX(flow_limit_update_mutex);

static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse(buffer, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				     lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				kfree_rcu_mightsleep(cur);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc_node(len, GFP_KERNEL,
						   cpu_to_node(i));
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->num_buckets = netdev_flow_limit_table_len;
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		dump_cpumask(buffer, lenp, ppos, mask);
	}

done:
	free_cpumask_var(mask);
	return ret;
}
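
/*
 * flow_limit_table_len_sysctl() only accepts powers of two, because
 * flow limit buckets are selected by masking the flow hash with
 * num_buckets - 1.  Illustrative: writing 8192 succeeds, while a
 * non-power-of-two value such as 5000 is rejected with -EINVAL:
 *
 *	echo 8192 > /proc/sys/net/core/flow_limit_table_len
 */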
static int flow_limit_table_len_sysctl(const struct ctl_table *table, int write,
				       void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int old, *ptr;
	int ret;

	mutex_lock(&flow_limit_update_mutex);

	ptr = table->data;
	old = *ptr;
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write && !is_power_of_2(*ptr)) {
		*ptr = old;
		ret = -EINVAL;
	}

	mutex_unlock(&flow_limit_update_mutex);
	return ret;
}
#endif /* CONFIG_NET_FLOW_LIMIT */

#ifdef CONFIG_NET_SCHED
static int set_default_qdisc(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	char id[IFNAMSIZ];
	struct ctl_table tbl = {
		.data = id,
		.maxlen = IFNAMSIZ,
	};
	int ret;

	qdisc_get_default(id, IFNAMSIZ);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = qdisc_set_default(id);
	return ret;
}
#endif

static int proc_do_dev_weight(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(dev_weight_mutex);
	int ret, weight;

	mutex_lock(&dev_weight_mutex);
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		weight = READ_ONCE(weight_p);
		WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
		WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias);
	}
	mutex_unlock(&dev_weight_mutex);

	return ret;
}

static int proc_do_rss_key(const struct ctl_table *table, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table fake_table;
	char buf[NETDEV_RSS_KEY_LEN * 3];

	snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key);
	fake_table.data = buf;
	fake_table.maxlen = sizeof(buf);
	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}

#ifdef CONFIG_BPF_JIT
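/*
 * bpf_jit_enable: 0 disables the JIT, 1 enables it, and 2 additionally
 * dumps JIT state to the kernel log for debugging (hence the warning
 * below).  Writes require CAP_SYS_ADMIN, and with
 * CONFIG_BPF_JIT_ALWAYS_ON the value is pinned to 1 by the table's
 * extra1/extra2 bounds.  Illustrative usage:
 *
 *	sysctl -w net.core.bpf_jit_enable=1
 */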
static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	int ret, jit_enable = *(int *)table->data;
	int min = *(int *)table->extra1;
	int max = *(int *)table->extra2;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &jit_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (jit_enable < 2 ||
		    (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
			*(int *)table->data = jit_enable;
			if (jit_enable == 2)
				pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
		} else {
			ret = -EPERM;
		}
	}

	if (write && ret && min == max)
		pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");

	return ret;
}

# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
# endif /* CONFIG_HAVE_EBPF_JIT */

static int
proc_dolongvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#endif
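
/*
 * Global (not per-namespace) core sysctls, registered under
 * /proc/sys/net/core for init_net by sysctl_core_init() below.
 */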
static struct ctl_table net_core_table[] = {
	{
		.procname	= "mem_pcpu_rsv",
		.data		= &net_hotdata.sysctl_mem_pcpu_rsv,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_mem_pcpu_rsv,
	},
	{
		.procname	= "dev_weight",
		.data		= &weight_p,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
	},
	{
		.procname	= "dev_weight_rx_bias",
		.data		= &dev_weight_rx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
	},
	{
		.procname	= "dev_weight_tx_bias",
		.data		= &dev_weight_tx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
	},
	{
		.procname	= "netdev_max_backlog",
		.data		= &net_hotdata.max_backlog,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "netdev_rss_key",
		.data		= &netdev_rss_key,
		.maxlen		= sizeof(int),
		.mode		= 0444,
		.proc_handler	= proc_do_rss_key,
	},
#ifdef CONFIG_BPF_JIT
	{
		.procname	= "bpf_jit_enable",
		.data		= &bpf_jit_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
		.extra1		= SYSCTL_ONE,
		.extra2		= SYSCTL_ONE,
# else
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
# endif
	},
# ifdef CONFIG_HAVE_EBPF_JIT
	{
		.procname	= "bpf_jit_harden",
		.data		= &bpf_jit_harden,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "bpf_jit_kallsyms",
		.data		= &bpf_jit_kallsyms,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
# endif
	{
		.procname	= "bpf_jit_limit",
		.data		= &bpf_jit_limit,
		.maxlen		= sizeof(long),
		.mode		= 0600,
		.proc_handler	= proc_dolongvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_LONG_ONE,
		.extra2		= &bpf_jit_limit_max,
	},
#endif
	{
		.procname	= "netdev_tstamp_prequeue",
		.data		= &net_hotdata.tstamp_prequeue,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "message_cost",
		.data		= &net_ratelimit_state.interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "message_burst",
		.data		= &net_ratelimit_state.burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tstamp_allow_data",
		.data		= &sysctl_tstamp_allow_data,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE
	},
#ifdef CONFIG_RPS
	{
		.procname	= "rps_sock_flow_entries",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= rps_sock_flow_sysctl
	},
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
	{
		.procname	= "flow_limit_cpu_bitmap",
		.mode		= 0644,
		.proc_handler	= flow_limit_cpu_sysctl
	},
	{
		.procname	= "flow_limit_table_len",
		.data		= &netdev_flow_limit_table_len,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= flow_limit_table_len_sysctl
	},
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_RX_BUSY_POLL
	{
		.procname	= "busy_poll",
		.data		= &sysctl_net_busy_poll,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "busy_read",
		.data		= &sysctl_net_busy_read,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
#endif
#ifdef CONFIG_NET_SCHED
	{
		.procname	= "default_qdisc",
		.mode		= 0644,
		.maxlen		= IFNAMSIZ,
		.proc_handler	= set_default_qdisc
	},
#endif
	{
		.procname	= "netdev_budget",
		.data		= &net_hotdata.netdev_budget,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "warnings",
		.data		= &net_msg_warn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "max_skb_frags",
		.data		= &net_hotdata.sysctl_max_skb_frags,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &max_skb_frags,
	},
	{
		.procname	= "netdev_budget_usecs",
		.data		= &net_hotdata.netdev_budget_usecs,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "fb_tunnels_only_for_init_net",
		.data		= &sysctl_fb_tunnels_only_for_init_net,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "devconf_inherit_init_net",
		.data		= &sysctl_devconf_inherit_init_net,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_THREE,
	},
	{
		.procname	= "high_order_alloc_disable",
		.data		= &net_high_order_alloc_disable_key.key,
		.maxlen		= sizeof(net_high_order_alloc_disable_key),
		.mode		= 0644,
		.proc_handler	= proc_do_static_key,
	},
	{
		.procname	= "gro_normal_batch",
		.data		= &net_hotdata.gro_normal_batch,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "netdev_unregister_timeout_secs",
		.data		= &netdev_unregister_timeout_secs,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &int_3600,
	},
	{
		.procname	= "skb_defer_max",
		.data		= &net_hotdata.sysctl_skb_defer_max,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
};
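
/*
 * Per network namespace core sysctls.  sysctl_core_net_init() clones
 * this table for each new namespace and rebases the .data pointers
 * from init_net onto that namespace; entries from wmem_max onward stay
 * shared and are made read-only outside init_net.
 */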
static struct ctl_table netns_core_table[] = {
#if IS_ENABLED(CONFIG_RPS)
	{
		.procname	= "rps_default_mask",
		.data		= &init_net,
		.mode		= 0644,
		.proc_handler	= rps_default_mask_sysctl
	},
#endif
	{
		.procname	= "somaxconn",
		.data		= &init_net.core.sysctl_somaxconn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.extra1		= SYSCTL_ZERO,
		.proc_handler	= proc_dointvec_minmax
	},
	{
		.procname	= "optmem_max",
		.data		= &init_net.core.sysctl_optmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.extra1		= SYSCTL_ZERO,
		.proc_handler	= proc_dointvec_minmax
	},
	{
		.procname	= "txrehash",
		.data		= &init_net.core.sysctl_txrehash,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
		.proc_handler	= proc_dou8vec_minmax,
	},
	/* sysctl_core_net_init() will set the values after this
	 * to readonly in network namespaces
	 */
	{
		.procname	= "wmem_max",
		.data		= &sysctl_wmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_max",
		.data		= &sysctl_rmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
	{
		.procname	= "wmem_default",
		.data		= &sysctl_wmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_default",
		.data		= &sysctl_rmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
};

static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
{
	/* fallback tunnels for initns only */
	if (!strncmp(str, "initns", 6))
		sysctl_fb_tunnels_only_for_init_net = 1;
	/* no fallback tunnels anywhere */
	else if (!strncmp(str, "none", 4))
		sysctl_fb_tunnels_only_for_init_net = 2;

	return 1;
}
__setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);

static __net_init int sysctl_core_net_init(struct net *net)
{
	size_t table_size = ARRAY_SIZE(netns_core_table);
	struct ctl_table *tbl;

	tbl = netns_core_table;
	if (!net_eq(net, &init_net)) {
		int i;
		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;

		for (i = 0; i < table_size; ++i) {
			if (tbl[i].data == &sysctl_wmem_max)
				break;

			tbl[i].data += (char *)net - (char *)&init_net;
		}
		for (; i < table_size; ++i)
			tbl[i].mode &= ~0222;
	}

	net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size);
	if (net->core.sysctl_hdr == NULL)
		goto err_reg;

	return 0;

err_reg:
	if (tbl != netns_core_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

static __net_exit void sysctl_core_net_exit(struct net *net)
{
	const struct ctl_table *tbl;

	tbl = net->core.sysctl_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->core.sysctl_hdr);
	BUG_ON(tbl == netns_core_table);
#if IS_ENABLED(CONFIG_RPS)
	kfree(net->core.rps_default_mask);
#endif
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_core_ops = {
	.init = sysctl_core_net_init,
	.exit = sysctl_core_net_exit,
};
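
/*
 * Boot-time setup: the global net.core table is registered for the
 * initial namespace here, while per-netns tables come and go via
 * sysctl_core_ops above.  fs_initcall() runs early enough that these
 * sysctls exist before most drivers initialize.
 */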
static __init int sysctl_core_init(void)
{
	register_net_sysctl(&init_net, "net/core", net_core_table);
	return register_pernet_subsys(&sysctl_core_ops);
}

fs_initcall(sysctl_core_init);