// SPDX-License-Identifier: GPL-2.0
/* -*- linux-c -*-
 * sysctl_net_core.c: sysctl interface to net core subsystem.
 *
 * Begun April 1, 1996, Mike Shaver.
 * Added /proc/sys/net/core directory entry (empty =) ). [MS]
 */

#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/ratelimit.h>
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/sched/isolation.h>

#include <net/ip.h>
#include <net/sock.h>
#include <net/net_ratelimit.h>
#include <net/busy_poll.h>
#include <net/pkt_sched.h>
#include <net/hotdata.h>
#include <net/proto_memory.h>
#include <net/rps.h>

#include "dev.h"
#include "net-sysfs.h"

static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;
static int netdev_budget_usecs_min = 2 * USEC_PER_SEC / HZ;

static int net_msg_warn;	/* Unused, but still a sysctl */

int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);

/* 0 - Keep current behavior:
 *     IPv4: inherit all current settings from init_net
 *     IPv6: reset all settings to default
 * 1 - Both inherit all current settings from init_net
 * 2 - Both reset all settings to default
 * 3 - Both inherit all settings from current netns
 */
int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);

#if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS)
static int dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
			struct cpumask *mask)
{
	char *kbuf;
	int len;

	if (*ppos || !*lenp) {
		*lenp = 0;
		return 0;
	}

	/* CPUs are displayed as a hex bitmap + a comma between each group of 8
	 * nibbles (except the last one, which has a newline instead).
	 * Guesstimate the buffer size at the group granularity level.
	 */
	len = min(DIV_ROUND_UP(nr_cpumask_bits, 32) * (8 + 1), *lenp);
	kbuf = kmalloc(len, GFP_KERNEL);
	if (!kbuf) {
		*lenp = 0;
		return -ENOMEM;
	}

	len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
	if (!len) {
		*lenp = 0;
		goto free_buf;
	}

	/* scnprintf writes a trailing NUL char that is not counted in the
	 * returned length; overwrite it with a newline.
	 */
	kbuf[len++] = '\n';
	memcpy(buffer, kbuf, len);
	*lenp = len;
	*ppos += len;

free_buf:
	kfree(kbuf);
	return 0;
}
#endif

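/* Worked example for dump_cpumask() (illustrative, assuming
 * nr_cpumask_bits == 16): a mask covering CPUs 0-3 reads back as "000f\n".
 * The size estimate above reserves 9 bytes per 32-bit group: 8 hex nibbles
 * plus either a ',' separator or the final '\n'.
 */
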
#ifdef CONFIG_RPS

DEFINE_MUTEX(rps_default_mask_mutex);

static int rps_default_mask_sysctl(const struct ctl_table *table, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)table->data;
	struct cpumask *mask;
	int err = 0;

	mutex_lock(&rps_default_mask_mutex);
	mask = net->core.rps_default_mask;
	if (write) {
		if (!mask) {
			mask = kzalloc(cpumask_size(), GFP_KERNEL);
			net->core.rps_default_mask = mask;
		}
		err = -ENOMEM;
		if (!mask)
			goto done;

		err = cpumask_parse(buffer, mask);
		if (err)
			goto done;

		err = rps_cpumask_housekeeping(mask);
		if (err)
			goto done;
	} else {
		err = dump_cpumask(buffer, lenp, ppos,
				   mask ?: cpu_none_mask);
	}

done:
	mutex_unlock(&rps_default_mask_mutex);
	return err;
}

static int rps_sock_flow_sysctl(const struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int orig_size, size;
	int ret, i;
	struct ctl_table tmp = {
		.data = &size,
		.maxlen = sizeof(size),
		.mode = table->mode
	};
	struct rps_sock_flow_table *orig_sock_table, *sock_table;
	static DEFINE_MUTEX(sock_flow_mutex);

	mutex_lock(&sock_flow_mutex);

	orig_sock_table = rcu_dereference_protected(
					net_hotdata.rps_sock_flow_table,
					lockdep_is_held(&sock_flow_mutex));
	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;

	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);

	if (write) {
		if (size) {
			if (size > 1<<29) {
				/* Enforce limit to prevent overflow */
				mutex_unlock(&sock_flow_mutex);
				return -EINVAL;
			}
			size = roundup_pow_of_two(size);
			if (size != orig_size) {
				sock_table =
				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
				if (!sock_table) {
					mutex_unlock(&sock_flow_mutex);
					return -ENOMEM;
				}
				net_hotdata.rps_cpu_mask =
					roundup_pow_of_two(nr_cpu_ids) - 1;
				sock_table->mask = size - 1;
			} else
				sock_table = orig_sock_table;

			for (i = 0; i < size; i++)
				sock_table->ents[i] = RPS_NO_CPU;
		} else
			sock_table = NULL;

		if (sock_table != orig_sock_table) {
			rcu_assign_pointer(net_hotdata.rps_sock_flow_table,
					   sock_table);
			if (sock_table) {
				static_branch_inc(&rps_needed);
				static_branch_inc(&rfs_needed);
			}
			if (orig_sock_table) {
				static_branch_dec(&rps_needed);
				static_branch_dec(&rfs_needed);
				kvfree_rcu(orig_sock_table, rcu);
			}
		}
	}

	mutex_unlock(&sock_flow_mutex);

	return ret;
}
#endif /* CONFIG_RPS */

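/* Usage sketch (illustrative; "eth0" and the sizes are assumptions, not from
 * this file): RFS is typically enabled by sizing the global flow table here,
 * then the per-queue tables in sysfs:
 *
 *	echo 32768 > /proc/sys/net/core/rps_sock_flow_entries
 *	echo 2048 > /sys/class/net/eth0/queues/rx-0/rps_flow_cnt
 *
 * rps_sock_flow_sysctl() rounds the request up to a power of two and caps it
 * at 1<<29 so RPS_SOCK_FLOW_TABLE_SIZE() cannot overflow; writing 0 frees the
 * table and turns the rps/rfs static branches back off.
 */
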
#ifdef CONFIG_NET_FLOW_LIMIT
static DEFINE_MUTEX(flow_limit_update_mutex);

static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write,
				 void *buffer, size_t *lenp, loff_t *ppos)
{
	struct sd_flow_limit *cur;
	struct softnet_data *sd;
	cpumask_var_t mask;
	int i, len, ret = 0;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	if (write) {
		ret = cpumask_parse(buffer, mask);
		if (ret)
			goto done;

		mutex_lock(&flow_limit_update_mutex);
		len = sizeof(*cur) + netdev_flow_limit_table_len;
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			cur = rcu_dereference_protected(sd->flow_limit,
				lockdep_is_held(&flow_limit_update_mutex));
			if (cur && !cpumask_test_cpu(i, mask)) {
				RCU_INIT_POINTER(sd->flow_limit, NULL);
				kfree_rcu(cur, rcu);
			} else if (!cur && cpumask_test_cpu(i, mask)) {
				cur = kzalloc_node(len, GFP_KERNEL,
						   cpu_to_node(i));
				if (!cur) {
					/* not unwinding previous changes */
					ret = -ENOMEM;
					goto write_unlock;
				}
				cur->log_buckets = ilog2(netdev_flow_limit_table_len);
				rcu_assign_pointer(sd->flow_limit, cur);
			}
		}
write_unlock:
		mutex_unlock(&flow_limit_update_mutex);
	} else {
		cpumask_clear(mask);
		rcu_read_lock();
		for_each_possible_cpu(i) {
			sd = &per_cpu(softnet_data, i);
			if (rcu_dereference(sd->flow_limit))
				cpumask_set_cpu(i, mask);
		}
		rcu_read_unlock();

		ret = dump_cpumask(buffer, lenp, ppos, mask);
	}

done:
	free_cpumask_var(mask);
	return ret;
}

static int flow_limit_table_len_sysctl(const struct ctl_table *table, int write,
				       void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int old, *ptr;
	int ret;

	mutex_lock(&flow_limit_update_mutex);

	ptr = table->data;
	old = *ptr;
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	if (!ret && write && !is_power_of_2(*ptr)) {
		*ptr = old;
		ret = -EINVAL;
	}

	mutex_unlock(&flow_limit_update_mutex);
	return ret;
}
#endif /* CONFIG_NET_FLOW_LIMIT */

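/* Usage sketch (illustrative): flow limiting is armed per CPU via the
 * bitmap, e.g. on CPUs 0-3 with the default table length:
 *
 *	echo f > /proc/sys/net/core/flow_limit_cpu_bitmap
 *
 * flow_limit_table_len must stay a power of two (enforced above), since
 * cur->log_buckets is derived with ilog2() and any other value would
 * silently shrink the effective table.
 */
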
#ifdef CONFIG_NET_SCHED
static int set_default_qdisc(const struct ctl_table *table, int write,
			     void *buffer, size_t *lenp, loff_t *ppos)
{
	char id[IFNAMSIZ];
	struct ctl_table tbl = {
		.data = id,
		.maxlen = IFNAMSIZ,
	};
	int ret;

	qdisc_get_default(id, IFNAMSIZ);

	ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
	if (write && ret == 0)
		ret = qdisc_set_default(id);
	return ret;
}
#endif

static int proc_do_dev_weight(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	static DEFINE_MUTEX(dev_weight_mutex);
	int ret, weight;

	mutex_lock(&dev_weight_mutex);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && write) {
		weight = READ_ONCE(weight_p);
		WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias);
		WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias);
	}
	mutex_unlock(&dev_weight_mutex);

	return ret;
}

static int proc_do_rss_key(const struct ctl_table *table, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table fake_table;
	char buf[NETDEV_RSS_KEY_LEN * 3];

	snprintf(buf, sizeof(buf), "%*phC", NETDEV_RSS_KEY_LEN, netdev_rss_key);
	fake_table.data = buf;
	fake_table.maxlen = sizeof(buf);
	return proc_dostring(&fake_table, write, buffer, lenp, ppos);
}

#ifdef CONFIG_BPF_JIT
static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	int ret, jit_enable = *(int *)table->data;
	int min = *(int *)table->extra1;
	int max = *(int *)table->extra2;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	tmp.data = &jit_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (jit_enable < 2 ||
		    (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
			*(int *)table->data = jit_enable;
			if (jit_enable == 2)
				pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
		} else {
			ret = -EPERM;
		}
	}

	if (write && ret && min == max)
		pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");

	return ret;
}

# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
# endif /* CONFIG_HAVE_EBPF_JIT */

static int
proc_dolongvec_minmax_bpf_restricted(const struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
}
#endif

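/* For reference (summarizing the handlers above; wording not in the
 * original): bpf_jit_enable accepts 0 (interpreter only), 1 (JIT on) and
 * 2 (JIT on, dumping generated images to the kernel log). Writes require
 * CAP_SYS_ADMIN, value 2 additionally requires bpf_dump_raw_ok(), and with
 * CONFIG_BPF_JIT_ALWAYS_ON the bounds in the table below pin the value to 1.
 */
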
static struct ctl_table net_core_table[] = {
	{
		.procname = "mem_pcpu_rsv",
		.data = &net_hotdata.sysctl_mem_pcpu_rsv,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_mem_pcpu_rsv,
	},
	{
		.procname = "dev_weight",
		.data = &weight_p,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_dev_weight,
		.extra1 = SYSCTL_ONE,
	},
	{
		.procname = "dev_weight_rx_bias",
		.data = &dev_weight_rx_bias,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_dev_weight,
		.extra1 = SYSCTL_ONE,
	},
	{
		.procname = "dev_weight_tx_bias",
		.data = &dev_weight_tx_bias,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_do_dev_weight,
		.extra1 = SYSCTL_ONE,
	},
	{
		.procname = "netdev_max_backlog",
		.data = &net_hotdata.max_backlog,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "qdisc_max_burst",
		.data = &net_hotdata.qdisc_max_burst,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "netdev_rss_key",
		.data = &netdev_rss_key,
		.maxlen = sizeof(int),
		.mode = 0444,
		.proc_handler = proc_do_rss_key,
	},
#ifdef CONFIG_BPF_JIT
	{
		.procname = "bpf_jit_enable",
		.data = &bpf_jit_enable,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
		.extra1 = SYSCTL_ONE,
		.extra2 = SYSCTL_ONE,
# else
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
# endif
	},
# ifdef CONFIG_HAVE_EBPF_JIT
	{
		.procname = "bpf_jit_harden",
		.data = &bpf_jit_harden,
		.maxlen = sizeof(int),
		.mode = 0600,
		.proc_handler = proc_dointvec_minmax_bpf_restricted,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
	},
	{
		.procname = "bpf_jit_kallsyms",
		.data = &bpf_jit_kallsyms,
		.maxlen = sizeof(int),
		.mode = 0600,
		.proc_handler = proc_dointvec_minmax_bpf_restricted,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
# endif
	{
		.procname = "bpf_jit_limit",
		.data = &bpf_jit_limit,
		.maxlen = sizeof(long),
		.mode = 0600,
		.proc_handler = proc_dolongvec_minmax_bpf_restricted,
		.extra1 = SYSCTL_LONG_ONE,
		.extra2 = &bpf_jit_limit_max,
	},
#endif
	{
		.procname = "netdev_tstamp_prequeue",
		.data = &net_hotdata.tstamp_prequeue,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "message_cost",
		.data = &net_ratelimit_state.interval,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_jiffies,
	},
	{
		.procname = "message_burst",
		.data = &net_ratelimit_state.burst,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec,
	},
#ifdef CONFIG_RPS
	{
		.procname = "rps_sock_flow_entries",
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = rps_sock_flow_sysctl
	},
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
	{
		.procname = "flow_limit_cpu_bitmap",
		.mode = 0644,
		.proc_handler = flow_limit_cpu_sysctl
	},
	{
		.procname = "flow_limit_table_len",
		.data = &netdev_flow_limit_table_len,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = flow_limit_table_len_sysctl
	},
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_RX_BUSY_POLL
	{
		.procname = "busy_poll",
		.data = &sysctl_net_busy_poll,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
	},
	{
		.procname = "busy_read",
		.data = &sysctl_net_busy_read,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
	},
#endif
#ifdef CONFIG_NET_SCHED
	{
		.procname = "default_qdisc",
		.mode = 0644,
		.maxlen = IFNAMSIZ,
		.proc_handler = set_default_qdisc
	},
#endif
	{
		.procname = "netdev_budget",
		.data = &net_hotdata.netdev_budget,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "warnings",
		.data = &net_msg_warn,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec
	},
	{
		.procname = "max_skb_frags",
		.data = &net_hotdata.sysctl_max_skb_frags,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ONE,
		.extra2 = &max_skb_frags,
	},
	{
		.procname = "netdev_budget_usecs",
		.data = &net_hotdata.netdev_budget_usecs,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &netdev_budget_usecs_min,
	},
	{
		.procname = "fb_tunnels_only_for_init_net",
		.data = &sysctl_fb_tunnels_only_for_init_net,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_TWO,
	},
	{
		.procname = "devconf_inherit_init_net",
		.data = &sysctl_devconf_inherit_init_net,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_THREE,
	},
	{
		.procname = "high_order_alloc_disable",
		.data = &net_high_order_alloc_disable_key.key,
		.maxlen = sizeof(net_high_order_alloc_disable_key),
		.mode = 0644,
		.proc_handler = proc_do_static_key,
	},
	{
		.procname = "gro_normal_batch",
		.data = &net_hotdata.gro_normal_batch,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ONE,
	},
	{
		.procname = "netdev_unregister_timeout_secs",
		.data = &netdev_unregister_timeout_secs,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ONE,
		.extra2 = &int_3600,
	},
	{
		.procname = "skb_defer_max",
		.data = &net_hotdata.sysctl_skb_defer_max,
		.maxlen = sizeof(unsigned int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
	},
};

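/* Note (derived from the values above; wording not in the original):
 * netdev_budget and netdev_budget_usecs jointly bound one net_rx_action()
 * run, and polling stops at whichever limit is hit first.
 * netdev_budget_usecs_min is two jiffies expressed in microseconds, e.g.
 * 2000 with HZ=1000 or 8000 with HZ=250.
 */
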
static struct ctl_table netns_core_table[] = {
#if IS_ENABLED(CONFIG_RPS)
	{
		.procname = "rps_default_mask",
		.data = &init_net,
		.mode = 0644,
		.proc_handler = rps_default_mask_sysctl
	},
#endif
	{
		.procname = "somaxconn",
		.data = &init_net.core.sysctl_somaxconn,
		.maxlen = sizeof(int),
		.mode = 0644,
		.extra1 = SYSCTL_ZERO,
		.proc_handler = proc_dointvec_minmax
	},
	{
		.procname = "optmem_max",
		.data = &init_net.core.sysctl_optmem_max,
		.maxlen = sizeof(int),
		.mode = 0644,
		.extra1 = SYSCTL_ZERO,
		.proc_handler = proc_dointvec_minmax
	},
	{
		.procname = "txrehash",
		.data = &init_net.core.sysctl_txrehash,
		.maxlen = sizeof(u8),
		.mode = 0644,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
		.proc_handler = proc_dou8vec_minmax,
	},
	{
		.procname = "txq_reselection_ms",
		.data = &init_net.core.sysctl_txq_reselection,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_ms_jiffies,
	},
	{
		.procname = "tstamp_allow_data",
		.data = &init_net.core.sysctl_tstamp_allow_data,
		.maxlen = sizeof(u8),
		.mode = 0644,
		.proc_handler = proc_dou8vec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE
	},
	{
		.procname = "bypass_prot_mem",
		.data = &init_net.core.sysctl_bypass_prot_mem,
		.maxlen = sizeof(u8),
		.mode = 0644,
		.proc_handler = proc_dou8vec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE
	},
	/* sysctl_core_net_init() makes the entries from this point on
	 * read-only in non-init network namespaces.
	 */
	{
		.procname = "wmem_max",
		.data = &sysctl_wmem_max,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_sndbuf,
	},
	{
		.procname = "rmem_max",
		.data = &sysctl_rmem_max,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_rcvbuf,
	},
	{
		.procname = "wmem_default",
		.data = &sysctl_wmem_default,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_sndbuf,
	},
	{
		.procname = "rmem_default",
		.data = &sysctl_rmem_default,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = &min_rcvbuf,
	},
};

static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str)
{
	/* fallback tunnels for initns only */
	if (!strncmp(str, "initns", 6))
		sysctl_fb_tunnels_only_for_init_net = 1;
	/* no fallback tunnels anywhere */
	else if (!strncmp(str, "none", 4))
		sysctl_fb_tunnels_only_for_init_net = 2;

	return 1;
}
__setup("fb_tunnels=", fb_tunnels_only_for_init_net_sysctl_setup);

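/* Boot-time equivalent (illustrative): the same policy can be selected on
 * the kernel command line with "fb_tunnels=initns" or "fb_tunnels=none",
 * matching sysctl values 1 and 2; any other string keeps the default of 0
 * (fallback tunnels are created in every netns).
 */
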
static __net_init int sysctl_core_net_init(struct net *net)
{
	size_t table_size = ARRAY_SIZE(netns_core_table);
	struct ctl_table *tbl;

	tbl = netns_core_table;
	if (!net_eq(net, &init_net)) {
		int i;

		tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;

		for (i = 0; i < table_size; ++i) {
			if (tbl[i].data == &sysctl_wmem_max)
				break;

			/* Rebase the per-netns pointers onto this netns */
			tbl[i].data += (char *)net - (char *)&init_net;
		}
		/* The remaining (global) entries become read-only */
		for (; i < table_size; ++i)
			tbl[i].mode &= ~0222;
	}

	net->core.sysctl_hdr = register_net_sysctl_sz(net, "net/core", tbl, table_size);
	if (net->core.sysctl_hdr == NULL)
		goto err_reg;

	return 0;

err_reg:
	if (tbl != netns_core_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

static __net_exit void sysctl_core_net_exit(struct net *net)
{
	const struct ctl_table *tbl;

	tbl = net->core.sysctl_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->core.sysctl_hdr);
	BUG_ON(tbl == netns_core_table);
#if IS_ENABLED(CONFIG_RPS)
	kfree(net->core.rps_default_mask);
#endif
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_core_ops = {
	.init = sysctl_core_net_init,
	.exit = sysctl_core_net_exit,
};

static __init int sysctl_core_init(void)
{
	register_net_sysctl(&init_net, "net/core", net_core_table);
	return register_pernet_subsys(&sysctl_core_ops);
}

fs_initcall(sysctl_core_init);
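/* Ordering note (illustrative; standard initcall semantics rather than text
 * from this file): fs_initcall() runs at level 5, before device_initcall(),
 * so /proc/sys/net/core exists before most drivers probe; the per-netns
 * table is registered again whenever a new network namespace is created.
 */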