Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: make default_rps_mask a per netns attribute

default_rps_mask was really meant to be a per-netns attribute from the beginning.

The idea is that once proper isolation is in place in the main
namespace, additional demux in the child namespaces will be redundant.
Let's make the default RPS mask of child netns start out empty.

To avoid bloating the netns with a possibly large cpumask, allocate
it on-demand during the first write operation.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Paolo Abeni and committed by
David S. Miller
50bcfe8d e469b626

+59 -21
-1
include/linux/netdevice.h
··· 224 224 #include <linux/static_key.h> 225 225 extern struct static_key_false rps_needed; 226 226 extern struct static_key_false rfs_needed; 227 - extern struct cpumask rps_default_mask; 228 227 #endif 229 228 230 229 struct neighbour;
+5
include/net/netns/core.h
··· 6 6 7 7 struct ctl_table_header; 8 8 struct prot_inuse; 9 + struct cpumask; 9 10 10 11 struct netns_core { 11 12 /* core sysctls */ ··· 17 16 18 17 #ifdef CONFIG_PROC_FS 19 18 struct prot_inuse __percpu *prot_inuse; 19 + #endif 20 + 21 + #if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL) 22 + struct cpumask *rps_default_mask; 20 23 #endif 21 24 }; 22 25
+16 -7
net/core/net-sysfs.c
··· 1060 1060 .get_ownership = rx_queue_get_ownership, 1061 1061 }; 1062 1062 1063 + static int rx_queue_default_mask(struct net_device *dev, 1064 + struct netdev_rx_queue *queue) 1065 + { 1066 + #if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL) 1067 + struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask); 1068 + 1069 + if (rps_default_mask && !cpumask_empty(rps_default_mask)) 1070 + return netdev_rx_queue_set_rps_mask(queue, rps_default_mask); 1071 + #endif 1072 + return 0; 1073 + } 1074 + 1063 1075 static int rx_queue_add_kobject(struct net_device *dev, int index) 1064 1076 { 1065 1077 struct netdev_rx_queue *queue = dev->_rx + index; ··· 1095 1083 goto err; 1096 1084 } 1097 1085 1098 - #if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL) 1099 - if (!cpumask_empty(&rps_default_mask)) { 1100 - error = netdev_rx_queue_set_rps_mask(queue, &rps_default_mask); 1101 - if (error) 1102 - goto err; 1103 - } 1104 - #endif 1086 + error = rx_queue_default_mask(dev, queue); 1087 + if (error) 1088 + goto err; 1089 + 1105 1090 kobject_uevent(kobj, KOBJ_ADD); 1106 1091 1107 1092 return error;
+38 -13
net/core/sysctl_net_core.c
··· 74 74 #endif 75 75 76 76 #ifdef CONFIG_RPS 77 - struct cpumask rps_default_mask; 77 + 78 + static struct cpumask *rps_default_mask_cow_alloc(struct net *net) 79 + { 80 + struct cpumask *rps_default_mask; 81 + 82 + if (net->core.rps_default_mask) 83 + return net->core.rps_default_mask; 84 + 85 + rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL); 86 + if (!rps_default_mask) 87 + return NULL; 88 + 89 + /* pairs with READ_ONCE in rx_queue_default_mask() */ 90 + WRITE_ONCE(net->core.rps_default_mask, rps_default_mask); 91 + return rps_default_mask; 92 + } 78 93 79 94 static int rps_default_mask_sysctl(struct ctl_table *table, int write, 80 95 void *buffer, size_t *lenp, loff_t *ppos) 81 96 { 97 + struct net *net = (struct net *)table->data; 82 98 int err = 0; 83 99 84 100 rtnl_lock(); 85 101 if (write) { 86 - err = cpumask_parse(buffer, &rps_default_mask); 102 + struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net); 103 + 104 + err = -ENOMEM; 105 + if (!rps_default_mask) 106 + goto done; 107 + 108 + err = cpumask_parse(buffer, rps_default_mask); 87 109 if (err) 88 110 goto done; 89 111 90 - err = rps_cpumask_housekeeping(&rps_default_mask); 112 + err = rps_cpumask_housekeeping(rps_default_mask); 91 113 if (err) 92 114 goto done; 93 115 } else { 94 - dump_cpumask(buffer, lenp, ppos, &rps_default_mask); 116 + dump_cpumask(buffer, lenp, ppos, 117 + net->core.rps_default_mask ? 
: cpu_none_mask); 95 118 } 96 119 97 120 done: ··· 531 508 .mode = 0644, 532 509 .proc_handler = rps_sock_flow_sysctl 533 510 }, 534 - { 535 - .procname = "rps_default_mask", 536 - .mode = 0644, 537 - .proc_handler = rps_default_mask_sysctl 538 - }, 539 511 #endif 540 512 #ifdef CONFIG_NET_FLOW_LIMIT 541 513 { ··· 657 639 }; 658 640 659 641 static struct ctl_table netns_core_table[] = { 642 + #if IS_ENABLED(CONFIG_RPS) 643 + { 644 + .procname = "rps_default_mask", 645 + .data = &init_net, 646 + .mode = 0644, 647 + .proc_handler = rps_default_mask_sysctl 648 + }, 649 + #endif 660 650 { 661 651 .procname = "somaxconn", 662 652 .data = &init_net.core.sysctl_somaxconn, ··· 732 706 tbl = net->core.sysctl_hdr->ctl_table_arg; 733 707 unregister_net_sysctl_table(net->core.sysctl_hdr); 734 708 BUG_ON(tbl == netns_core_table); 709 + #if IS_ENABLED(CONFIG_RPS) 710 + kfree(net->core.rps_default_mask); 711 + #endif 735 712 kfree(tbl); 736 713 } 737 714 ··· 745 716 746 717 static __init int sysctl_core_init(void) 747 718 { 748 - #if IS_ENABLED(CONFIG_RPS) 749 - cpumask_copy(&rps_default_mask, cpu_none_mask); 750 - #endif 751 - 752 719 register_net_sysctl(&init_net, "net/core", net_core_table); 753 720 return register_pernet_subsys(&sysctl_core_ops); 754 721 }