Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: xtables: don't hook tables by default

Delay hook registration until the table is requested inside a
namespace.

Historically, a particular table (iptables mangle, ip6tables filter, etc)
was registered on module load.

When netns support was added to iptables only the ip/ip6tables ruleset was
made namespace aware, not the actual hook points.

This means, for example, that when the ipt_filter table/module is loaded on a
system, each namespace on that system has an (empty) iptables filter ruleset.

In other words, if a namespace sends a packet, such skb is 'caught' by
netfilter machinery and fed to hooking points for that table (i.e. INPUT,
FORWARD, etc).

Thanks to Eric Biederman, hooks are no longer global, but per namespace.

This means that we can avoid allocating an empty ruleset in a namespace and
defer hook registration until we need the functionality.

We register a table's hook entry points ONLY in the initial namespace.
When an iptables get/setsockopt is issued inside a given namespace, we check
if the table is found in the per-namespace list.

If not, we attempt to find it in the initial namespace, and, if found,
create an empty default table in the requesting namespace and register the
needed hooks.

Hook points are destroyed only once the namespace is deleted; there is no
'usage count' (it makes no sense since there is no 'remove table' operation
in the xtables API).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Florian Westphal and committed by
Pablo Neira Ayuso
b9e69e12 a67dd266

+368 -215
+4 -2
include/linux/netfilter/x_tables.h
··· 200 200 u_int8_t af; /* address/protocol family */ 201 201 int priority; /* hook order */ 202 202 203 + /* called when table is needed in the given netns */ 204 + int (*table_init)(struct net *net); 205 + 203 206 /* A unique name... */ 204 207 const char name[XT_TABLE_MAXNAMELEN]; 205 208 }; ··· 411 408 return cnt; 412 409 } 413 410 414 - struct nf_hook_ops *xt_hook_link(const struct xt_table *, nf_hookfn *); 415 - void xt_hook_unlink(const struct xt_table *, struct nf_hook_ops *); 411 + struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); 416 412 417 413 #ifdef CONFIG_COMPAT 418 414 #include <net/compat.h>
+27 -14
net/ipv4/netfilter/arp_tables.c
··· 1780 1780 return ret; 1781 1781 } 1782 1782 1783 + static void __arpt_unregister_table(struct xt_table *table) 1784 + { 1785 + struct xt_table_info *private; 1786 + void *loc_cpu_entry; 1787 + struct module *table_owner = table->me; 1788 + struct arpt_entry *iter; 1789 + 1790 + private = xt_unregister_table(table); 1791 + 1792 + /* Decrease module usage counts and free resources */ 1793 + loc_cpu_entry = private->entries; 1794 + xt_entry_foreach(iter, loc_cpu_entry, private->size) 1795 + cleanup_entry(iter); 1796 + if (private->number > private->initial_entries) 1797 + module_put(table_owner); 1798 + xt_free_table_info(private); 1799 + } 1800 + 1783 1801 int arpt_register_table(struct net *net, 1784 1802 const struct xt_table *table, 1785 1803 const struct arpt_replace *repl, ··· 1828 1810 goto out_free; 1829 1811 } 1830 1812 1813 + /* set res now, will see skbs right after nf_register_net_hooks */ 1831 1814 WRITE_ONCE(*res, new_table); 1815 + 1816 + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); 1817 + if (ret != 0) { 1818 + __arpt_unregister_table(new_table); 1819 + *res = NULL; 1820 + } 1832 1821 1833 1822 return ret; 1834 1823 ··· 1847 1822 void arpt_unregister_table(struct net *net, struct xt_table *table, 1848 1823 const struct nf_hook_ops *ops) 1849 1824 { 1850 - struct xt_table_info *private; 1851 - void *loc_cpu_entry; 1852 - struct module *table_owner = table->me; 1853 - struct arpt_entry *iter; 1854 - 1855 - private = xt_unregister_table(table); 1856 - 1857 - /* Decrease module usage counts and free resources */ 1858 - loc_cpu_entry = private->entries; 1859 - xt_entry_foreach(iter, loc_cpu_entry, private->size) 1860 - cleanup_entry(iter); 1861 - if (private->number > private->initial_entries) 1862 - module_put(table_owner); 1863 - xt_free_table_info(private); 1825 + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); 1826 + __arpt_unregister_table(table); 1864 1827 } 1865 1828 1866 1829 /* The built-in targets: 
standard (NULL) and error. */
+17 -12
net/ipv4/netfilter/arptable_filter.c
··· 17 17 #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ 18 18 (1 << NF_ARP_FORWARD)) 19 19 20 + static int __net_init arptable_filter_table_init(struct net *net); 21 + 20 22 static const struct xt_table packet_filter = { 21 23 .name = "filter", 22 24 .valid_hooks = FILTER_VALID_HOOKS, 23 25 .me = THIS_MODULE, 24 26 .af = NFPROTO_ARP, 25 27 .priority = NF_IP_PRI_FILTER, 28 + .table_init = arptable_filter_table_init, 26 29 }; 27 30 28 31 /* The work comes in here from netfilter.c */ ··· 38 35 39 36 static struct nf_hook_ops *arpfilter_ops __read_mostly; 40 37 41 - static int __net_init arptable_filter_net_init(struct net *net) 38 + static int __net_init arptable_filter_table_init(struct net *net) 42 39 { 43 40 struct arpt_replace *repl; 44 41 int err; 42 + 43 + if (net->ipv4.arptable_filter) 44 + return 0; 45 45 46 46 repl = arpt_alloc_initial_table(&packet_filter); 47 47 if (repl == NULL) ··· 57 51 58 52 static void __net_exit arptable_filter_net_exit(struct net *net) 59 53 { 54 + if (!net->ipv4.arptable_filter) 55 + return; 60 56 arpt_unregister_table(net, net->ipv4.arptable_filter, arpfilter_ops); 57 + net->ipv4.arptable_filter = NULL; 61 58 } 62 59 63 60 static struct pernet_operations arptable_filter_net_ops = { 64 - .init = arptable_filter_net_init, 65 61 .exit = arptable_filter_net_exit, 66 62 }; 67 63 ··· 71 63 { 72 64 int ret; 73 65 66 + arpfilter_ops = xt_hook_ops_alloc(&packet_filter, arptable_filter_hook); 67 + if (IS_ERR(arpfilter_ops)) 68 + return PTR_ERR(arpfilter_ops); 69 + 74 70 ret = register_pernet_subsys(&arptable_filter_net_ops); 75 - if (ret < 0) 71 + if (ret < 0) { 72 + kfree(arpfilter_ops); 76 73 return ret; 77 - 78 - arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook); 79 - if (IS_ERR(arpfilter_ops)) { 80 - ret = PTR_ERR(arpfilter_ops); 81 - goto cleanup_table; 82 74 } 83 - return ret; 84 75 85 - cleanup_table: 86 - unregister_pernet_subsys(&arptable_filter_net_ops); 87 76 return ret; 88 77 } 89 78 90 79 
static void __exit arptable_filter_fini(void) 91 80 { 92 - xt_hook_unlink(&packet_filter, arpfilter_ops); 93 81 unregister_pernet_subsys(&arptable_filter_net_ops); 82 + kfree(arpfilter_ops); 94 83 } 95 84 96 85 module_init(arptable_filter_init);
+28 -14
net/ipv4/netfilter/ip_tables.c
··· 2062 2062 return ret; 2063 2063 } 2064 2064 2065 + static void __ipt_unregister_table(struct net *net, struct xt_table *table) 2066 + { 2067 + struct xt_table_info *private; 2068 + void *loc_cpu_entry; 2069 + struct module *table_owner = table->me; 2070 + struct ipt_entry *iter; 2071 + 2072 + private = xt_unregister_table(table); 2073 + 2074 + /* Decrease module usage counts and free resources */ 2075 + loc_cpu_entry = private->entries; 2076 + xt_entry_foreach(iter, loc_cpu_entry, private->size) 2077 + cleanup_entry(iter, net); 2078 + if (private->number > private->initial_entries) 2079 + module_put(table_owner); 2080 + xt_free_table_info(private); 2081 + } 2082 + 2065 2083 int ipt_register_table(struct net *net, const struct xt_table *table, 2066 2084 const struct ipt_replace *repl, 2067 2085 const struct nf_hook_ops *ops, struct xt_table **res) ··· 2107 2089 goto out_free; 2108 2090 } 2109 2091 2092 + /* set res now, will see skbs right after nf_register_net_hooks */ 2110 2093 WRITE_ONCE(*res, new_table); 2094 + 2095 + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); 2096 + if (ret != 0) { 2097 + __ipt_unregister_table(net, new_table); 2098 + *res = NULL; 2099 + } 2100 + 2111 2101 return ret; 2112 2102 2113 2103 out_free: ··· 2126 2100 void ipt_unregister_table(struct net *net, struct xt_table *table, 2127 2101 const struct nf_hook_ops *ops) 2128 2102 { 2129 - struct xt_table_info *private; 2130 - void *loc_cpu_entry; 2131 - struct module *table_owner = table->me; 2132 - struct ipt_entry *iter; 2133 - 2134 - private = xt_unregister_table(table); 2135 - 2136 - /* Decrease module usage counts and free resources */ 2137 - loc_cpu_entry = private->entries; 2138 - xt_entry_foreach(iter, loc_cpu_entry, private->size) 2139 - cleanup_entry(iter, net); 2140 - if (private->number > private->initial_entries) 2141 - module_put(table_owner); 2142 - xt_free_table_info(private); 2103 + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); 
2104 + __ipt_unregister_table(net, table); 2143 2105 } 2144 2106 2145 2107 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
+24 -11
net/ipv4/netfilter/iptable_filter.c
··· 23 23 #define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ 24 24 (1 << NF_INET_FORWARD) | \ 25 25 (1 << NF_INET_LOCAL_OUT)) 26 + static int __net_init iptable_filter_table_init(struct net *net); 26 27 27 28 static const struct xt_table packet_filter = { 28 29 .name = "filter", ··· 31 30 .me = THIS_MODULE, 32 31 .af = NFPROTO_IPV4, 33 32 .priority = NF_IP_PRI_FILTER, 33 + .table_init = iptable_filter_table_init, 34 34 }; 35 35 36 36 static unsigned int ··· 50 48 static struct nf_hook_ops *filter_ops __read_mostly; 51 49 52 50 /* Default to forward because I got too much mail already. */ 53 - static bool forward = true; 51 + static bool forward __read_mostly = true; 54 52 module_param(forward, bool, 0000); 55 53 56 - static int __net_init iptable_filter_net_init(struct net *net) 54 + static int __net_init iptable_filter_table_init(struct net *net) 57 55 { 58 56 struct ipt_replace *repl; 59 57 int err; 58 + 59 + if (net->ipv4.iptable_filter) 60 + return 0; 60 61 61 62 repl = ipt_alloc_initial_table(&packet_filter); 62 63 if (repl == NULL) ··· 74 69 return err; 75 70 } 76 71 72 + static int __net_init iptable_filter_net_init(struct net *net) 73 + { 74 + if (net == &init_net || !forward) 75 + return iptable_filter_table_init(net); 76 + 77 + return 0; 78 + } 79 + 77 80 static void __net_exit iptable_filter_net_exit(struct net *net) 78 81 { 82 + if (!net->ipv4.iptable_filter) 83 + return; 79 84 ipt_unregister_table(net, net->ipv4.iptable_filter, filter_ops); 85 + net->ipv4.iptable_filter = NULL; 80 86 } 81 87 82 88 static struct pernet_operations iptable_filter_net_ops = { ··· 99 83 { 100 84 int ret; 101 85 86 + filter_ops = xt_hook_ops_alloc(&packet_filter, iptable_filter_hook); 87 + if (IS_ERR(filter_ops)) 88 + return PTR_ERR(filter_ops); 89 + 102 90 ret = register_pernet_subsys(&iptable_filter_net_ops); 103 91 if (ret < 0) 104 - return ret; 105 - 106 - /* Register hooks */ 107 - filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); 108 - if 
(IS_ERR(filter_ops)) { 109 - ret = PTR_ERR(filter_ops); 110 - unregister_pernet_subsys(&iptable_filter_net_ops); 111 - } 92 + kfree(filter_ops); 112 93 113 94 return ret; 114 95 } 115 96 116 97 static void __exit iptable_filter_fini(void) 117 98 { 118 - xt_hook_unlink(&packet_filter, filter_ops); 119 99 unregister_pernet_subsys(&iptable_filter_net_ops); 100 + kfree(filter_ops); 120 101 } 121 102 122 103 module_init(iptable_filter_init);
+24 -10
net/ipv4/netfilter/iptable_mangle.c
··· 28 28 (1 << NF_INET_LOCAL_OUT) | \ 29 29 (1 << NF_INET_POST_ROUTING)) 30 30 31 + static int __net_init iptable_mangle_table_init(struct net *net); 32 + 31 33 static const struct xt_table packet_mangler = { 32 34 .name = "mangle", 33 35 .valid_hooks = MANGLE_VALID_HOOKS, 34 36 .me = THIS_MODULE, 35 37 .af = NFPROTO_IPV4, 36 38 .priority = NF_IP_PRI_MANGLE, 39 + .table_init = iptable_mangle_table_init, 37 40 }; 38 41 39 42 static unsigned int ··· 95 92 } 96 93 97 94 static struct nf_hook_ops *mangle_ops __read_mostly; 98 - 99 - static int __net_init iptable_mangle_net_init(struct net *net) 95 + static int __net_init iptable_mangle_table_init(struct net *net) 100 96 { 101 97 struct ipt_replace *repl; 102 98 int ret; 99 + 100 + if (net->ipv4.iptable_mangle) 101 + return 0; 103 102 104 103 repl = ipt_alloc_initial_table(&packet_mangler); 105 104 if (repl == NULL) ··· 114 109 115 110 static void __net_exit iptable_mangle_net_exit(struct net *net) 116 111 { 112 + if (!net->ipv4.iptable_mangle) 113 + return; 117 114 ipt_unregister_table(net, net->ipv4.iptable_mangle, mangle_ops); 115 + net->ipv4.iptable_mangle = NULL; 118 116 } 119 117 120 118 static struct pernet_operations iptable_mangle_net_ops = { 121 - .init = iptable_mangle_net_init, 122 119 .exit = iptable_mangle_net_exit, 123 120 }; 124 121 ··· 128 121 { 129 122 int ret; 130 123 131 - ret = register_pernet_subsys(&iptable_mangle_net_ops); 132 - if (ret < 0) 133 - return ret; 134 - 135 - /* Register hooks */ 136 - mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); 124 + mangle_ops = xt_hook_ops_alloc(&packet_mangler, iptable_mangle_hook); 137 125 if (IS_ERR(mangle_ops)) { 138 126 ret = PTR_ERR(mangle_ops); 127 + return ret; 128 + } 129 + 130 + ret = register_pernet_subsys(&iptable_mangle_net_ops); 131 + if (ret < 0) { 132 + kfree(mangle_ops); 133 + return ret; 134 + } 135 + 136 + ret = iptable_mangle_table_init(&init_net); 137 + if (ret) { 139 138 unregister_pernet_subsys(&iptable_mangle_net_ops); 
139 + kfree(mangle_ops); 140 140 } 141 141 142 142 return ret; ··· 151 137 152 138 static void __exit iptable_mangle_fini(void) 153 139 { 154 - xt_hook_unlink(&packet_mangler, mangle_ops); 155 140 unregister_pernet_subsys(&iptable_mangle_net_ops); 141 + kfree(mangle_ops); 156 142 } 157 143 158 144 module_init(iptable_mangle_init);
+17 -16
net/ipv4/netfilter/iptable_nat.c
··· 18 18 #include <net/netfilter/nf_nat_core.h> 19 19 #include <net/netfilter/nf_nat_l3proto.h> 20 20 21 + static int __net_init iptable_nat_table_init(struct net *net); 22 + 21 23 static const struct xt_table nf_nat_ipv4_table = { 22 24 .name = "nat", 23 25 .valid_hooks = (1 << NF_INET_PRE_ROUTING) | ··· 28 26 (1 << NF_INET_LOCAL_IN), 29 27 .me = THIS_MODULE, 30 28 .af = NFPROTO_IPV4, 29 + .table_init = iptable_nat_table_init, 31 30 }; 32 31 33 32 static unsigned int iptable_nat_do_chain(void *priv, ··· 98 95 }, 99 96 }; 100 97 101 - static int __net_init iptable_nat_net_init(struct net *net) 98 + static int __net_init iptable_nat_table_init(struct net *net) 102 99 { 103 100 struct ipt_replace *repl; 104 101 int ret; 102 + 103 + if (net->ipv4.nat_table) 104 + return 0; 105 105 106 106 repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); 107 107 if (repl == NULL) ··· 117 111 118 112 static void __net_exit iptable_nat_net_exit(struct net *net) 119 113 { 114 + if (!net->ipv4.nat_table) 115 + return; 120 116 ipt_unregister_table(net, net->ipv4.nat_table, nf_nat_ipv4_ops); 117 + net->ipv4.nat_table = NULL; 121 118 } 122 119 123 120 static struct pernet_operations iptable_nat_net_ops = { 124 - .init = iptable_nat_net_init, 125 121 .exit = iptable_nat_net_exit, 126 122 }; 127 123 128 124 static int __init iptable_nat_init(void) 129 125 { 130 - int err; 126 + int ret = register_pernet_subsys(&iptable_nat_net_ops); 131 127 132 - err = register_pernet_subsys(&iptable_nat_net_ops); 133 - if (err < 0) 134 - goto err1; 128 + if (ret) 129 + return ret; 135 130 136 - err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); 137 - if (err < 0) 138 - goto err2; 139 - return 0; 140 - 141 - err2: 142 - unregister_pernet_subsys(&iptable_nat_net_ops); 143 - err1: 144 - return err; 131 + ret = iptable_nat_table_init(&init_net); 132 + if (ret) 133 + unregister_pernet_subsys(&iptable_nat_net_ops); 134 + return ret; 145 135 } 146 136 147 137 static void __exit 
iptable_nat_exit(void) 148 138 { 149 - nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); 150 139 unregister_pernet_subsys(&iptable_nat_net_ops); 151 140 } 152 141
+23 -10
net/ipv4/netfilter/iptable_raw.c
··· 10 10 11 11 #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) 12 12 13 + static int __net_init iptable_raw_table_init(struct net *net); 14 + 13 15 static const struct xt_table packet_raw = { 14 16 .name = "raw", 15 17 .valid_hooks = RAW_VALID_HOOKS, 16 18 .me = THIS_MODULE, 17 19 .af = NFPROTO_IPV4, 18 20 .priority = NF_IP_PRI_RAW, 21 + .table_init = iptable_raw_table_init, 19 22 }; 20 23 21 24 /* The work comes in here from netfilter.c. */ ··· 37 34 38 35 static struct nf_hook_ops *rawtable_ops __read_mostly; 39 36 40 - static int __net_init iptable_raw_net_init(struct net *net) 37 + static int __net_init iptable_raw_table_init(struct net *net) 41 38 { 42 39 struct ipt_replace *repl; 43 40 int ret; 41 + 42 + if (net->ipv4.iptable_raw) 43 + return 0; 44 44 45 45 repl = ipt_alloc_initial_table(&packet_raw); 46 46 if (repl == NULL) ··· 56 50 57 51 static void __net_exit iptable_raw_net_exit(struct net *net) 58 52 { 53 + if (!net->ipv4.iptable_raw) 54 + return; 59 55 ipt_unregister_table(net, net->ipv4.iptable_raw, rawtable_ops); 56 + net->ipv4.iptable_raw = NULL; 60 57 } 61 58 62 59 static struct pernet_operations iptable_raw_net_ops = { 63 - .init = iptable_raw_net_init, 64 60 .exit = iptable_raw_net_exit, 65 61 }; 66 62 ··· 70 62 { 71 63 int ret; 72 64 73 - ret = register_pernet_subsys(&iptable_raw_net_ops); 74 - if (ret < 0) 75 - return ret; 65 + rawtable_ops = xt_hook_ops_alloc(&packet_raw, iptable_raw_hook); 66 + if (IS_ERR(rawtable_ops)) 67 + return PTR_ERR(rawtable_ops); 76 68 77 - /* Register hooks */ 78 - rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); 79 - if (IS_ERR(rawtable_ops)) { 80 - ret = PTR_ERR(rawtable_ops); 69 + ret = register_pernet_subsys(&iptable_raw_net_ops); 70 + if (ret < 0) { 71 + kfree(rawtable_ops); 72 + return ret; 73 + } 74 + 75 + ret = iptable_raw_table_init(&init_net); 76 + if (ret) { 81 77 unregister_pernet_subsys(&iptable_raw_net_ops); 78 + kfree(rawtable_ops); 82 79 } 83 80 84 81 return 
ret; ··· 91 78 92 79 static void __exit iptable_raw_fini(void) 93 80 { 94 - xt_hook_unlink(&packet_raw, rawtable_ops); 95 81 unregister_pernet_subsys(&iptable_raw_net_ops); 82 + kfree(rawtable_ops); 96 83 } 97 84 98 85 module_init(iptable_raw_init);
+24 -13
net/ipv4/netfilter/iptable_security.c
··· 28 28 (1 << NF_INET_FORWARD) | \ 29 29 (1 << NF_INET_LOCAL_OUT) 30 30 31 + static int __net_init iptable_security_table_init(struct net *net); 32 + 31 33 static const struct xt_table security_table = { 32 34 .name = "security", 33 35 .valid_hooks = SECURITY_VALID_HOOKS, 34 36 .me = THIS_MODULE, 35 37 .af = NFPROTO_IPV4, 36 38 .priority = NF_IP_PRI_SECURITY, 39 + .table_init = iptable_security_table_init, 37 40 }; 38 41 39 42 static unsigned int ··· 54 51 55 52 static struct nf_hook_ops *sectbl_ops __read_mostly; 56 53 57 - static int __net_init iptable_security_net_init(struct net *net) 54 + static int __net_init iptable_security_table_init(struct net *net) 58 55 { 59 56 struct ipt_replace *repl; 60 57 int ret; 58 + 59 + if (net->ipv4.iptable_security) 60 + return 0; 61 61 62 62 repl = ipt_alloc_initial_table(&security_table); 63 63 if (repl == NULL) ··· 73 67 74 68 static void __net_exit iptable_security_net_exit(struct net *net) 75 69 { 70 + if (!net->ipv4.iptable_security) 71 + return; 72 + 76 73 ipt_unregister_table(net, net->ipv4.iptable_security, sectbl_ops); 74 + net->ipv4.iptable_security = NULL; 77 75 } 78 76 79 77 static struct pernet_operations iptable_security_net_ops = { 80 - .init = iptable_security_net_init, 81 78 .exit = iptable_security_net_exit, 82 79 }; 83 80 ··· 88 79 { 89 80 int ret; 90 81 91 - ret = register_pernet_subsys(&iptable_security_net_ops); 92 - if (ret < 0) 93 - return ret; 82 + sectbl_ops = xt_hook_ops_alloc(&security_table, iptable_security_hook); 83 + if (IS_ERR(sectbl_ops)) 84 + return PTR_ERR(sectbl_ops); 94 85 95 - sectbl_ops = xt_hook_link(&security_table, iptable_security_hook); 96 - if (IS_ERR(sectbl_ops)) { 97 - ret = PTR_ERR(sectbl_ops); 98 - goto cleanup_table; 86 + ret = register_pernet_subsys(&iptable_security_net_ops); 87 + if (ret < 0) { 88 + kfree(sectbl_ops); 89 + return ret; 99 90 } 100 91 101 - return ret; 92 + ret = iptable_security_table_init(&init_net); 93 + if (ret) { 94 + 
unregister_pernet_subsys(&iptable_security_net_ops); 95 + kfree(sectbl_ops); 96 + } 102 97 103 - cleanup_table: 104 - unregister_pernet_subsys(&iptable_security_net_ops); 105 98 return ret; 106 99 } 107 100 108 101 static void __exit iptable_security_fini(void) 109 102 { 110 - xt_hook_unlink(&security_table, sectbl_ops); 111 103 unregister_pernet_subsys(&iptable_security_net_ops); 104 + kfree(sectbl_ops); 112 105 } 113 106 114 107 module_init(iptable_security_init);
+28 -14
net/ipv6/netfilter/ip6_tables.c
··· 2071 2071 return ret; 2072 2072 } 2073 2073 2074 + static void __ip6t_unregister_table(struct net *net, struct xt_table *table) 2075 + { 2076 + struct xt_table_info *private; 2077 + void *loc_cpu_entry; 2078 + struct module *table_owner = table->me; 2079 + struct ip6t_entry *iter; 2080 + 2081 + private = xt_unregister_table(table); 2082 + 2083 + /* Decrease module usage counts and free resources */ 2084 + loc_cpu_entry = private->entries; 2085 + xt_entry_foreach(iter, loc_cpu_entry, private->size) 2086 + cleanup_entry(iter, net); 2087 + if (private->number > private->initial_entries) 2088 + module_put(table_owner); 2089 + xt_free_table_info(private); 2090 + } 2091 + 2074 2092 int ip6t_register_table(struct net *net, const struct xt_table *table, 2075 2093 const struct ip6t_replace *repl, 2076 2094 const struct nf_hook_ops *ops, ··· 2117 2099 goto out_free; 2118 2100 } 2119 2101 2102 + /* set res now, will see skbs right after nf_register_net_hooks */ 2120 2103 WRITE_ONCE(*res, new_table); 2104 + 2105 + ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); 2106 + if (ret != 0) { 2107 + __ip6t_unregister_table(net, new_table); 2108 + *res = NULL; 2109 + } 2110 + 2121 2111 return ret; 2122 2112 2123 2113 out_free: ··· 2136 2110 void ip6t_unregister_table(struct net *net, struct xt_table *table, 2137 2111 const struct nf_hook_ops *ops) 2138 2112 { 2139 - struct xt_table_info *private; 2140 - void *loc_cpu_entry; 2141 - struct module *table_owner = table->me; 2142 - struct ip6t_entry *iter; 2143 - 2144 - private = xt_unregister_table(table); 2145 - 2146 - /* Decrease module usage counts and free resources */ 2147 - loc_cpu_entry = private->entries; 2148 - xt_entry_foreach(iter, loc_cpu_entry, private->size) 2149 - cleanup_entry(iter, net); 2150 - if (private->number > private->initial_entries) 2151 - module_put(table_owner); 2152 - xt_free_table_info(private); 2113 + nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); 2114 + 
__ip6t_unregister_table(net, table); 2153 2115 } 2154 2116 2155 2117 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
+24 -14
net/ipv6/netfilter/ip6table_filter.c
··· 22 22 (1 << NF_INET_FORWARD) | \ 23 23 (1 << NF_INET_LOCAL_OUT)) 24 24 25 + static int __net_init ip6table_filter_table_init(struct net *net); 26 + 25 27 static const struct xt_table packet_filter = { 26 28 .name = "filter", 27 29 .valid_hooks = FILTER_VALID_HOOKS, 28 30 .me = THIS_MODULE, 29 31 .af = NFPROTO_IPV6, 30 32 .priority = NF_IP6_PRI_FILTER, 33 + .table_init = ip6table_filter_table_init, 31 34 }; 32 35 33 36 /* The work comes in here from netfilter.c. */ ··· 47 44 static bool forward = true; 48 45 module_param(forward, bool, 0000); 49 46 50 - static int __net_init ip6table_filter_net_init(struct net *net) 47 + static int __net_init ip6table_filter_table_init(struct net *net) 51 48 { 52 49 struct ip6t_replace *repl; 53 50 int err; 51 + 52 + if (net->ipv6.ip6table_filter) 53 + return 0; 54 54 55 55 repl = ip6t_alloc_initial_table(&packet_filter); 56 56 if (repl == NULL) ··· 68 62 return err; 69 63 } 70 64 65 + static int __net_init ip6table_filter_net_init(struct net *net) 66 + { 67 + if (net == &init_net || !forward) 68 + return ip6table_filter_table_init(net); 69 + 70 + return 0; 71 + } 72 + 71 73 static void __net_exit ip6table_filter_net_exit(struct net *net) 72 74 { 75 + if (!net->ipv6.ip6table_filter) 76 + return; 73 77 ip6t_unregister_table(net, net->ipv6.ip6table_filter, filter_ops); 78 + net->ipv6.ip6table_filter = NULL; 74 79 } 75 80 76 81 static struct pernet_operations ip6table_filter_net_ops = { ··· 93 76 { 94 77 int ret; 95 78 79 + filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook); 80 + if (IS_ERR(filter_ops)) 81 + return PTR_ERR(filter_ops); 82 + 96 83 ret = register_pernet_subsys(&ip6table_filter_net_ops); 97 84 if (ret < 0) 98 - return ret; 85 + kfree(filter_ops); 99 86 100 - /* Register hooks */ 101 - filter_ops = xt_hook_link(&packet_filter, ip6table_filter_hook); 102 - if (IS_ERR(filter_ops)) { 103 - ret = PTR_ERR(filter_ops); 104 - goto cleanup_table; 105 - } 106 - 107 - return ret; 108 - 109 - cleanup_table: 110 
- unregister_pernet_subsys(&ip6table_filter_net_ops); 111 87 return ret; 112 88 } 113 89 114 90 static void __exit ip6table_filter_fini(void) 115 91 { 116 - xt_hook_unlink(&packet_filter, filter_ops); 117 92 unregister_pernet_subsys(&ip6table_filter_net_ops); 93 + kfree(filter_ops); 118 94 } 119 95 120 96 module_init(ip6table_filter_init);
+24 -15
net/ipv6/netfilter/ip6table_mangle.c
··· 23 23 (1 << NF_INET_LOCAL_OUT) | \ 24 24 (1 << NF_INET_POST_ROUTING)) 25 25 26 + static int __net_init ip6table_mangle_table_init(struct net *net); 27 + 26 28 static const struct xt_table packet_mangler = { 27 29 .name = "mangle", 28 30 .valid_hooks = MANGLE_VALID_HOOKS, 29 31 .me = THIS_MODULE, 30 32 .af = NFPROTO_IPV6, 31 33 .priority = NF_IP6_PRI_MANGLE, 34 + .table_init = ip6table_mangle_table_init, 32 35 }; 33 36 34 37 static unsigned int ··· 91 88 } 92 89 93 90 static struct nf_hook_ops *mangle_ops __read_mostly; 94 - static int __net_init ip6table_mangle_net_init(struct net *net) 91 + static int __net_init ip6table_mangle_table_init(struct net *net) 95 92 { 96 93 struct ip6t_replace *repl; 97 94 int ret; 95 + 96 + if (net->ipv6.ip6table_mangle) 97 + return 0; 98 98 99 99 repl = ip6t_alloc_initial_table(&packet_mangler); 100 100 if (repl == NULL) ··· 110 104 111 105 static void __net_exit ip6table_mangle_net_exit(struct net *net) 112 106 { 107 + if (!net->ipv6.ip6table_mangle) 108 + return; 109 + 113 110 ip6t_unregister_table(net, net->ipv6.ip6table_mangle, mangle_ops); 111 + net->ipv6.ip6table_mangle = NULL; 114 112 } 115 113 116 114 static struct pernet_operations ip6table_mangle_net_ops = { 117 - .init = ip6table_mangle_net_init, 118 115 .exit = ip6table_mangle_net_exit, 119 116 }; 120 117 ··· 125 116 { 126 117 int ret; 127 118 128 - ret = register_pernet_subsys(&ip6table_mangle_net_ops); 129 - if (ret < 0) 130 - return ret; 119 + mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook); 120 + if (IS_ERR(mangle_ops)) 121 + return PTR_ERR(mangle_ops); 131 122 132 - /* Register hooks */ 133 - mangle_ops = xt_hook_link(&packet_mangler, ip6table_mangle_hook); 134 - if (IS_ERR(mangle_ops)) { 135 - ret = PTR_ERR(mangle_ops); 136 - goto cleanup_table; 123 + ret = register_pernet_subsys(&ip6table_mangle_net_ops); 124 + if (ret < 0) { 125 + kfree(mangle_ops); 126 + return ret; 137 127 } 138 128 139 - return ret; 140 - 141 - cleanup_table: 142 - 
unregister_pernet_subsys(&ip6table_mangle_net_ops); 129 + ret = ip6table_mangle_table_init(&init_net); 130 + if (ret) { 131 + unregister_pernet_subsys(&ip6table_mangle_net_ops); 132 + kfree(mangle_ops); 133 + } 143 134 return ret; 144 135 } 145 136 146 137 static void __exit ip6table_mangle_fini(void) 147 138 { 148 - xt_hook_unlink(&packet_mangler, mangle_ops); 149 139 unregister_pernet_subsys(&ip6table_mangle_net_ops); 140 + kfree(mangle_ops); 150 141 } 151 142 152 143 module_init(ip6table_mangle_init);
+17 -16
net/ipv6/netfilter/ip6table_nat.c
··· 20 20 #include <net/netfilter/nf_nat_core.h> 21 21 #include <net/netfilter/nf_nat_l3proto.h> 22 22 23 + static int __net_init ip6table_nat_table_init(struct net *net); 24 + 23 25 static const struct xt_table nf_nat_ipv6_table = { 24 26 .name = "nat", 25 27 .valid_hooks = (1 << NF_INET_PRE_ROUTING) | ··· 30 28 (1 << NF_INET_LOCAL_IN), 31 29 .me = THIS_MODULE, 32 30 .af = NFPROTO_IPV6, 31 + .table_init = ip6table_nat_table_init, 33 32 }; 34 33 35 34 static unsigned int ip6table_nat_do_chain(void *priv, ··· 100 97 }, 101 98 }; 102 99 103 - static int __net_init ip6table_nat_net_init(struct net *net) 100 + static int __net_init ip6table_nat_table_init(struct net *net) 104 101 { 105 102 struct ip6t_replace *repl; 106 103 int ret; 104 + 105 + if (net->ipv6.ip6table_nat) 106 + return 0; 107 107 108 108 repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); 109 109 if (repl == NULL) ··· 119 113 120 114 static void __net_exit ip6table_nat_net_exit(struct net *net) 121 115 { 116 + if (!net->ipv6.ip6table_nat) 117 + return; 122 118 ip6t_unregister_table(net, net->ipv6.ip6table_nat, nf_nat_ipv6_ops); 119 + net->ipv6.ip6table_nat = NULL; 123 120 } 124 121 125 122 static struct pernet_operations ip6table_nat_net_ops = { 126 - .init = ip6table_nat_net_init, 127 123 .exit = ip6table_nat_net_exit, 128 124 }; 129 125 130 126 static int __init ip6table_nat_init(void) 131 127 { 132 - int err; 128 + int ret = register_pernet_subsys(&ip6table_nat_net_ops); 133 129 134 - err = register_pernet_subsys(&ip6table_nat_net_ops); 135 - if (err < 0) 136 - goto err1; 130 + if (ret) 131 + return ret; 137 132 138 - err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); 139 - if (err < 0) 140 - goto err2; 141 - return 0; 142 - 143 - err2: 144 - unregister_pernet_subsys(&ip6table_nat_net_ops); 145 - err1: 146 - return err; 133 + ret = ip6table_nat_table_init(&init_net); 134 + if (ret) 135 + unregister_pernet_subsys(&ip6table_nat_net_ops); 136 + return ret; 147 137 } 148 138 149 139 
static void __exit ip6table_nat_exit(void) 150 140 { 151 - nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); 152 141 unregister_pernet_subsys(&ip6table_nat_net_ops); 153 142 } 154 143
+24 -15
net/ipv6/netfilter/ip6table_raw.c
··· 9 9 10 10 #define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) 11 11 12 + static int __net_init ip6table_raw_table_init(struct net *net); 13 + 12 14 static const struct xt_table packet_raw = { 13 15 .name = "raw", 14 16 .valid_hooks = RAW_VALID_HOOKS, 15 17 .me = THIS_MODULE, 16 18 .af = NFPROTO_IPV6, 17 19 .priority = NF_IP6_PRI_RAW, 20 + .table_init = ip6table_raw_table_init, 18 21 }; 19 22 20 23 /* The work comes in here from netfilter.c. */ ··· 30 27 31 28 static struct nf_hook_ops *rawtable_ops __read_mostly; 32 29 33 - static int __net_init ip6table_raw_net_init(struct net *net) 30 + static int __net_init ip6table_raw_table_init(struct net *net) 34 31 { 35 32 struct ip6t_replace *repl; 36 33 int ret; 34 + 35 + if (net->ipv6.ip6table_raw) 36 + return 0; 37 37 38 38 repl = ip6t_alloc_initial_table(&packet_raw); 39 39 if (repl == NULL) ··· 49 43 50 44 static void __net_exit ip6table_raw_net_exit(struct net *net) 51 45 { 46 + if (!net->ipv6.ip6table_raw) 47 + return; 52 48 ip6t_unregister_table(net, net->ipv6.ip6table_raw, rawtable_ops); 49 + net->ipv6.ip6table_raw = NULL; 53 50 } 54 51 55 52 static struct pernet_operations ip6table_raw_net_ops = { 56 - .init = ip6table_raw_net_init, 57 53 .exit = ip6table_raw_net_exit, 58 54 }; 59 55 ··· 63 55 { 64 56 int ret; 65 57 66 - ret = register_pernet_subsys(&ip6table_raw_net_ops); 67 - if (ret < 0) 68 - return ret; 69 - 70 58 /* Register hooks */ 71 - rawtable_ops = xt_hook_link(&packet_raw, ip6table_raw_hook); 72 - if (IS_ERR(rawtable_ops)) { 73 - ret = PTR_ERR(rawtable_ops); 74 - goto cleanup_table; 59 + rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook); 60 + if (IS_ERR(rawtable_ops)) 61 + return PTR_ERR(rawtable_ops); 62 + 63 + ret = register_pernet_subsys(&ip6table_raw_net_ops); 64 + if (ret < 0) { 65 + kfree(rawtable_ops); 66 + return ret; 75 67 } 76 68 77 - return ret; 78 - 79 - cleanup_table: 80 - unregister_pernet_subsys(&ip6table_raw_net_ops); 69 + ret = 
ip6table_raw_table_init(&init_net); 70 + if (ret) { 71 + unregister_pernet_subsys(&ip6table_raw_net_ops); 72 + kfree(rawtable_ops); 73 + } 81 74 return ret; 82 75 } 83 76 84 77 static void __exit ip6table_raw_fini(void) 85 78 { 86 - xt_hook_unlink(&packet_raw, rawtable_ops); 87 79 unregister_pernet_subsys(&ip6table_raw_net_ops); 80 + kfree(rawtable_ops); 88 81 } 89 82 90 83 module_init(ip6table_raw_init);
+23 -14
net/ipv6/netfilter/ip6table_security.c
··· 27 27 (1 << NF_INET_FORWARD) | \ 28 28 (1 << NF_INET_LOCAL_OUT) 29 29 30 + static int __net_init ip6table_security_table_init(struct net *net); 31 + 30 32 static const struct xt_table security_table = { 31 33 .name = "security", 32 34 .valid_hooks = SECURITY_VALID_HOOKS, 33 35 .me = THIS_MODULE, 34 36 .af = NFPROTO_IPV6, 35 37 .priority = NF_IP6_PRI_SECURITY, 38 + .table_init = ip6table_security_table_init, 36 39 }; 37 40 38 41 static unsigned int ··· 47 44 48 45 static struct nf_hook_ops *sectbl_ops __read_mostly; 49 46 50 - static int __net_init ip6table_security_net_init(struct net *net) 47 + static int __net_init ip6table_security_table_init(struct net *net) 51 48 { 52 49 struct ip6t_replace *repl; 53 50 int ret; 51 + 52 + if (net->ipv6.ip6table_security) 53 + return 0; 54 54 55 55 repl = ip6t_alloc_initial_table(&security_table); 56 56 if (repl == NULL) ··· 66 60 67 61 static void __net_exit ip6table_security_net_exit(struct net *net) 68 62 { 63 + if (!net->ipv6.ip6table_security) 64 + return; 69 65 ip6t_unregister_table(net, net->ipv6.ip6table_security, sectbl_ops); 66 + net->ipv6.ip6table_security = NULL; 70 67 } 71 68 72 69 static struct pernet_operations ip6table_security_net_ops = { 73 - .init = ip6table_security_net_init, 74 70 .exit = ip6table_security_net_exit, 75 71 }; 76 72 ··· 80 72 { 81 73 int ret; 82 74 83 - ret = register_pernet_subsys(&ip6table_security_net_ops); 84 - if (ret < 0) 85 - return ret; 75 + sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook); 76 + if (IS_ERR(sectbl_ops)) 77 + return PTR_ERR(sectbl_ops); 86 78 87 - sectbl_ops = xt_hook_link(&security_table, ip6table_security_hook); 88 - if (IS_ERR(sectbl_ops)) { 89 - ret = PTR_ERR(sectbl_ops); 90 - goto cleanup_table; 79 + ret = register_pernet_subsys(&ip6table_security_net_ops); 80 + if (ret < 0) { 81 + kfree(sectbl_ops); 82 + return ret; 91 83 } 92 84 93 - return ret; 94 - 95 - cleanup_table: 96 - unregister_pernet_subsys(&ip6table_security_net_ops); 85 + ret 
= ip6table_security_table_init(&init_net); 86 + if (ret) { 87 + unregister_pernet_subsys(&ip6table_security_net_ops); 88 + kfree(sectbl_ops); 89 + } 97 90 return ret; 98 91 } 99 92 100 93 static void __exit ip6table_security_fini(void) 101 94 { 102 - xt_hook_unlink(&security_table, sectbl_ops); 103 95 unregister_pernet_subsys(&ip6table_security_net_ops); 96 + kfree(sectbl_ops); 104 97 } 105 98 106 99 module_init(ip6table_security_init);
+40 -25
net/netfilter/x_tables.c
··· 694 694 struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, 695 695 const char *name) 696 696 { 697 - struct xt_table *t; 697 + struct xt_table *t, *found = NULL; 698 698 699 699 mutex_lock(&xt[af].mutex); 700 700 list_for_each_entry(t, &net->xt.tables[af], list) 701 701 if (strcmp(t->name, name) == 0 && try_module_get(t->me)) 702 702 return t; 703 + 704 + if (net == &init_net) 705 + goto out; 706 + 707 + /* Table doesn't exist in this netns, re-try init */ 708 + list_for_each_entry(t, &init_net.xt.tables[af], list) { 709 + if (strcmp(t->name, name)) 710 + continue; 711 + if (!try_module_get(t->me)) 712 + return NULL; 713 + 714 + mutex_unlock(&xt[af].mutex); 715 + if (t->table_init(net) != 0) { 716 + module_put(t->me); 717 + return NULL; 718 + } 719 + 720 + found = t; 721 + 722 + mutex_lock(&xt[af].mutex); 723 + break; 724 + } 725 + 726 + if (!found) 727 + goto out; 728 + 729 + /* and once again: */ 730 + list_for_each_entry(t, &net->xt.tables[af], list) 731 + if (strcmp(t->name, name) == 0) 732 + return t; 733 + 734 + module_put(found->me); 735 + out: 703 736 mutex_unlock(&xt[af].mutex); 704 737 return NULL; 705 738 } ··· 1203 1170 #endif /* CONFIG_PROC_FS */ 1204 1171 1205 1172 /** 1206 - * xt_hook_link - set up hooks for a new table 1173 + * xt_hook_ops_alloc - set up hooks for a new table 1207 1174 * @table: table with metadata needed to set up hooks 1208 1175 * @fn: Hook function 1209 1176 * 1210 - * This function will take care of creating and registering the necessary 1211 - * Netfilter hooks for XT tables. 1177 + * This function will create the nf_hook_ops that the x_table needs 1178 + * to hand to xt_hook_link_net(). 
1212 1179 */ 1213 - struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn) 1180 + struct nf_hook_ops * 1181 + xt_hook_ops_alloc(const struct xt_table *table, nf_hookfn *fn) 1214 1182 { 1215 1183 unsigned int hook_mask = table->valid_hooks; 1216 1184 uint8_t i, num_hooks = hweight32(hook_mask); 1217 1185 uint8_t hooknum; 1218 1186 struct nf_hook_ops *ops; 1219 - int ret; 1220 1187 1221 1188 ops = kmalloc(sizeof(*ops) * num_hooks, GFP_KERNEL); 1222 1189 if (ops == NULL) ··· 1233 1200 ++i; 1234 1201 } 1235 1202 1236 - ret = nf_register_hooks(ops, num_hooks); 1237 - if (ret < 0) { 1238 - kfree(ops); 1239 - return ERR_PTR(ret); 1240 - } 1241 - 1242 1203 return ops; 1243 1204 } 1244 - EXPORT_SYMBOL_GPL(xt_hook_link); 1245 - 1246 - /** 1247 - * xt_hook_unlink - remove hooks for a table 1248 - * @ops: nf_hook_ops array as returned by nf_hook_link 1249 - * @hook_mask: the very same mask that was passed to nf_hook_link 1250 - */ 1251 - void xt_hook_unlink(const struct xt_table *table, struct nf_hook_ops *ops) 1252 - { 1253 - nf_unregister_hooks(ops, hweight32(table->valid_hooks)); 1254 - kfree(ops); 1255 - } 1256 - EXPORT_SYMBOL_GPL(xt_hook_unlink); 1205 + EXPORT_SYMBOL_GPL(xt_hook_ops_alloc); 1257 1206 1258 1207 int xt_proto_init(struct net *net, u_int8_t af) 1259 1208 {