Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipvlan: use pernet operations and restrict l3s hooks to master netns

commit 4fbae7d83c98c30efc ("ipvlan: Introduce l3s mode") added
registration of netfilter hooks via nf_register_hooks().

This API provides the illusion of 'global' netfilter hooks by placing the
hooks in all current and future network namespaces.

In the case of ipvlan, the hooks appear to be needed only in the namespace
that contains the ipvlan master device (i.e., usually init_net), so
placing them in all namespaces is unnecessary.

This switches the ipvlan driver to pernet operations, and then only registers
hooks in namespaces where an ipvlan master device is set to l3s mode.

Extra care has to be taken when the master device is moved to another
namespace, as we might have to 'move' the netfilter hooks too.

This is done by storing the namespace the ipvlan port was created in.
On a NETDEV_REGISTER event, the hooks are registered in the new namespace
and unregistered from the old one, if the old namespace had them registered.

This will also allow removal of the nf_register_hooks() in a future patch.

Cc: Mahesh Bandewar <maheshb@google.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Florian Westphal and committed by
David S. Miller
3133822f 86a5df14

+70 -15
+2
drivers/net/ipvlan/ipvlan.h
··· 26 26 #include <linux/netfilter.h> 27 27 #include <net/ip.h> 28 28 #include <net/ip6_route.h> 29 + #include <net/netns/generic.h> 29 30 #include <net/rtnetlink.h> 30 31 #include <net/route.h> 31 32 #include <net/addrconf.h> ··· 92 91 93 92 struct ipvl_port { 94 93 struct net_device *dev; 94 + possible_net_t pnet; 95 95 struct hlist_head hlhead[IPVLAN_HASH_SIZE]; 96 96 struct list_head ipvlans; 97 97 u16 mode;
+68 -15
drivers/net/ipvlan/ipvlan_main.c
··· 9 9 10 10 #include "ipvlan.h" 11 11 12 - static u32 ipvl_nf_hook_refcnt = 0; 12 + static unsigned int ipvlan_netid __read_mostly; 13 + 14 + struct ipvlan_netns { 15 + unsigned int ipvl_nf_hook_refcnt; 16 + }; 13 17 14 18 static struct nf_hook_ops ipvl_nfops[] __read_mostly = { 15 19 { ··· 39 35 ipvlan->dev->mtu = dev->mtu; 40 36 } 41 37 42 - static int ipvlan_register_nf_hook(void) 38 + static int ipvlan_register_nf_hook(struct net *net) 43 39 { 40 + struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); 44 41 int err = 0; 45 42 46 - if (!ipvl_nf_hook_refcnt) { 47 - err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); 43 + if (!vnet->ipvl_nf_hook_refcnt) { 44 + err = nf_register_net_hooks(net, ipvl_nfops, 45 + ARRAY_SIZE(ipvl_nfops)); 48 46 if (!err) 49 - ipvl_nf_hook_refcnt = 1; 47 + vnet->ipvl_nf_hook_refcnt = 1; 50 48 } else { 51 - ipvl_nf_hook_refcnt++; 49 + vnet->ipvl_nf_hook_refcnt++; 52 50 } 53 51 54 52 return err; 55 53 } 56 54 57 - static void ipvlan_unregister_nf_hook(void) 55 + static void ipvlan_unregister_nf_hook(struct net *net) 58 56 { 59 - WARN_ON(!ipvl_nf_hook_refcnt); 57 + struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); 60 58 61 - ipvl_nf_hook_refcnt--; 62 - if (!ipvl_nf_hook_refcnt) 63 - _nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); 59 + if (WARN_ON(!vnet->ipvl_nf_hook_refcnt)) 60 + return; 61 + 62 + vnet->ipvl_nf_hook_refcnt--; 63 + if (!vnet->ipvl_nf_hook_refcnt) 64 + nf_unregister_net_hooks(net, ipvl_nfops, 65 + ARRAY_SIZE(ipvl_nfops)); 64 66 } 65 67 66 68 static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) ··· 79 69 if (port->mode != nval) { 80 70 if (nval == IPVLAN_MODE_L3S) { 81 71 /* New mode is L3S */ 82 - err = ipvlan_register_nf_hook(); 72 + err = ipvlan_register_nf_hook(read_pnet(&port->pnet)); 83 73 if (!err) { 84 74 mdev->l3mdev_ops = &ipvl_l3mdev_ops; 85 75 mdev->priv_flags |= IFF_L3MDEV_MASTER; ··· 88 78 } else if (port->mode == IPVLAN_MODE_L3S) { 89 79 /* Old mode was L3S */ 
90 80 mdev->priv_flags &= ~IFF_L3MDEV_MASTER; 91 - ipvlan_unregister_nf_hook(); 81 + ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); 92 82 mdev->l3mdev_ops = NULL; 93 83 } 94 84 list_for_each_entry(ipvlan, &port->ipvlans, pnode) { ··· 121 111 if (!port) 122 112 return -ENOMEM; 123 113 114 + write_pnet(&port->pnet, dev_net(dev)); 124 115 port->dev = dev; 125 116 port->mode = IPVLAN_MODE_L3; 126 117 INIT_LIST_HEAD(&port->ipvlans); ··· 153 142 dev->priv_flags &= ~IFF_IPVLAN_MASTER; 154 143 if (port->mode == IPVLAN_MODE_L3S) { 155 144 dev->priv_flags &= ~IFF_L3MDEV_MASTER; 156 - ipvlan_unregister_nf_hook(); 145 + ipvlan_unregister_nf_hook(dev_net(dev)); 157 146 dev->l3mdev_ops = NULL; 158 147 } 159 148 netdev_rx_handler_unregister(dev); ··· 684 673 ipvlan->dev); 685 674 break; 686 675 676 + case NETDEV_REGISTER: { 677 + struct net *oldnet, *newnet = dev_net(dev); 678 + struct ipvlan_netns *old_vnet; 679 + 680 + oldnet = read_pnet(&port->pnet); 681 + if (net_eq(newnet, oldnet)) 682 + break; 683 + 684 + write_pnet(&port->pnet, newnet); 685 + 686 + old_vnet = net_generic(oldnet, ipvlan_netid); 687 + if (!old_vnet->ipvl_nf_hook_refcnt) 688 + break; 689 + 690 + ipvlan_register_nf_hook(newnet); 691 + ipvlan_unregister_nf_hook(oldnet); 692 + break; 693 + } 687 694 case NETDEV_UNREGISTER: 688 695 if (dev->reg_state != NETREG_UNREGISTERING) 689 696 break; ··· 883 854 .notifier_call = ipvlan_addr6_event, 884 855 }; 885 856 857 + static void ipvlan_ns_exit(struct net *net) 858 + { 859 + struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); 860 + 861 + if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) { 862 + vnet->ipvl_nf_hook_refcnt = 0; 863 + nf_unregister_net_hooks(net, ipvl_nfops, 864 + ARRAY_SIZE(ipvl_nfops)); 865 + } 866 + } 867 + 868 + static struct pernet_operations ipvlan_net_ops = { 869 + .id = &ipvlan_netid, 870 + .size = sizeof(struct ipvlan_netns), 871 + .exit = ipvlan_ns_exit, 872 + }; 873 + 886 874 static int __init ipvlan_init_module(void) 887 875 { 888 876 
int err; ··· 909 863 register_inet6addr_notifier(&ipvlan_addr6_notifier_block); 910 864 register_inetaddr_notifier(&ipvlan_addr4_notifier_block); 911 865 912 - err = ipvlan_link_register(&ipvlan_link_ops); 866 + err = register_pernet_subsys(&ipvlan_net_ops); 913 867 if (err < 0) 914 868 goto error; 869 + 870 + err = ipvlan_link_register(&ipvlan_link_ops); 871 + if (err < 0) { 872 + unregister_pernet_subsys(&ipvlan_net_ops); 873 + goto error; 874 + } 915 875 916 876 return 0; 917 877 error: ··· 930 878 static void __exit ipvlan_cleanup_module(void) 931 879 { 932 880 rtnl_link_unregister(&ipvlan_link_ops); 881 + unregister_pernet_subsys(&ipvlan_net_ops); 933 882 unregister_netdevice_notifier(&ipvlan_notifier_block); 934 883 unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block); 935 884 unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);