Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[NETNS][IPV6] route6 - make route6 per namespace

This patch makes the routing engine use the network namespaces to
access routing information: add a network namespace parameter to
ipv6_route_ioctl and propagate the network namespace value to all the
routing code that has not yet been changed.

Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Daniel Lezcano and committed by
David S. Miller
5578689a 7b4da532

+54 -51
+3 -1
include/net/ip6_route.h
··· 49 49 extern int ip6_route_init(void); 50 50 extern void ip6_route_cleanup(void); 51 51 52 - extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg); 52 + extern int ipv6_route_ioctl(struct net *net, 53 + unsigned int cmd, 54 + void __user *arg); 53 55 54 56 extern int ip6_route_add(struct fib6_config *cfg); 55 57 extern int ip6_ins_rt(struct rt6_info *);
+2 -1
net/ipv6/af_inet6.c
··· 440 440 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 441 441 { 442 442 struct sock *sk = sock->sk; 443 + struct net *net = sk->sk_net; 443 444 444 445 switch(cmd) 445 446 { ··· 453 452 case SIOCADDRT: 454 453 case SIOCDELRT: 455 454 456 - return(ipv6_route_ioctl(cmd,(void __user *)arg)); 455 + return(ipv6_route_ioctl(net, cmd, (void __user *)arg)); 457 456 458 457 case SIOCSIFADDR: 459 458 return addrconf_add_ifaddr((void __user *) arg);
+49 -49
net/ipv6/route.c
··· 609 609 int ip6_ins_rt(struct rt6_info *rt) 610 610 { 611 611 struct nl_info info = { 612 - .nl_net = &init_net, 612 + .nl_net = rt->rt6i_dev->nd_net, 613 613 }; 614 614 return __ip6_ins_rt(rt, &info); 615 615 } ··· 746 746 void ip6_route_input(struct sk_buff *skb) 747 747 { 748 748 struct ipv6hdr *iph = ipv6_hdr(skb); 749 + struct net *net = skb->dev->nd_net; 749 750 int flags = RT6_LOOKUP_F_HAS_SADDR; 750 751 struct flowi fl = { 751 752 .iif = skb->dev->ifindex, ··· 764 763 if (rt6_need_strict(&iph->daddr)) 765 764 flags |= RT6_LOOKUP_F_IFACE; 766 765 767 - skb->dst = fib6_rule_lookup(&init_net, &fl, flags, ip6_pol_route_input); 766 + skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input); 768 767 } 769 768 770 769 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, ··· 892 891 893 892 static int ipv6_get_mtu(struct net_device *dev); 894 893 895 - static inline unsigned int ipv6_advmss(unsigned int mtu) 894 + static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) 896 895 { 897 896 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 898 897 899 - if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss) 900 - mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss; 898 + if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) 899 + mtu = net->ipv6.sysctl.ip6_rt_min_advmss; 901 900 902 901 /* 903 902 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and ··· 919 918 { 920 919 struct rt6_info *rt; 921 920 struct inet6_dev *idev = in6_dev_get(dev); 921 + struct net *net = dev->nd_net; 922 922 923 923 if (unlikely(idev == NULL)) 924 924 return NULL; ··· 942 940 atomic_set(&rt->u.dst.__refcnt, 1); 943 941 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255; 944 942 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 945 - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 943 + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 946 944 rt->u.dst.output = ip6_output; 947 945 948 946 #if 0 /* there's no 
chance to use these for ndisc */ ··· 958 956 icmp6_dst_gc_list = &rt->u.dst; 959 957 spin_unlock_bh(&icmp6_dst_lock); 960 958 961 - fib6_force_start_gc(dev->nd_net); 959 + fib6_force_start_gc(net); 962 960 963 961 out: 964 962 return &rt->u.dst; ··· 1051 1049 int ip6_route_add(struct fib6_config *cfg) 1052 1050 { 1053 1051 int err; 1052 + struct net *net = cfg->fc_nlinfo.nl_net; 1054 1053 struct rt6_info *rt = NULL; 1055 1054 struct net_device *dev = NULL; 1056 1055 struct inet6_dev *idev = NULL; ··· 1066 1063 #endif 1067 1064 if (cfg->fc_ifindex) { 1068 1065 err = -ENODEV; 1069 - dev = dev_get_by_index(&init_net, cfg->fc_ifindex); 1066 + dev = dev_get_by_index(net, cfg->fc_ifindex); 1070 1067 if (!dev) 1071 1068 goto out; 1072 1069 idev = in6_dev_get(dev); ··· 1077 1074 if (cfg->fc_metric == 0) 1078 1075 cfg->fc_metric = IP6_RT_PRIO_USER; 1079 1076 1080 - table = fib6_new_table(&init_net, cfg->fc_table); 1077 + table = fib6_new_table(net, cfg->fc_table); 1081 1078 if (table == NULL) { 1082 1079 err = -ENOBUFS; 1083 1080 goto out; ··· 1124 1121 if ((cfg->fc_flags & RTF_REJECT) || 1125 1122 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { 1126 1123 /* hold loopback dev/idev if we haven't done so. 
*/ 1127 - if (dev != init_net.loopback_dev) { 1124 + if (dev != net->loopback_dev) { 1128 1125 if (dev) { 1129 1126 dev_put(dev); 1130 1127 in6_dev_put(idev); 1131 1128 } 1132 - dev = init_net.loopback_dev; 1129 + dev = net->loopback_dev; 1133 1130 dev_hold(dev); 1134 1131 idev = in6_dev_get(dev); 1135 1132 if (!idev) { ··· 1166 1163 if (!(gwa_type&IPV6_ADDR_UNICAST)) 1167 1164 goto out; 1168 1165 1169 - grt = rt6_lookup(&init_net, gw_addr, NULL, cfg->fc_ifindex, 1); 1166 + grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1); 1170 1167 1171 1168 err = -EHOSTUNREACH; 1172 1169 if (grt == NULL) ··· 1233 1230 if (!rt->u.dst.metrics[RTAX_MTU-1]) 1234 1231 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev); 1235 1232 if (!rt->u.dst.metrics[RTAX_ADVMSS-1]) 1236 - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 1233 + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 1237 1234 rt->u.dst.dev = dev; 1238 1235 rt->rt6i_idev = idev; 1239 1236 rt->rt6i_table = table; ··· 1274 1271 int ip6_del_rt(struct rt6_info *rt) 1275 1272 { 1276 1273 struct nl_info info = { 1277 - .nl_net = &init_net, 1274 + .nl_net = rt->rt6i_dev->nd_net, 1278 1275 }; 1279 1276 return __ip6_del_rt(rt, &info); 1280 1277 } ··· 1286 1283 struct rt6_info *rt; 1287 1284 int err = -ESRCH; 1288 1285 1289 - table = fib6_get_table(&init_net, cfg->fc_table); 1286 + table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); 1290 1287 if (table == NULL) 1291 1288 return err; 1292 1289 ··· 1385 1382 struct net_device *dev) 1386 1383 { 1387 1384 int flags = RT6_LOOKUP_F_HAS_SADDR; 1385 + struct net *net = dev->nd_net; 1388 1386 struct ip6rd_flowi rdfl = { 1389 1387 .fl = { 1390 1388 .oif = dev->ifindex, ··· 1402 1398 if (rt6_need_strict(dest)) 1403 1399 flags |= RT6_LOOKUP_F_IFACE; 1404 1400 1405 - return (struct rt6_info *)fib6_rule_lookup(&init_net, 1406 - (struct flowi *)&rdfl, 1401 + return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl, 1407 1402 
flags, __ip6_route_redirect); 1408 1403 } 1409 1404 ··· 1460 1457 nrt->rt6i_nexthop = neigh_clone(neigh); 1461 1458 /* Reset pmtu, it may be better */ 1462 1459 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); 1463 - nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); 1460 + nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net, 1461 + dst_mtu(&nrt->u.dst)); 1464 1462 1465 1463 if (ip6_ins_rt(nrt)) 1466 1464 goto out; ··· 1489 1485 struct net_device *dev, u32 pmtu) 1490 1486 { 1491 1487 struct rt6_info *rt, *nrt; 1488 + struct net *net = dev->nd_net; 1492 1489 int allfrag = 0; 1493 1490 1494 - rt = rt6_lookup(dev->nd_net, daddr, saddr, dev->ifindex, 0); 1491 + rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0); 1495 1492 if (rt == NULL) 1496 1493 return; 1497 1494 ··· 1525 1520 rt->u.dst.metrics[RTAX_MTU-1] = pmtu; 1526 1521 if (allfrag) 1527 1522 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG; 1528 - dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires); 1523 + dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); 1529 1524 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES; 1530 1525 goto out; 1531 1526 } ··· 1551 1546 * which is 10 mins. After 10 mins the decreased pmtu is expired 1552 1547 * and detecting PMTU increase will be automatically happened. 
1553 1548 */ 1554 - dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires); 1549 + dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires); 1555 1550 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; 1556 1551 1557 1552 ip6_ins_rt(nrt); ··· 1664 1659 struct rt6_info *rt; 1665 1660 struct fib6_table *table; 1666 1661 1667 - table = fib6_get_table(&init_net, RT6_TABLE_DFLT); 1662 + table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT); 1668 1663 if (table == NULL) 1669 1664 return NULL; 1670 1665 ··· 1693 1688 .fc_ifindex = dev->ifindex, 1694 1689 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | 1695 1690 RTF_UP | RTF_EXPIRES | RTF_PREF(pref), 1691 + .fc_nlinfo.pid = 0, 1692 + .fc_nlinfo.nlh = NULL, 1693 + .fc_nlinfo.nl_net = dev->nd_net, 1696 1694 }; 1697 1695 1698 1696 ipv6_addr_copy(&cfg.fc_gateway, gwaddr); ··· 1728 1720 read_unlock_bh(&table->tb6_lock); 1729 1721 } 1730 1722 1731 - static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, 1723 + static void rtmsg_to_fib6_config(struct net *net, 1724 + struct in6_rtmsg *rtmsg, 1732 1725 struct fib6_config *cfg) 1733 1726 { 1734 1727 memset(cfg, 0, sizeof(*cfg)); ··· 1742 1733 cfg->fc_src_len = rtmsg->rtmsg_src_len; 1743 1734 cfg->fc_flags = rtmsg->rtmsg_flags; 1744 1735 1745 - cfg->fc_nlinfo.nl_net = &init_net; 1736 + cfg->fc_nlinfo.nl_net = net; 1746 1737 1747 1738 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); 1748 1739 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); 1749 1740 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); 1750 1741 } 1751 1742 1752 - int ipv6_route_ioctl(unsigned int cmd, void __user *arg) 1743 + int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) 1753 1744 { 1754 1745 struct fib6_config cfg; 1755 1746 struct in6_rtmsg rtmsg; ··· 1765 1756 if (err) 1766 1757 return -EFAULT; 1767 1758 1768 - rtmsg_to_fib6_config(&rtmsg, &cfg); 1759 + rtmsg_to_fib6_config(net, &rtmsg, &cfg); 1769 1760 1770 1761 rtnl_lock(); 1771 1762 switch (cmd) { ··· 
1844 1835 const struct in6_addr *addr, 1845 1836 int anycast) 1846 1837 { 1838 + struct net *net = idev->dev->nd_net; 1847 1839 struct rt6_info *rt = ip6_dst_alloc(); 1848 1840 1849 1841 if (rt == NULL) 1850 1842 return ERR_PTR(-ENOMEM); 1851 1843 1852 - dev_hold(init_net.loopback_dev); 1844 + dev_hold(net->loopback_dev); 1853 1845 in6_dev_hold(idev); 1854 1846 1855 1847 rt->u.dst.flags = DST_HOST; 1856 1848 rt->u.dst.input = ip6_input; 1857 1849 rt->u.dst.output = ip6_output; 1858 - rt->rt6i_dev = init_net.loopback_dev; 1850 + rt->rt6i_dev = net->loopback_dev; 1859 1851 rt->rt6i_idev = idev; 1860 1852 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev); 1861 - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); 1853 + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst)); 1862 1854 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1; 1863 1855 rt->u.dst.obsolete = -1; 1864 1856 ··· 1876 1866 1877 1867 ipv6_addr_copy(&rt->rt6i_dst.addr, addr); 1878 1868 rt->rt6i_dst.plen = 128; 1879 - rt->rt6i_table = fib6_get_table(&init_net, RT6_TABLE_LOCAL); 1869 + rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); 1880 1870 1881 1871 atomic_set(&rt->u.dst.__refcnt, 1); 1882 1872 ··· 1908 1898 { 1909 1899 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 1910 1900 struct inet6_dev *idev; 1901 + struct net *net = arg->dev->nd_net; 1911 1902 1912 1903 /* In IPv6 pmtu discovery is not optional, 1913 1904 so that RTAX_MTU lock cannot disable it. 
··· 1940 1929 (dst_mtu(&rt->u.dst) < arg->mtu && 1941 1930 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) { 1942 1931 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu; 1943 - rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu); 1932 + rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu); 1944 1933 } 1945 1934 return 0; 1946 1935 } ··· 2035 2024 2036 2025 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2037 2026 { 2038 - struct net *net = skb->sk->sk_net; 2039 2027 struct fib6_config cfg; 2040 2028 int err; 2041 - 2042 - if (net != &init_net) 2043 - return -EINVAL; 2044 2029 2045 2030 err = rtm_to_fib6_config(skb, nlh, &cfg); 2046 2031 if (err < 0) ··· 2047 2040 2048 2041 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) 2049 2042 { 2050 - struct net *net = skb->sk->sk_net; 2051 2043 struct fib6_config cfg; 2052 2044 int err; 2053 - 2054 - if (net != &init_net) 2055 - return -EINVAL; 2056 2045 2057 2046 err = rtm_to_fib6_config(skb, nlh, &cfg); 2058 2047 if (err < 0) ··· 2193 2190 struct flowi fl; 2194 2191 int err, iif = 0; 2195 2192 2196 - if (net != &init_net) 2197 - return -EINVAL; 2198 - 2199 2193 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); 2200 2194 if (err < 0) 2201 2195 goto errout; ··· 2222 2222 2223 2223 if (iif) { 2224 2224 struct net_device *dev; 2225 - dev = __dev_get_by_index(&init_net, iif); 2225 + dev = __dev_get_by_index(net, iif); 2226 2226 if (!dev) { 2227 2227 err = -ENODEV; 2228 2228 goto errout; ··· 2252 2252 goto errout; 2253 2253 } 2254 2254 2255 - err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); 2255 + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); 2256 2256 errout: 2257 2257 return err; 2258 2258 } ··· 2260 2260 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) 2261 2261 { 2262 2262 struct sk_buff *skb; 2263 + struct net *net = info->nl_net; 2263 2264 u32 seq; 2264 2265 int err; 2265 2266 ··· 2279 2278 
kfree_skb(skb); 2280 2279 goto errout; 2281 2280 } 2282 - err = rtnl_notify(skb, &init_net, info->pid, 2283 - RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); 2281 + err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2282 + info->nlh, gfp_any()); 2284 2283 errout: 2285 2284 if (err < 0) 2286 - rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err); 2285 + rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 2287 2286 } 2288 2287 2289 2288 /* ··· 2545 2544 proc_net_remove(net, "ipv6_route"); 2546 2545 proc_net_remove(net, "rt6_stats"); 2547 2546 #endif 2547 + rt6_ifdown(net, NULL); 2548 2548 } 2549 2549 2550 2550 static struct pernet_operations ip6_route_net_ops = { ··· 2594 2592 xfrm6_init: 2595 2593 xfrm6_fini(); 2596 2594 out_fib6_init: 2597 - rt6_ifdown(&init_net, NULL); 2598 2595 fib6_gc_cleanup(); 2599 2596 out_kmem_cache: 2600 2597 kmem_cache_destroy(ip6_dst_ops.kmem_cachep); ··· 2605 2604 unregister_pernet_subsys(&ip6_route_net_ops); 2606 2605 fib6_rules_cleanup(); 2607 2606 xfrm6_fini(); 2608 - rt6_ifdown(&init_net, NULL); 2609 2607 fib6_gc_cleanup(); 2610 2608 kmem_cache_destroy(ip6_dst_ops.kmem_cachep); 2611 2609 }