Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

rtnetlink: use netnsid to query interface

Currently, when an application gets netnsid from the kernel (for example as
the result of RTM_GETLINK call on one end of the veth pair), it's not very
useful. There's no reliable way to get to the netns fd from the netnsid, nor
does any kernel API accept netnsid.

Extend the RTM_GETLINK call to also accept netnsid. It will operate on the
netns with the given netnsid in such case. Of course, the calling process
needs to have enough capabilities in the target name space; for now, require
CAP_NET_ADMIN. This can be relaxed in the future.

To signal to the calling process that the kernel understood the new
IFLA_IF_NETNSID attribute in the query, it will include it in the response.
This is needed to detect older kernels, as they will just ignore
IFLA_IF_NETNSID and query in the current name space.

This patch implements IFLA_IF_NETNSID only for get and dump. For set
operations, this can be extended later.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Jiri Benc and committed by
David S. Miller
79e1ad14 9354d452

+86 -18
+1
include/uapi/linux/if_link.h
··· 160 160 IFLA_XDP, 161 161 IFLA_EVENT, 162 162 IFLA_NEW_NETNSID, 163 + IFLA_IF_NETNSID, 163 164 __IFLA_MAX 164 165 }; 165 166
+85 -18
net/core/rtnetlink.c
··· 921 921 + nla_total_size(4) /* IFLA_EVENT */ 922 922 + nla_total_size(4) /* IFLA_NEW_NETNSID */ 923 923 + nla_total_size(1); /* IFLA_PROTO_DOWN */ 924 - 924 + + nla_total_size(4) /* IFLA_IF_NETNSID */ 925 + + 0; 925 926 } 926 927 927 928 static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) ··· 1371 1370 } 1372 1371 1373 1372 static int rtnl_fill_link_netnsid(struct sk_buff *skb, 1374 - const struct net_device *dev) 1373 + const struct net_device *dev, 1374 + struct net *src_net) 1375 1375 { 1376 1376 if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) { 1377 1377 struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); 1378 1378 1379 1379 if (!net_eq(dev_net(dev), link_net)) { 1380 - int id = peernet2id_alloc(dev_net(dev), link_net); 1380 + int id = peernet2id_alloc(src_net, link_net); 1381 1381 1382 1382 if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) 1383 1383 return -EMSGSIZE; ··· 1429 1427 return 0; 1430 1428 } 1431 1429 1432 - static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, 1430 + static int rtnl_fill_ifinfo(struct sk_buff *skb, 1431 + struct net_device *dev, struct net *src_net, 1433 1432 int type, u32 pid, u32 seq, u32 change, 1434 1433 unsigned int flags, u32 ext_filter_mask, 1435 - u32 event, int *new_nsid) 1434 + u32 event, int *new_nsid, int tgt_netnsid) 1436 1435 { 1437 1436 struct ifinfomsg *ifm; 1438 1437 struct nlmsghdr *nlh; ··· 1450 1447 ifm->ifi_index = dev->ifindex; 1451 1448 ifm->ifi_flags = dev_get_flags(dev); 1452 1449 ifm->ifi_change = change; 1450 + 1451 + if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid)) 1452 + goto nla_put_failure; 1453 1453 1454 1454 if (nla_put_string(skb, IFLA_IFNAME, dev->name) || 1455 1455 nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || ··· 1519 1513 goto nla_put_failure; 1520 1514 } 1521 1515 1522 - if (rtnl_fill_link_netnsid(skb, dev)) 1516 + if (rtnl_fill_link_netnsid(skb, dev, src_net)) 1523 1517 goto nla_put_failure; 1524 1518 
1525 1519 if (new_nsid && ··· 1577 1571 [IFLA_XDP] = { .type = NLA_NESTED }, 1578 1572 [IFLA_EVENT] = { .type = NLA_U32 }, 1579 1573 [IFLA_GROUP] = { .type = NLA_U32 }, 1574 + [IFLA_IF_NETNSID] = { .type = NLA_S32 }, 1580 1575 }; 1581 1576 1582 1577 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { ··· 1681 1674 return false; 1682 1675 } 1683 1676 1677 + static struct net *get_target_net(struct sk_buff *skb, int netnsid) 1678 + { 1679 + struct net *net; 1680 + 1681 + net = get_net_ns_by_id(sock_net(skb->sk), netnsid); 1682 + if (!net) 1683 + return ERR_PTR(-EINVAL); 1684 + 1685 + /* For now, the caller is required to have CAP_NET_ADMIN in 1686 + * the user namespace owning the target net ns. 1687 + */ 1688 + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { 1689 + put_net(net); 1690 + return ERR_PTR(-EACCES); 1691 + } 1692 + return net; 1693 + } 1694 + 1684 1695 static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) 1685 1696 { 1686 1697 struct net *net = sock_net(skb->sk); 1698 + struct net *tgt_net = net; 1687 1699 int h, s_h; 1688 1700 int idx = 0, s_idx; 1689 1701 struct net_device *dev; ··· 1712 1686 const struct rtnl_link_ops *kind_ops = NULL; 1713 1687 unsigned int flags = NLM_F_MULTI; 1714 1688 int master_idx = 0; 1689 + int netnsid = -1; 1715 1690 int err; 1716 1691 int hdrlen; 1717 1692 ··· 1731 1704 1732 1705 if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, 1733 1706 ifla_policy, NULL) >= 0) { 1707 + if (tb[IFLA_IF_NETNSID]) { 1708 + netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); 1709 + tgt_net = get_target_net(skb, netnsid); 1710 + if (IS_ERR(tgt_net)) { 1711 + tgt_net = net; 1712 + netnsid = -1; 1713 + } 1714 + } 1715 + 1734 1716 if (tb[IFLA_EXT_MASK]) 1735 1717 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); 1736 1718 ··· 1755 1719 1756 1720 for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1757 1721 idx = 0; 1758 - head = &net->dev_index_head[h]; 1722 + head = &tgt_net->dev_index_head[h]; 1759 
1723 hlist_for_each_entry(dev, head, index_hlist) { 1760 1724 if (link_dump_filtered(dev, master_idx, kind_ops)) 1761 1725 goto cont; 1762 1726 if (idx < s_idx) 1763 1727 goto cont; 1764 - err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, 1728 + err = rtnl_fill_ifinfo(skb, dev, net, 1729 + RTM_NEWLINK, 1765 1730 NETLINK_CB(cb->skb).portid, 1766 1731 cb->nlh->nlmsg_seq, 0, 1767 1732 flags, 1768 - ext_filter_mask, 0, NULL); 1733 + ext_filter_mask, 0, NULL, 1734 + netnsid); 1769 1735 1770 1736 if (err < 0) { 1771 1737 if (likely(skb->len)) ··· 1786 1748 cb->args[0] = h; 1787 1749 cb->seq = net->dev_base_seq; 1788 1750 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 1751 + if (netnsid >= 0) 1752 + put_net(tgt_net); 1789 1753 1790 1754 return err; 1791 1755 } ··· 2400 2360 if (err < 0) 2401 2361 goto errout; 2402 2362 2363 + if (tb[IFLA_IF_NETNSID]) 2364 + return -EOPNOTSUPP; 2365 + 2403 2366 if (tb[IFLA_IFNAME]) 2404 2367 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); 2405 2368 else ··· 2496 2453 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); 2497 2454 if (err < 0) 2498 2455 return err; 2456 + 2457 + if (tb[IFLA_IF_NETNSID]) 2458 + return -EOPNOTSUPP; 2499 2459 2500 2460 if (tb[IFLA_IFNAME]) 2501 2461 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); ··· 2630 2584 err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack); 2631 2585 if (err < 0) 2632 2586 return err; 2587 + 2588 + if (tb[IFLA_IF_NETNSID]) 2589 + return -EOPNOTSUPP; 2633 2590 2634 2591 if (tb[IFLA_IFNAME]) 2635 2592 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); ··· 2867 2818 struct netlink_ext_ack *extack) 2868 2819 { 2869 2820 struct net *net = sock_net(skb->sk); 2821 + struct net *tgt_net = net; 2870 2822 struct ifinfomsg *ifm; 2871 2823 char ifname[IFNAMSIZ]; 2872 2824 struct nlattr *tb[IFLA_MAX+1]; 2873 2825 struct net_device *dev = NULL; 2874 2826 struct sk_buff *nskb; 2827 + int netnsid = -1; 2875 2828 int err; 2876 2829 u32 ext_filter_mask = 0; 2877 2830 ··· 2881 
2830 if (err < 0) 2882 2831 return err; 2883 2832 2833 + if (tb[IFLA_IF_NETNSID]) { 2834 + netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); 2835 + tgt_net = get_target_net(skb, netnsid); 2836 + if (IS_ERR(tgt_net)) 2837 + return PTR_ERR(tgt_net); 2838 + } 2839 + 2884 2840 if (tb[IFLA_IFNAME]) 2885 2841 nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); 2886 2842 2887 2843 if (tb[IFLA_EXT_MASK]) 2888 2844 ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); 2889 2845 2846 + err = -EINVAL; 2890 2847 ifm = nlmsg_data(nlh); 2891 2848 if (ifm->ifi_index > 0) 2892 - dev = __dev_get_by_index(net, ifm->ifi_index); 2849 + dev = __dev_get_by_index(tgt_net, ifm->ifi_index); 2893 2850 else if (tb[IFLA_IFNAME]) 2894 - dev = __dev_get_by_name(net, ifname); 2851 + dev = __dev_get_by_name(tgt_net, ifname); 2895 2852 else 2896 - return -EINVAL; 2853 + goto out; 2897 2854 2855 + err = -ENODEV; 2898 2856 if (dev == NULL) 2899 - return -ENODEV; 2857 + goto out; 2900 2858 2859 + err = -ENOBUFS; 2901 2860 nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); 2902 2861 if (nskb == NULL) 2903 - return -ENOBUFS; 2862 + goto out; 2904 2863 2905 - err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, 2906 - nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0, NULL); 2864 + err = rtnl_fill_ifinfo(nskb, dev, net, 2865 + RTM_NEWLINK, NETLINK_CB(skb).portid, 2866 + nlh->nlmsg_seq, 0, 0, ext_filter_mask, 2867 + 0, NULL, netnsid); 2907 2868 if (err < 0) { 2908 2869 /* -EMSGSIZE implies BUG in if_nlmsg_size */ 2909 2870 WARN_ON(err == -EMSGSIZE); 2910 2871 kfree_skb(nskb); 2911 2872 } else 2912 2873 err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); 2874 + out: 2875 + if (netnsid >= 0) 2876 + put_net(tgt_net); 2913 2877 2914 2878 return err; 2915 2879 } ··· 3014 2948 if (skb == NULL) 3015 2949 goto errout; 3016 2950 3017 - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event, 3018 - new_nsid); 2951 + err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), 2952 + type, 0, 0, change, 
0, 0, event, 2953 + new_nsid, -1); 3019 2954 if (err < 0) { 3020 2955 /* -EMSGSIZE implies BUG in if_nlmsg_size() */ 3021 2956 WARN_ON(err == -EMSGSIZE);