Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'inet_dump_ifaddr-no-rtnl'

Eric Dumazet says:

====================
inet: no longer use RTNL to protect inet_dump_ifaddr()

This series converts inet so that a dump of addresses (ip -4 addr)
no longer requires RTNL.
====================

Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

+79 -92
+3 -2
net/core/dev.c
··· 180 180 181 181 static inline void dev_base_seq_inc(struct net *net) 182 182 { 183 - while (++net->dev_base_seq == 0) 184 - ; 183 + unsigned int val = net->dev_base_seq + 1; 184 + 185 + WRITE_ONCE(net->dev_base_seq, val ?: 1); 185 186 } 186 187 187 188 static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
+76 -90
net/ipv4/devinet.c
··· 713 713 714 714 rcu_read_lock(); 715 715 hlist_for_each_entry_rcu(ifa, &inet_addr_lst[i], hash) { 716 - unsigned long age; 716 + unsigned long age, tstamp; 717 + u32 preferred_lft; 718 + u32 valid_lft; 719 + u32 flags; 717 720 718 - if (ifa->ifa_flags & IFA_F_PERMANENT) 721 + flags = READ_ONCE(ifa->ifa_flags); 722 + if (flags & IFA_F_PERMANENT) 719 723 continue; 720 724 725 + preferred_lft = READ_ONCE(ifa->ifa_preferred_lft); 726 + valid_lft = READ_ONCE(ifa->ifa_valid_lft); 727 + tstamp = READ_ONCE(ifa->ifa_tstamp); 721 728 /* We try to batch several events at once. */ 722 - age = (now - ifa->ifa_tstamp + 729 + age = (now - tstamp + 723 730 ADDRCONF_TIMER_FUZZ_MINUS) / HZ; 724 731 725 - if (ifa->ifa_valid_lft != INFINITY_LIFE_TIME && 726 - age >= ifa->ifa_valid_lft) { 732 + if (valid_lft != INFINITY_LIFE_TIME && 733 + age >= valid_lft) { 727 734 change_needed = true; 728 - } else if (ifa->ifa_preferred_lft == 735 + } else if (preferred_lft == 729 736 INFINITY_LIFE_TIME) { 730 737 continue; 731 - } else if (age >= ifa->ifa_preferred_lft) { 732 - if (time_before(ifa->ifa_tstamp + 733 - ifa->ifa_valid_lft * HZ, next)) 734 - next = ifa->ifa_tstamp + 735 - ifa->ifa_valid_lft * HZ; 738 + } else if (age >= preferred_lft) { 739 + if (time_before(tstamp + valid_lft * HZ, next)) 740 + next = tstamp + valid_lft * HZ; 736 741 737 - if (!(ifa->ifa_flags & IFA_F_DEPRECATED)) 742 + if (!(flags & IFA_F_DEPRECATED)) 738 743 change_needed = true; 739 - } else if (time_before(ifa->ifa_tstamp + 740 - ifa->ifa_preferred_lft * HZ, 744 + } else if (time_before(tstamp + preferred_lft * HZ, 741 745 next)) { 742 - next = ifa->ifa_tstamp + 743 - ifa->ifa_preferred_lft * HZ; 746 + next = tstamp + preferred_lft * HZ; 744 747 } 745 748 } 746 749 rcu_read_unlock(); ··· 807 804 __u32 prefered_lft) 808 805 { 809 806 unsigned long timeout; 807 + u32 flags; 810 808 811 - ifa->ifa_flags &= ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); 809 + flags = ifa->ifa_flags & ~(IFA_F_PERMANENT | IFA_F_DEPRECATED); 
812 810 813 811 timeout = addrconf_timeout_fixup(valid_lft, HZ); 814 812 if (addrconf_finite_timeout(timeout)) 815 - ifa->ifa_valid_lft = timeout; 813 + WRITE_ONCE(ifa->ifa_valid_lft, timeout); 816 814 else 817 - ifa->ifa_flags |= IFA_F_PERMANENT; 815 + flags |= IFA_F_PERMANENT; 818 816 819 817 timeout = addrconf_timeout_fixup(prefered_lft, HZ); 820 818 if (addrconf_finite_timeout(timeout)) { 821 819 if (timeout == 0) 822 - ifa->ifa_flags |= IFA_F_DEPRECATED; 823 - ifa->ifa_preferred_lft = timeout; 820 + flags |= IFA_F_DEPRECATED; 821 + WRITE_ONCE(ifa->ifa_preferred_lft, timeout); 824 822 } 825 - ifa->ifa_tstamp = jiffies; 823 + WRITE_ONCE(ifa->ifa_flags, flags); 824 + WRITE_ONCE(ifa->ifa_tstamp, jiffies); 826 825 if (!ifa->ifa_cstamp) 827 - ifa->ifa_cstamp = ifa->ifa_tstamp; 826 + WRITE_ONCE(ifa->ifa_cstamp, ifa->ifa_tstamp); 828 827 } 829 828 830 829 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ··· 1317 1312 const struct in_ifaddr *ifa; 1318 1313 1319 1314 in_dev_for_each_ifa_rcu(ifa, in_dev) { 1320 - if (ifa->ifa_flags & IFA_F_SECONDARY) 1315 + if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY) 1321 1316 continue; 1322 1317 if (ifa->ifa_scope != RT_SCOPE_LINK && 1323 1318 ifa->ifa_scope <= scope) ··· 1345 1340 localnet_scope = RT_SCOPE_LINK; 1346 1341 1347 1342 in_dev_for_each_ifa_rcu(ifa, in_dev) { 1348 - if (ifa->ifa_flags & IFA_F_SECONDARY) 1343 + if (READ_ONCE(ifa->ifa_flags) & IFA_F_SECONDARY) 1349 1344 continue; 1350 1345 if (min(ifa->ifa_scope, localnet_scope) > scope) 1351 1346 continue; ··· 1676 1671 return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); 1677 1672 } 1678 1673 1679 - static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, 1674 + static int inet_fill_ifaddr(struct sk_buff *skb, const struct in_ifaddr *ifa, 1680 1675 struct inet_fill_args *args) 1681 1676 { 1682 1677 struct ifaddrmsg *ifm; 1683 1678 struct nlmsghdr *nlh; 1679 + unsigned long tstamp; 1684 1680 u32 preferred, valid; 1685 1681 1686 
1682 nlh = nlmsg_put(skb, args->portid, args->seq, args->event, sizeof(*ifm), ··· 1692 1686 ifm = nlmsg_data(nlh); 1693 1687 ifm->ifa_family = AF_INET; 1694 1688 ifm->ifa_prefixlen = ifa->ifa_prefixlen; 1695 - ifm->ifa_flags = ifa->ifa_flags; 1689 + ifm->ifa_flags = READ_ONCE(ifa->ifa_flags); 1696 1690 ifm->ifa_scope = ifa->ifa_scope; 1697 1691 ifm->ifa_index = ifa->ifa_dev->dev->ifindex; 1698 1692 ··· 1700 1694 nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) 1701 1695 goto nla_put_failure; 1702 1696 1697 + tstamp = READ_ONCE(ifa->ifa_tstamp); 1703 1698 if (!(ifm->ifa_flags & IFA_F_PERMANENT)) { 1704 - preferred = ifa->ifa_preferred_lft; 1705 - valid = ifa->ifa_valid_lft; 1699 + preferred = READ_ONCE(ifa->ifa_preferred_lft); 1700 + valid = READ_ONCE(ifa->ifa_valid_lft); 1706 1701 if (preferred != INFINITY_LIFE_TIME) { 1707 - long tval = (jiffies - ifa->ifa_tstamp) / HZ; 1702 + long tval = (jiffies - tstamp) / HZ; 1708 1703 1709 1704 if (preferred > tval) 1710 1705 preferred -= tval; ··· 1732 1725 nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || 1733 1726 (ifa->ifa_proto && 1734 1727 nla_put_u8(skb, IFA_PROTO, ifa->ifa_proto)) || 1735 - nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) || 1728 + nla_put_u32(skb, IFA_FLAGS, ifm->ifa_flags) || 1736 1729 (ifa->ifa_rt_priority && 1737 1730 nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) || 1738 - put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp, 1731 + put_cacheinfo(skb, READ_ONCE(ifa->ifa_cstamp), tstamp, 1739 1732 preferred, valid)) 1740 1733 goto nla_put_failure; 1741 1734 ··· 1805 1798 } 1806 1799 1807 1800 static int in_dev_dump_addr(struct in_device *in_dev, struct sk_buff *skb, 1808 - struct netlink_callback *cb, int s_ip_idx, 1801 + struct netlink_callback *cb, int *s_ip_idx, 1809 1802 struct inet_fill_args *fillargs) 1810 1803 { 1811 1804 struct in_ifaddr *ifa; 1812 1805 int ip_idx = 0; 1813 1806 int err; 1814 1807 1815 - in_dev_for_each_ifa_rtnl(ifa, in_dev) { 1816 - if (ip_idx < s_ip_idx) { 1808 
+ in_dev_for_each_ifa_rcu(ifa, in_dev) { 1809 + if (ip_idx < *s_ip_idx) { 1817 1810 ip_idx++; 1818 1811 continue; 1819 1812 } ··· 1825 1818 ip_idx++; 1826 1819 } 1827 1820 err = 0; 1828 - 1821 + ip_idx = 0; 1829 1822 done: 1830 - cb->args[2] = ip_idx; 1823 + *s_ip_idx = ip_idx; 1831 1824 1832 1825 return err; 1833 1826 } ··· 1837 1830 static u32 inet_base_seq(const struct net *net) 1838 1831 { 1839 1832 u32 res = atomic_read(&net->ipv4.dev_addr_genid) + 1840 - net->dev_base_seq; 1833 + READ_ONCE(net->dev_base_seq); 1841 1834 1842 1835 /* Must not return 0 (see nl_dump_check_consistent()). 1843 1836 * Chose a value far away from 0. ··· 1859 1852 }; 1860 1853 struct net *net = sock_net(skb->sk); 1861 1854 struct net *tgt_net = net; 1862 - int h, s_h; 1863 - int idx, s_idx; 1864 - int s_ip_idx; 1865 - struct net_device *dev; 1855 + struct { 1856 + unsigned long ifindex; 1857 + int ip_idx; 1858 + } *ctx = (void *)cb->ctx; 1866 1859 struct in_device *in_dev; 1867 - struct hlist_head *head; 1860 + struct net_device *dev; 1868 1861 int err = 0; 1869 1862 1870 - s_h = cb->args[0]; 1871 - s_idx = idx = cb->args[1]; 1872 - s_ip_idx = cb->args[2]; 1873 - 1863 + rcu_read_lock(); 1874 1864 if (cb->strict_check) { 1875 1865 err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net, 1876 1866 skb->sk, cb); 1877 1867 if (err < 0) 1878 - goto put_tgt_net; 1868 + goto done; 1879 1869 1880 - err = 0; 1881 1870 if (fillargs.ifindex) { 1882 - dev = __dev_get_by_index(tgt_net, fillargs.ifindex); 1883 - if (!dev) { 1884 - err = -ENODEV; 1885 - goto put_tgt_net; 1886 - } 1887 - 1888 - in_dev = __in_dev_get_rtnl(dev); 1889 - if (in_dev) { 1890 - err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx, 1891 - &fillargs); 1892 - } 1893 - goto put_tgt_net; 1894 - } 1895 - } 1896 - 1897 - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { 1898 - idx = 0; 1899 - head = &tgt_net->dev_index_head[h]; 1900 - rcu_read_lock(); 1901 - cb->seq = inet_base_seq(tgt_net); 1902 - 
hlist_for_each_entry_rcu(dev, head, index_hlist) { 1903 - if (idx < s_idx) 1904 - goto cont; 1905 - if (h > s_h || idx > s_idx) 1906 - s_ip_idx = 0; 1871 + err = -ENODEV; 1872 + dev = dev_get_by_index_rcu(tgt_net, fillargs.ifindex); 1873 + if (!dev) 1874 + goto done; 1907 1875 in_dev = __in_dev_get_rcu(dev); 1908 1876 if (!in_dev) 1909 - goto cont; 1910 - 1911 - err = in_dev_dump_addr(in_dev, skb, cb, s_ip_idx, 1912 - &fillargs); 1913 - if (err < 0) { 1914 - rcu_read_unlock(); 1915 1877 goto done; 1916 - } 1917 - cont: 1918 - idx++; 1878 + err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx, 1879 + &fillargs); 1880 + goto done; 1919 1881 } 1920 - rcu_read_unlock(); 1921 1882 } 1922 1883 1884 + cb->seq = inet_base_seq(tgt_net); 1885 + 1886 + for_each_netdev_dump(net, dev, ctx->ifindex) { 1887 + in_dev = __in_dev_get_rcu(dev); 1888 + if (!in_dev) 1889 + continue; 1890 + err = in_dev_dump_addr(in_dev, skb, cb, &ctx->ip_idx, 1891 + &fillargs); 1892 + if (err < 0) 1893 + goto done; 1894 + } 1923 1895 done: 1924 - cb->args[0] = h; 1925 - cb->args[1] = idx; 1926 - put_tgt_net: 1896 + if (err < 0 && likely(skb->len)) 1897 + err = skb->len; 1927 1898 if (fillargs.netnsid >= 0) 1928 1899 put_net(tgt_net); 1929 - 1930 - return skb->len ? : err; 1900 + rcu_read_unlock(); 1901 + return err; 1931 1902 } 1932 1903 1933 1904 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, ··· 2796 2811 2797 2812 rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0); 2798 2813 rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0); 2799 - rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0); 2814 + rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 2815 + RTNL_FLAG_DUMP_UNLOCKED); 2800 2816 rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, 2801 2817 inet_netconf_dump_devconf, 2802 2818 RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED);