Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mld: convert ip6_sf_list to RCU

The ip6_sf_list has been protected by mca_lock (a spinlock), so its
critical sections run in atomic context. In order to make this context
sleepable, the locking scheme needs to change. The ip6_sf_list is
actually already protected by RTNL, so if it is converted to use RCU,
its control-path context can become sleepable.
But mca_lock is not removed yet because ifmcaddr6 has not been
converted to RCU, so the code is not yet fully converted to a
sleepable context.

Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Taehee Yoo and committed by
David S. Miller
4b200e39 882ba1f7

+130 -77
+4 -3
include/net/if_inet6.h
··· 97 97 }; 98 98 99 99 struct ip6_sf_list { 100 - struct ip6_sf_list *sf_next; 100 + struct ip6_sf_list __rcu *sf_next; 101 101 struct in6_addr sf_addr; 102 102 unsigned long sf_count[2]; /* include/exclude counts */ 103 103 unsigned char sf_gsresp; /* include in g & s response? */ 104 104 unsigned char sf_oldin; /* change state */ 105 105 unsigned char sf_crcount; /* retrans. left to send */ 106 + struct rcu_head rcu; 106 107 }; 107 108 108 109 #define MAF_TIMER_RUNNING 0x01 ··· 116 115 struct in6_addr mca_addr; 117 116 struct inet6_dev *idev; 118 117 struct ifmcaddr6 *next; 119 - struct ip6_sf_list *mca_sources; 120 - struct ip6_sf_list *mca_tomb; 118 + struct ip6_sf_list __rcu *mca_sources; 119 + struct ip6_sf_list __rcu *mca_tomb; 121 120 unsigned int mca_sfmode; 122 121 unsigned char mca_crcount; 123 122 unsigned long mca_sfcount[2];
+126 -74
net/ipv6/mcast.c
··· 113 113 */ 114 114 115 115 #define for_each_pmc_rcu(np, pmc) \ 116 - for (pmc = rcu_dereference(np->ipv6_mc_list); \ 117 - pmc != NULL; \ 116 + for (pmc = rcu_dereference((np)->ipv6_mc_list); \ 117 + pmc; \ 118 118 pmc = rcu_dereference(pmc->next)) 119 + 120 + #define for_each_psf_rtnl(mc, psf) \ 121 + for (psf = rtnl_dereference((mc)->mca_sources); \ 122 + psf; \ 123 + psf = rtnl_dereference(psf->sf_next)) 124 + 125 + #define for_each_psf_rcu(mc, psf) \ 126 + for (psf = rcu_dereference((mc)->mca_sources); \ 127 + psf; \ 128 + psf = rcu_dereference(psf->sf_next)) 129 + 130 + #define for_each_psf_tomb(mc, psf) \ 131 + for (psf = rtnl_dereference((mc)->mca_tomb); \ 132 + psf; \ 133 + psf = rtnl_dereference(psf->sf_next)) 119 134 120 135 static int unsolicited_report_interval(struct inet6_dev *idev) 121 136 { ··· 749 734 if (pmc->mca_sfmode == MCAST_INCLUDE) { 750 735 struct ip6_sf_list *psf; 751 736 752 - pmc->mca_tomb = im->mca_tomb; 753 - pmc->mca_sources = im->mca_sources; 754 - im->mca_tomb = im->mca_sources = NULL; 755 - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) 737 + rcu_assign_pointer(pmc->mca_tomb, 738 + rtnl_dereference(im->mca_tomb)); 739 + rcu_assign_pointer(pmc->mca_sources, 740 + rtnl_dereference(im->mca_sources)); 741 + RCU_INIT_POINTER(im->mca_tomb, NULL); 742 + RCU_INIT_POINTER(im->mca_sources, NULL); 743 + 744 + for_each_psf_rtnl(pmc, psf) 756 745 psf->sf_crcount = pmc->mca_crcount; 757 746 } 758 747 spin_unlock_bh(&im->mca_lock); ··· 767 748 768 749 static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im) 769 750 { 770 - struct ifmcaddr6 *pmc, *pmc_prev; 771 - struct ip6_sf_list *psf; 751 + struct ip6_sf_list *psf, *sources, *tomb; 772 752 struct in6_addr *pmca = &im->mca_addr; 753 + struct ifmcaddr6 *pmc, *pmc_prev; 773 754 774 755 pmc_prev = NULL; 775 756 for (pmc = idev->mc_tomb; pmc; pmc = pmc->next) { ··· 788 769 if (pmc) { 789 770 im->idev = pmc->idev; 790 771 if (im->mca_sfmode == MCAST_INCLUDE) { 791 - 
swap(im->mca_tomb, pmc->mca_tomb); 792 - swap(im->mca_sources, pmc->mca_sources); 793 - for (psf = im->mca_sources; psf; psf = psf->sf_next) 772 + tomb = rcu_replace_pointer(im->mca_tomb, 773 + rtnl_dereference(pmc->mca_tomb), 774 + lockdep_rtnl_is_held()); 775 + rcu_assign_pointer(pmc->mca_tomb, tomb); 776 + 777 + sources = rcu_replace_pointer(im->mca_sources, 778 + rtnl_dereference(pmc->mca_sources), 779 + lockdep_rtnl_is_held()); 780 + rcu_assign_pointer(pmc->mca_sources, sources); 781 + for_each_psf_rtnl(im, psf) 794 782 psf->sf_crcount = idev->mc_qrv; 795 783 } else { 796 784 im->mca_crcount = idev->mc_qrv; ··· 829 803 struct ip6_sf_list *psf, *psf_next; 830 804 831 805 spin_lock_bh(&pmc->mca_lock); 832 - psf = pmc->mca_tomb; 833 - pmc->mca_tomb = NULL; 806 + psf = rtnl_dereference(pmc->mca_tomb); 807 + RCU_INIT_POINTER(pmc->mca_tomb, NULL); 834 808 spin_unlock_bh(&pmc->mca_lock); 835 809 for (; psf; psf = psf_next) { 836 - psf_next = psf->sf_next; 837 - kfree(psf); 810 + psf_next = rtnl_dereference(psf->sf_next); 811 + kfree_rcu(psf, rcu); 838 812 } 839 813 } 840 814 read_unlock_bh(&idev->lock); ··· 1016 990 struct ip6_sf_list *psf; 1017 991 1018 992 spin_lock_bh(&mc->mca_lock); 1019 - for (psf = mc->mca_sources; psf; psf = psf->sf_next) { 993 + for_each_psf_rcu(mc, psf) { 1020 994 if (ipv6_addr_equal(&psf->sf_addr, src_addr)) 1021 995 break; 1022 996 } ··· 1115 1089 int i, scount; 1116 1090 1117 1091 scount = 0; 1118 - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { 1092 + for_each_psf_rcu(pmc, psf) { 1119 1093 if (scount == nsrcs) 1120 1094 break; 1121 1095 for (i = 0; i < nsrcs; i++) { ··· 1148 1122 /* mark INCLUDE-mode sources */ 1149 1123 1150 1124 scount = 0; 1151 - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { 1125 + for_each_psf_rcu(pmc, psf) { 1152 1126 if (scount == nsrcs) 1153 1127 break; 1154 1128 for (i = 0; i < nsrcs; i++) { ··· 1558 1532 struct ip6_sf_list *psf; 1559 1533 int scount = 0; 1560 1534 1561 - for (psf = 
pmc->mca_sources; psf; psf = psf->sf_next) { 1535 + for_each_psf_rtnl(pmc, psf) { 1562 1536 if (!is_in(pmc, psf, type, gdeleted, sdeleted)) 1563 1537 continue; 1564 1538 scount++; ··· 1733 1707 #define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0) 1734 1708 1735 1709 static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, 1736 - int type, int gdeleted, int sdeleted, int crsend) 1710 + int type, int gdeleted, int sdeleted, 1711 + int crsend) 1737 1712 { 1713 + struct ip6_sf_list *psf, *psf_prev, *psf_next; 1714 + int scount, stotal, first, isquery, truncate; 1715 + struct ip6_sf_list __rcu **psf_list; 1738 1716 struct inet6_dev *idev = pmc->idev; 1739 1717 struct net_device *dev = idev->dev; 1740 - struct mld2_report *pmr; 1741 1718 struct mld2_grec *pgr = NULL; 1742 - struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; 1743 - int scount, stotal, first, isquery, truncate; 1719 + struct mld2_report *pmr; 1744 1720 unsigned int mtu; 1745 1721 1746 1722 if (pmc->mca_flags & MAF_NOREPORT) ··· 1761 1733 1762 1734 psf_list = sdeleted ? &pmc->mca_tomb : &pmc->mca_sources; 1763 1735 1764 - if (!*psf_list) 1736 + if (!rcu_access_pointer(*psf_list)) 1765 1737 goto empty_source; 1766 1738 1767 1739 pmr = skb ? 
(struct mld2_report *)skb_transport_header(skb) : NULL; ··· 1777 1749 } 1778 1750 first = 1; 1779 1751 psf_prev = NULL; 1780 - for (psf = *psf_list; psf; psf = psf_next) { 1752 + for (psf = rtnl_dereference(*psf_list); 1753 + psf; 1754 + psf = psf_next) { 1781 1755 struct in6_addr *psrc; 1782 1756 1783 - psf_next = psf->sf_next; 1757 + psf_next = rtnl_dereference(psf->sf_next); 1784 1758 1785 1759 if (!is_in(pmc, psf, type, gdeleted, sdeleted) && !crsend) { 1786 1760 psf_prev = psf; ··· 1829 1799 psf->sf_crcount--; 1830 1800 if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { 1831 1801 if (psf_prev) 1832 - psf_prev->sf_next = psf->sf_next; 1802 + rcu_assign_pointer(psf_prev->sf_next, 1803 + rtnl_dereference(psf->sf_next)); 1833 1804 else 1834 - *psf_list = psf->sf_next; 1835 - kfree(psf); 1805 + rcu_assign_pointer(*psf_list, 1806 + rtnl_dereference(psf->sf_next)); 1807 + kfree_rcu(psf, rcu); 1836 1808 continue; 1837 1809 } 1838 1810 } ··· 1898 1866 /* 1899 1867 * remove zero-count source records from a source filter list 1900 1868 */ 1901 - static void mld_clear_zeros(struct ip6_sf_list **ppsf) 1869 + static void mld_clear_zeros(struct ip6_sf_list __rcu **ppsf) 1902 1870 { 1903 1871 struct ip6_sf_list *psf_prev, *psf_next, *psf; 1904 1872 1905 1873 psf_prev = NULL; 1906 - for (psf = *ppsf; psf; psf = psf_next) { 1907 - psf_next = psf->sf_next; 1874 + for (psf = rtnl_dereference(*ppsf); 1875 + psf; 1876 + psf = psf_next) { 1877 + psf_next = rtnl_dereference(psf->sf_next); 1908 1878 if (psf->sf_crcount == 0) { 1909 1879 if (psf_prev) 1910 - psf_prev->sf_next = psf->sf_next; 1880 + rcu_assign_pointer(psf_prev->sf_next, 1881 + rtnl_dereference(psf->sf_next)); 1911 1882 else 1912 - *ppsf = psf->sf_next; 1913 - kfree(psf); 1914 - } else 1883 + rcu_assign_pointer(*ppsf, 1884 + rtnl_dereference(psf->sf_next)); 1885 + kfree_rcu(psf, rcu); 1886 + } else { 1915 1887 psf_prev = psf; 1888 + } 1916 1889 } 1917 1890 } 1918 1891 ··· 1950 1913 mld_clear_zeros(&pmc->mca_sources); 
1951 1914 } 1952 1915 } 1953 - if (pmc->mca_crcount == 0 && !pmc->mca_tomb && 1954 - !pmc->mca_sources) { 1916 + if (pmc->mca_crcount == 0 && 1917 + !rcu_access_pointer(pmc->mca_tomb) && 1918 + !rcu_access_pointer(pmc->mca_sources)) { 1955 1919 if (pmc_prev) 1956 1920 pmc_prev->next = pmc_next; 1957 1921 else ··· 2149 2111 int rv = 0; 2150 2112 2151 2113 psf_prev = NULL; 2152 - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { 2114 + for_each_psf_rtnl(pmc, psf) { 2153 2115 if (ipv6_addr_equal(&psf->sf_addr, psfsrc)) 2154 2116 break; 2155 2117 psf_prev = psf; ··· 2164 2126 2165 2127 /* no more filters for this source */ 2166 2128 if (psf_prev) 2167 - psf_prev->sf_next = psf->sf_next; 2129 + rcu_assign_pointer(psf_prev->sf_next, 2130 + rtnl_dereference(psf->sf_next)); 2168 2131 else 2169 - pmc->mca_sources = psf->sf_next; 2132 + rcu_assign_pointer(pmc->mca_sources, 2133 + rtnl_dereference(psf->sf_next)); 2134 + 2170 2135 if (psf->sf_oldin && !(pmc->mca_flags & MAF_NOREPORT) && 2171 2136 !mld_in_v1_mode(idev)) { 2172 2137 psf->sf_crcount = idev->mc_qrv; 2173 - psf->sf_next = pmc->mca_tomb; 2174 - pmc->mca_tomb = psf; 2138 + rcu_assign_pointer(psf->sf_next, 2139 + rtnl_dereference(pmc->mca_tomb)); 2140 + rcu_assign_pointer(pmc->mca_tomb, psf); 2175 2141 rv = 1; 2176 - } else 2177 - kfree(psf); 2142 + } else { 2143 + kfree_rcu(psf, rcu); 2144 + } 2178 2145 } 2179 2146 return rv; 2180 2147 } ··· 2231 2188 pmc->mca_sfmode = MCAST_INCLUDE; 2232 2189 pmc->mca_crcount = idev->mc_qrv; 2233 2190 idev->mc_ifc_count = pmc->mca_crcount; 2234 - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) 2191 + for_each_psf_rtnl(pmc, psf) 2235 2192 psf->sf_crcount = 0; 2236 2193 mld_ifc_event(pmc->idev); 2237 2194 } else if (sf_setstate(pmc) || changerec) ··· 2250 2207 struct ip6_sf_list *psf, *psf_prev; 2251 2208 2252 2209 psf_prev = NULL; 2253 - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { 2210 + for_each_psf_rtnl(pmc, psf) { 2254 2211 if 
(ipv6_addr_equal(&psf->sf_addr, psfsrc)) 2255 2212 break; 2256 2213 psf_prev = psf; ··· 2262 2219 2263 2220 psf->sf_addr = *psfsrc; 2264 2221 if (psf_prev) { 2265 - psf_prev->sf_next = psf; 2266 - } else 2267 - pmc->mca_sources = psf; 2222 + rcu_assign_pointer(psf_prev->sf_next, psf); 2223 + } else { 2224 + rcu_assign_pointer(pmc->mca_sources, psf); 2225 + } 2268 2226 } 2269 2227 psf->sf_count[sfmode]++; 2270 2228 return 0; ··· 2276 2232 struct ip6_sf_list *psf; 2277 2233 int mca_xcount = pmc->mca_sfcount[MCAST_EXCLUDE]; 2278 2234 2279 - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) 2235 + for_each_psf_rtnl(pmc, psf) { 2280 2236 if (pmc->mca_sfcount[MCAST_EXCLUDE]) { 2281 2237 psf->sf_oldin = mca_xcount == 2282 2238 psf->sf_count[MCAST_EXCLUDE] && 2283 2239 !psf->sf_count[MCAST_INCLUDE]; 2284 - } else 2240 + } else { 2285 2241 psf->sf_oldin = psf->sf_count[MCAST_INCLUDE] != 0; 2242 + } 2243 + } 2286 2244 } 2287 2245 2288 2246 static int sf_setstate(struct ifmcaddr6 *pmc) ··· 2295 2249 int new_in, rv; 2296 2250 2297 2251 rv = 0; 2298 - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) { 2252 + for_each_psf_rtnl(pmc, psf) { 2299 2253 if (pmc->mca_sfcount[MCAST_EXCLUDE]) { 2300 2254 new_in = mca_xcount == psf->sf_count[MCAST_EXCLUDE] && 2301 2255 !psf->sf_count[MCAST_INCLUDE]; ··· 2305 2259 if (!psf->sf_oldin) { 2306 2260 struct ip6_sf_list *prev = NULL; 2307 2261 2308 - for (dpsf = pmc->mca_tomb; dpsf; 2309 - dpsf = dpsf->sf_next) { 2262 + for_each_psf_tomb(pmc, dpsf) { 2310 2263 if (ipv6_addr_equal(&dpsf->sf_addr, 2311 2264 &psf->sf_addr)) 2312 2265 break; ··· 2313 2268 } 2314 2269 if (dpsf) { 2315 2270 if (prev) 2316 - prev->sf_next = dpsf->sf_next; 2271 + rcu_assign_pointer(prev->sf_next, 2272 + rtnl_dereference(dpsf->sf_next)); 2317 2273 else 2318 - pmc->mca_tomb = dpsf->sf_next; 2319 - kfree(dpsf); 2274 + rcu_assign_pointer(pmc->mca_tomb, 2275 + rtnl_dereference(dpsf->sf_next)); 2276 + kfree_rcu(dpsf, rcu); 2320 2277 } 2321 2278 psf->sf_crcount = 
qrv; 2322 2279 rv++; ··· 2329 2282 * add or update "delete" records if an active filter 2330 2283 * is now inactive 2331 2284 */ 2332 - for (dpsf = pmc->mca_tomb; dpsf; dpsf = dpsf->sf_next) 2285 + 2286 + for_each_psf_tomb(pmc, dpsf) 2333 2287 if (ipv6_addr_equal(&dpsf->sf_addr, 2334 2288 &psf->sf_addr)) 2335 2289 break; ··· 2339 2291 if (!dpsf) 2340 2292 continue; 2341 2293 *dpsf = *psf; 2342 - /* pmc->mca_lock held by callers */ 2343 - dpsf->sf_next = pmc->mca_tomb; 2344 - pmc->mca_tomb = dpsf; 2294 + rcu_assign_pointer(dpsf->sf_next, 2295 + rtnl_dereference(pmc->mca_tomb)); 2296 + rcu_assign_pointer(pmc->mca_tomb, dpsf); 2345 2297 } 2346 2298 dpsf->sf_crcount = qrv; 2347 2299 rv++; ··· 2404 2356 2405 2357 pmc->mca_crcount = idev->mc_qrv; 2406 2358 idev->mc_ifc_count = pmc->mca_crcount; 2407 - for (psf = pmc->mca_sources; psf; psf = psf->sf_next) 2359 + for_each_psf_rtnl(pmc, psf) 2408 2360 psf->sf_crcount = 0; 2409 2361 mld_ifc_event(idev); 2410 2362 } else if (sf_setstate(pmc)) ··· 2418 2370 { 2419 2371 struct ip6_sf_list *psf, *nextpsf; 2420 2372 2421 - for (psf = pmc->mca_tomb; psf; psf = nextpsf) { 2422 - nextpsf = psf->sf_next; 2423 - kfree(psf); 2373 + for (psf = rtnl_dereference(pmc->mca_tomb); 2374 + psf; 2375 + psf = nextpsf) { 2376 + nextpsf = rtnl_dereference(psf->sf_next); 2377 + kfree_rcu(psf, rcu); 2424 2378 } 2425 - pmc->mca_tomb = NULL; 2426 - for (psf = pmc->mca_sources; psf; psf = nextpsf) { 2427 - nextpsf = psf->sf_next; 2428 - kfree(psf); 2379 + RCU_INIT_POINTER(pmc->mca_tomb, NULL); 2380 + for (psf = rtnl_dereference(pmc->mca_sources); 2381 + psf; 2382 + psf = nextpsf) { 2383 + nextpsf = rtnl_dereference(psf->sf_next); 2384 + kfree_rcu(psf, rcu); 2429 2385 } 2430 - pmc->mca_sources = NULL; 2386 + RCU_INIT_POINTER(pmc->mca_sources, NULL); 2431 2387 pmc->mca_sfmode = MCAST_EXCLUDE; 2432 2388 pmc->mca_sfcount[MCAST_INCLUDE] = 0; 2433 2389 pmc->mca_sfcount[MCAST_EXCLUDE] = 1; ··· 2841 2789 im = idev->mc_list; 2842 2790 if (likely(im)) { 2843 
2791 spin_lock_bh(&im->mca_lock); 2844 - psf = im->mca_sources; 2792 + psf = rcu_dereference(im->mca_sources); 2845 2793 if (likely(psf)) { 2846 2794 state->im = im; 2847 2795 state->idev = idev; ··· 2858 2806 { 2859 2807 struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); 2860 2808 2861 - psf = psf->sf_next; 2809 + psf = rcu_dereference(psf->sf_next); 2862 2810 while (!psf) { 2863 2811 spin_unlock_bh(&state->im->mca_lock); 2864 2812 state->im = state->im->next; ··· 2880 2828 if (!state->im) 2881 2829 break; 2882 2830 spin_lock_bh(&state->im->mca_lock); 2883 - psf = state->im->mca_sources; 2831 + psf = rcu_dereference(state->im->mca_sources); 2884 2832 } 2885 2833 out: 2886 2834 return psf;