Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'ipv6-Another-followup-to-the-fib6_info-change'

David Ahern says:

====================
net/ipv6: Another followup to the fib6_info change

Last one - for this week.

Patches 1, 2 and 7 are more cleanup patches - removing dead code,
moving code from a header to near its single caller, and updating
a function name.

Patches 3-5 do some refactoring leading up to patch 6, which fixes
a NULL dereference. I have only managed to trigger a panic once, so
I cannot definitively confirm that it addresses the problem, but it
seems pretty clear that the cause is a race between one path removing
the 'from' reference on an rt6_info and another path using that
'from' value to do cookie checking.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+136 -83
+12 -29
include/net/ip6_fib.h
··· 174 174 175 175 struct rt6_info { 176 176 struct dst_entry dst; 177 - struct fib6_info *from; 177 + struct fib6_info __rcu *from; 178 178 179 179 struct rt6key rt6i_dst; 180 180 struct rt6key rt6i_src; ··· 223 223 return false; 224 224 } 225 225 226 - static inline void rt6_clean_expires(struct rt6_info *rt) 227 - { 228 - rt->rt6i_flags &= ~RTF_EXPIRES; 229 - rt->dst.expires = 0; 230 - } 231 - 232 - static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires) 233 - { 234 - rt->dst.expires = expires; 235 - rt->rt6i_flags |= RTF_EXPIRES; 236 - } 237 - 238 - static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) 239 - { 240 - if (!(rt0->rt6i_flags & RTF_EXPIRES) && rt0->from) 241 - rt0->dst.expires = rt0->from->expires; 242 - 243 - dst_set_expires(&rt0->dst, timeout); 244 - rt0->rt6i_flags |= RTF_EXPIRES; 245 - } 246 - 247 226 /* Function to safely get fn->sernum for passed in rt 248 227 * and store result in passed in cookie. 249 228 * Return true if we can get cookie safely 250 229 * Return false if not 251 230 */ 252 - static inline bool rt6_get_cookie_safe(const struct fib6_info *f6i, 253 - u32 *cookie) 231 + static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, 232 + u32 *cookie) 254 233 { 255 234 struct fib6_node *fn; 256 235 bool status = false; 257 236 258 - rcu_read_lock(); 259 237 fn = rcu_dereference(f6i->fib6_node); 260 238 261 239 if (fn) { ··· 243 265 status = true; 244 266 } 245 267 246 - rcu_read_unlock(); 247 268 return status; 248 269 } 249 270 250 271 static inline u32 rt6_get_cookie(const struct rt6_info *rt) 251 272 { 273 + struct fib6_info *from; 252 274 u32 cookie = 0; 253 275 254 - if (rt->rt6i_flags & RTF_PCPU || 255 - (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) 256 - rt6_get_cookie_safe(rt->from, &cookie); 276 + rcu_read_lock(); 277 + 278 + from = rcu_dereference(rt->from); 279 + if (from && (rt->rt6i_flags & RTF_PCPU || 280 + unlikely(!list_empty(&rt->rt6i_uncached)))) 281 + 
fib6_get_cookie_safe(from, &cookie); 282 + 283 + rcu_read_unlock(); 257 284 258 285 return cookie; 259 286 }
+27 -18
net/ipv6/ip6_fib.c
··· 860 860 return ln; 861 861 } 862 862 863 + static void fib6_drop_pcpu_from(struct fib6_info *f6i, 864 + const struct fib6_table *table) 865 + { 866 + int cpu; 867 + 868 + /* release the reference to this fib entry from 869 + * all of its cached pcpu routes 870 + */ 871 + for_each_possible_cpu(cpu) { 872 + struct rt6_info **ppcpu_rt; 873 + struct rt6_info *pcpu_rt; 874 + 875 + ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu); 876 + pcpu_rt = *ppcpu_rt; 877 + if (pcpu_rt) { 878 + struct fib6_info *from; 879 + 880 + from = rcu_dereference_protected(pcpu_rt->from, 881 + lockdep_is_held(&table->tb6_lock)); 882 + rcu_assign_pointer(pcpu_rt->from, NULL); 883 + fib6_info_release(from); 884 + } 885 + } 886 + } 887 + 863 888 static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn, 864 889 struct net *net) 865 890 { ··· 912 887 lockdep_is_held(&table->tb6_lock)); 913 888 } 914 889 915 - if (rt->rt6i_pcpu) { 916 - int cpu; 917 - 918 - /* release the reference to this fib entry from 919 - * all of its cached pcpu routes 920 - */ 921 - for_each_possible_cpu(cpu) { 922 - struct rt6_info **ppcpu_rt; 923 - struct rt6_info *pcpu_rt; 924 - 925 - ppcpu_rt = per_cpu_ptr(rt->rt6i_pcpu, cpu); 926 - pcpu_rt = *ppcpu_rt; 927 - if (pcpu_rt) { 928 - fib6_info_release(pcpu_rt->from); 929 - pcpu_rt->from = NULL; 930 - } 931 - } 932 - } 890 + if (rt->rt6i_pcpu) 891 + fib6_drop_pcpu_from(rt, table); 933 892 } 934 893 } 935 894
+7 -2
net/ipv6/ip6_output.c
··· 962 962 * that's why we try it again later. 963 963 */ 964 964 if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) { 965 + struct fib6_info *from; 965 966 struct rt6_info *rt; 966 967 bool had_dst = *dst != NULL; 967 968 968 969 if (!had_dst) 969 970 *dst = ip6_route_output(net, sk, fl6); 970 971 rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; 971 - err = ip6_route_get_saddr(net, rt ? rt->from : NULL, 972 - &fl6->daddr, 972 + 973 + rcu_read_lock(); 974 + from = rt ? rcu_dereference(rt->from) : NULL; 975 + err = ip6_route_get_saddr(net, from, &fl6->daddr, 973 976 sk ? inet6_sk(sk)->srcprefs : 0, 974 977 &fl6->saddr); 978 + rcu_read_unlock(); 979 + 975 980 if (err) 976 981 goto out_err_release; 977 982
+90 -34
net/ipv6/route.c
··· 359 359 static void ip6_dst_destroy(struct dst_entry *dst) 360 360 { 361 361 struct rt6_info *rt = (struct rt6_info *)dst; 362 - struct fib6_info *from = rt->from; 362 + struct fib6_info *from; 363 363 struct inet6_dev *idev; 364 364 365 365 dst_destroy_metrics_generic(dst); ··· 371 371 in6_dev_put(idev); 372 372 } 373 373 374 - rt->from = NULL; 374 + rcu_read_lock(); 375 + from = rcu_dereference(rt->from); 376 + rcu_assign_pointer(rt->from, NULL); 375 377 fib6_info_release(from); 378 + rcu_read_unlock(); 376 379 } 377 380 378 381 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, ··· 405 402 406 403 static bool rt6_check_expired(const struct rt6_info *rt) 407 404 { 405 + struct fib6_info *from; 406 + 407 + from = rcu_dereference(rt->from); 408 + 408 409 if (rt->rt6i_flags & RTF_EXPIRES) { 409 410 if (time_after(jiffies, rt->dst.expires)) 410 411 return true; 411 - } else if (rt->from) { 412 + } else if (from) { 412 413 return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || 413 - fib6_check_expired(rt->from); 414 + fib6_check_expired(from); 414 415 } 415 416 return false; 416 417 } ··· 970 963 { 971 964 rt->rt6i_flags &= ~RTF_EXPIRES; 972 965 fib6_info_hold(from); 973 - rt->from = from; 966 + rcu_assign_pointer(rt->from, from); 974 967 dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true); 975 968 if (from->fib6_metrics != &dst_default_metrics) { 976 969 rt->dst._metrics |= DST_METRICS_REFCOUNTED; ··· 1171 1164 * Clone the route. 1172 1165 */ 1173 1166 1174 - rcu_read_lock(); 1175 1167 dev = ip6_rt_get_dev_rcu(ort); 1176 1168 rt = ip6_dst_alloc(dev_net(dev), dev, 0); 1177 - rcu_read_unlock(); 1178 1169 if (!rt) 1179 1170 return NULL; 1180 1171 ··· 1860 1855 * the daddr in the skb during the neighbor look-up is different 1861 1856 * from the fl6->daddr used to look-up route here. 
1862 1857 */ 1863 - 1864 1858 struct rt6_info *uncached_rt; 1865 1859 1866 - fib6_info_hold(f6i); 1867 - rcu_read_unlock(); 1868 - 1869 1860 uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL); 1870 - fib6_info_release(f6i); 1861 + 1862 + rcu_read_unlock(); 1871 1863 1872 1864 if (uncached_rt) { 1873 1865 /* Uncached_rt's refcnt is taken during ip6_rt_cache_alloc() ··· 2130 2128 { 2131 2129 u32 rt_cookie = 0; 2132 2130 2133 - if ((f6i && !rt6_get_cookie_safe(f6i, &rt_cookie)) || 2134 - rt_cookie != cookie) 2131 + if (!fib6_get_cookie_safe(f6i, &rt_cookie) || rt_cookie != cookie) 2135 2132 return false; 2136 2133 2137 2134 if (fib6_check_expired(f6i)) ··· 2139 2138 return true; 2140 2139 } 2141 2140 2142 - static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) 2141 + static struct dst_entry *rt6_check(struct rt6_info *rt, 2142 + struct fib6_info *from, 2143 + u32 cookie) 2143 2144 { 2144 2145 u32 rt_cookie = 0; 2145 2146 2146 - if ((rt->from && !rt6_get_cookie_safe(rt->from, &rt_cookie)) || 2147 + if ((from && !fib6_get_cookie_safe(from, &rt_cookie)) || 2147 2148 rt_cookie != cookie) 2148 2149 return NULL; 2149 2150 ··· 2155 2152 return &rt->dst; 2156 2153 } 2157 2154 2158 - static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) 2155 + static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, 2156 + struct fib6_info *from, 2157 + u32 cookie) 2159 2158 { 2160 2159 if (!__rt6_check_expired(rt) && 2161 2160 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && 2162 - fib6_check(rt->from, cookie)) 2161 + fib6_check(from, cookie)) 2163 2162 return &rt->dst; 2164 2163 else 2165 2164 return NULL; ··· 2169 2164 2170 2165 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) 2171 2166 { 2167 + struct dst_entry *dst_ret; 2168 + struct fib6_info *from; 2172 2169 struct rt6_info *rt; 2173 2170 2174 - rt = (struct rt6_info *) dst; 2171 + rt = container_of(dst, struct rt6_info, dst); 2172 + 2173 + rcu_read_lock(); 2175 
2174 2176 2175 /* All IPV6 dsts are created with ->obsolete set to the value 2177 2176 * DST_OBSOLETE_FORCE_CHK which forces validation calls down 2178 2177 * into this function always. 2179 2178 */ 2180 2179 2181 - if (rt->rt6i_flags & RTF_PCPU || 2182 - (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) 2183 - return rt6_dst_from_check(rt, cookie); 2180 + from = rcu_dereference(rt->from); 2181 + 2182 + if (from && (rt->rt6i_flags & RTF_PCPU || 2183 + unlikely(!list_empty(&rt->rt6i_uncached)))) 2184 + dst_ret = rt6_dst_from_check(rt, from, cookie); 2184 2185 else 2185 - return rt6_check(rt, cookie); 2186 + dst_ret = rt6_check(rt, from, cookie); 2187 + 2188 + rcu_read_unlock(); 2189 + 2190 + return dst_ret; 2186 2191 } 2187 2192 2188 2193 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) ··· 2224 2209 if (rt->rt6i_flags & RTF_CACHE) { 2225 2210 if (dst_hold_safe(&rt->dst)) 2226 2211 rt6_remove_exception_rt(rt); 2227 - } else if (rt->from) { 2212 + } else { 2213 + struct fib6_info *from; 2228 2214 struct fib6_node *fn; 2229 2215 2230 2216 rcu_read_lock(); 2231 - fn = rcu_dereference(rt->from->fib6_node); 2232 - if (fn && (rt->rt6i_flags & RTF_DEFAULT)) 2233 - fn->fn_sernum = -1; 2217 + from = rcu_dereference(rt->from); 2218 + if (from) { 2219 + fn = rcu_dereference(from->fib6_node); 2220 + if (fn && (rt->rt6i_flags & RTF_DEFAULT)) 2221 + fn->fn_sernum = -1; 2222 + } 2234 2223 rcu_read_unlock(); 2235 2224 } 2236 2225 } 2226 + } 2227 + 2228 + static void rt6_update_expires(struct rt6_info *rt0, int timeout) 2229 + { 2230 + if (!(rt0->rt6i_flags & RTF_EXPIRES)) { 2231 + struct fib6_info *from; 2232 + 2233 + rcu_read_lock(); 2234 + from = rcu_dereference(rt0->from); 2235 + if (from) 2236 + rt0->dst.expires = from->expires; 2237 + rcu_read_unlock(); 2238 + } 2239 + 2240 + dst_set_expires(&rt0->dst, timeout); 2241 + rt0->rt6i_flags |= RTF_EXPIRES; 2237 2242 } 2238 2243 2239 2244 static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) ··· 2267 
2232 2268 2233 static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) 2269 2234 { 2235 + bool from_set; 2236 + 2237 + rcu_read_lock(); 2238 + from_set = !!rcu_dereference(rt->from); 2239 + rcu_read_unlock(); 2240 + 2270 2241 return !(rt->rt6i_flags & RTF_CACHE) && 2271 - (rt->rt6i_flags & RTF_PCPU || rt->from); 2242 + (rt->rt6i_flags & RTF_PCPU || from_set); 2272 2243 } 2273 2244 2274 2245 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, ··· 2310 2269 if (rt6->rt6i_flags & RTF_CACHE) 2311 2270 rt6_update_exception_stamp_rt(rt6); 2312 2271 } else if (daddr) { 2272 + struct fib6_info *from; 2313 2273 struct rt6_info *nrt6; 2314 2274 2315 - nrt6 = ip6_rt_cache_alloc(rt6->from, daddr, saddr); 2275 + rcu_read_lock(); 2276 + from = rcu_dereference(rt6->from); 2277 + nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); 2316 2278 if (nrt6) { 2317 2279 rt6_do_update_pmtu(nrt6, mtu); 2318 - if (rt6_insert_exception(nrt6, rt6->from)) 2280 + if (rt6_insert_exception(nrt6, from)) 2319 2281 dst_release_immediate(&nrt6->dst); 2320 2282 } 2283 + rcu_read_unlock(); 2321 2284 } 2322 2285 } 2323 2286 ··· 3254 3209 struct ndisc_options ndopts; 3255 3210 struct inet6_dev *in6_dev; 3256 3211 struct neighbour *neigh; 3212 + struct fib6_info *from; 3257 3213 struct rd_msg *msg; 3258 3214 int optlen, on_link; 3259 3215 u8 *lladdr; ··· 3336 3290 NEIGH_UPDATE_F_ISROUTER)), 3337 3291 NDISC_REDIRECT, &ndopts); 3338 3292 3339 - nrt = ip6_rt_cache_alloc(rt->from, &msg->dest, NULL); 3293 + rcu_read_lock(); 3294 + from = rcu_dereference(rt->from); 3295 + nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL); 3296 + rcu_read_unlock(); 3340 3297 if (!nrt) 3341 3298 goto out; 3342 3299 ··· 4721 4672 struct net *net = sock_net(in_skb->sk); 4722 4673 struct nlattr *tb[RTA_MAX+1]; 4723 4674 int err, iif = 0, oif = 0; 4675 + struct fib6_info *from; 4724 4676 struct dst_entry *dst; 4725 4677 struct rt6_info *rt; 4726 4678 struct sk_buff *skb; ··· 4818 4768 } 4819 4769 4820 
4770 skb_dst_set(skb, &rt->dst); 4771 + 4772 + rcu_read_lock(); 4773 + from = rcu_dereference(rt->from); 4774 + 4821 4775 if (fibmatch) 4822 - err = rt6_fill_node(net, skb, rt->from, NULL, NULL, NULL, iif, 4776 + err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif, 4823 4777 RTM_NEWROUTE, NETLINK_CB(in_skb).portid, 4824 4778 nlh->nlmsg_seq, 0); 4825 4779 else 4826 - err = rt6_fill_node(net, skb, rt->from, dst, 4827 - &fl6.daddr, &fl6.saddr, iif, RTM_NEWROUTE, 4780 + err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, 4781 + &fl6.saddr, iif, RTM_NEWROUTE, 4828 4782 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 4829 4783 0); 4784 + rcu_read_unlock(); 4785 + 4830 4786 if (err < 0) { 4831 4787 kfree_skb(skb); 4832 4788 goto errout;