Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv4: Properly purge netdev references on uncached routes.

When a device is unregistered, we have to purge all of the
references to it that may exist in the entire system.

If a route is uncached, we currently have no way of accomplishing
this.

So create a global list that is scanned when a network device is
unregistered. This mirrors the logic in net/core/dst.c's dst_ifdown().

Signed-off-by: David S. Miller <davem@davemloft.net>

+69 -4
+3
include/net/route.h
··· 57 57 58 58 /* Miscellaneous cached information */ 59 59 u32 rt_pmtu; 60 + 61 + struct list_head rt_uncached; 60 62 }; 61 63 62 64 static inline bool rt_is_input_route(const struct rtable *rt) ··· 109 107 struct in_device; 110 108 extern int ip_rt_init(void); 111 109 extern void rt_cache_flush(struct net *net, int how); 110 + extern void rt_flush_dev(struct net_device *dev); 112 111 extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); 113 112 extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, 114 113 struct sock *sk);
+1
net/ipv4/fib_frontend.c
··· 1046 1046 1047 1047 if (event == NETDEV_UNREGISTER) { 1048 1048 fib_disable_ip(dev, 2, -1); 1049 + rt_flush_dev(dev); 1049 1050 return NOTIFY_DONE; 1050 1051 } 1051 1052
+64 -4
net/ipv4/route.c
··· 147 147 struct sk_buff *skb, u32 mtu); 148 148 static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, 149 149 struct sk_buff *skb); 150 + static void ipv4_dst_destroy(struct dst_entry *dst); 150 151 151 152 static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, 152 153 int how) ··· 171 170 .default_advmss = ipv4_default_advmss, 172 171 .mtu = ipv4_mtu, 173 172 .cow_metrics = ipv4_cow_metrics, 173 + .destroy = ipv4_dst_destroy, 174 174 .ifdown = ipv4_dst_ifdown, 175 175 .negative_advice = ipv4_negative_advice, 176 176 .link_failure = ipv4_link_failure, ··· 1177 1175 return NULL; 1178 1176 } 1179 1177 1180 - static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, 1178 + static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, 1181 1179 __be32 daddr) 1182 1180 { 1181 + bool ret = false; 1182 + 1183 1183 spin_lock_bh(&fnhe_lock); 1184 1184 1185 1185 if (daddr == fnhe->fnhe_daddr) { ··· 1207 1203 rt_free(orig); 1208 1204 1209 1205 fnhe->fnhe_stamp = jiffies; 1206 + ret = true; 1210 1207 } else { 1211 1208 /* Routes we intend to cache in nexthop exception have 1212 1209 * the DST_NOCACHE bit clear. 
However, if we are ··· 1217 1212 rt->dst.flags |= DST_NOCACHE; 1218 1213 } 1219 1214 spin_unlock_bh(&fnhe_lock); 1215 + 1216 + return ret; 1220 1217 } 1221 1218 1222 - static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) 1219 + static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) 1223 1220 { 1224 1221 struct rtable *orig, *prev, **p; 1222 + bool ret = true; 1225 1223 1226 1224 if (rt_is_input_route(rt)) { 1227 1225 p = (struct rtable **)&nh->nh_rth_input; ··· 1247 1239 */ 1248 1240 nocache: 1249 1241 rt->dst.flags |= DST_NOCACHE; 1242 + ret = false; 1243 + } 1244 + 1245 + return ret; 1246 + } 1247 + 1248 + static DEFINE_SPINLOCK(rt_uncached_lock); 1249 + static LIST_HEAD(rt_uncached_list); 1250 + 1251 + static void rt_add_uncached_list(struct rtable *rt) 1252 + { 1253 + spin_lock_bh(&rt_uncached_lock); 1254 + list_add_tail(&rt->rt_uncached, &rt_uncached_list); 1255 + spin_unlock_bh(&rt_uncached_lock); 1256 + } 1257 + 1258 + static void ipv4_dst_destroy(struct dst_entry *dst) 1259 + { 1260 + struct rtable *rt = (struct rtable *) dst; 1261 + 1262 + if (dst->flags & DST_NOCACHE) { 1263 + spin_lock_bh(&rt_uncached_lock); 1264 + list_del(&rt->rt_uncached); 1265 + spin_unlock_bh(&rt_uncached_lock); 1266 + } 1267 + } 1268 + 1269 + void rt_flush_dev(struct net_device *dev) 1270 + { 1271 + if (!list_empty(&rt_uncached_list)) { 1272 + struct net *net = dev_net(dev); 1273 + struct rtable *rt; 1274 + 1275 + spin_lock_bh(&rt_uncached_lock); 1276 + list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { 1277 + if (rt->dst.dev != dev) 1278 + continue; 1279 + rt->dst.dev = net->loopback_dev; 1280 + dev_hold(rt->dst.dev); 1281 + dev_put(dev); 1282 + } 1283 + spin_unlock_bh(&rt_uncached_lock); 1250 1284 } 1251 1285 } 1252 1286 ··· 1304 1254 struct fib_nh_exception *fnhe, 1305 1255 struct fib_info *fi, u16 type, u32 itag) 1306 1256 { 1257 + bool cached = false; 1258 + 1307 1259 if (fi) { 1308 1260 struct fib_nh *nh = &FIB_RES_NH(*res); 1309 1261 ··· 1316 
1264 rt->dst.tclassid = nh->nh_tclassid; 1317 1265 #endif 1318 1266 if (unlikely(fnhe)) 1319 - rt_bind_exception(rt, fnhe, daddr); 1267 + cached = rt_bind_exception(rt, fnhe, daddr); 1320 1268 else if (!(rt->dst.flags & DST_NOCACHE)) 1321 - rt_cache_route(nh, rt); 1269 + cached = rt_cache_route(nh, rt); 1322 1270 } 1271 + if (unlikely(!cached)) 1272 + rt_add_uncached_list(rt); 1323 1273 1324 1274 #ifdef CONFIG_IP_ROUTE_CLASSID 1325 1275 #ifdef CONFIG_IP_MULTIPLE_TABLES ··· 1388 1334 rth->rt_iif = 0; 1389 1335 rth->rt_pmtu = 0; 1390 1336 rth->rt_gateway = 0; 1337 + INIT_LIST_HEAD(&rth->rt_uncached); 1391 1338 if (our) { 1392 1339 rth->dst.input= ip_local_deliver; 1393 1340 rth->rt_flags |= RTCF_LOCAL; ··· 1514 1459 rth->rt_iif = 0; 1515 1460 rth->rt_pmtu = 0; 1516 1461 rth->rt_gateway = 0; 1462 + INIT_LIST_HEAD(&rth->rt_uncached); 1517 1463 1518 1464 rth->dst.input = ip_forward; 1519 1465 rth->dst.output = ip_output; ··· 1681 1625 rth->rt_iif = 0; 1682 1626 rth->rt_pmtu = 0; 1683 1627 rth->rt_gateway = 0; 1628 + INIT_LIST_HEAD(&rth->rt_uncached); 1684 1629 if (res.type == RTN_UNREACHABLE) { 1685 1630 rth->dst.input= ip_error; 1686 1631 rth->dst.error= -err; ··· 1849 1792 rth->rt_iif = orig_oif ? : 0; 1850 1793 rth->rt_pmtu = 0; 1851 1794 rth->rt_gateway = 0; 1795 + INIT_LIST_HEAD(&rth->rt_uncached); 1852 1796 1853 1797 RT_CACHE_STAT_INC(out_slow_tot); 1854 1798 ··· 2128 2070 rt->rt_flags = ort->rt_flags; 2129 2071 rt->rt_type = ort->rt_type; 2130 2072 rt->rt_gateway = ort->rt_gateway; 2073 + 2074 + INIT_LIST_HEAD(&rt->rt_uncached); 2131 2075 2132 2076 dst_free(new); 2133 2077 }
+1
net/ipv4/xfrm4_policy.c
··· 92 92 xdst->u.rt.rt_type = rt->rt_type; 93 93 xdst->u.rt.rt_gateway = rt->rt_gateway; 94 94 xdst->u.rt.rt_pmtu = rt->rt_pmtu; 95 + INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); 95 96 96 97 return 0; 97 98 }