Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv4: introduce rt_uses_gateway

Add a new flag to remember when a route is via a gateway.
We will use it to allow rt_gateway to contain the address of a
directly connected host in the cases when DST_NOCACHE is
used or when the NH exception caches a per-destination route
without the DST_NOCACHE flag, i.e. when routes are not used for
other destinations. This way we force neighbour
resolution to work with the routed destination while we
can use a different address in the packet, a feature needed
for IPVS-DR, where the original packet for the virtual IP is
routed via a route to the real IP.

Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Julian Anastasov and committed by
David S. Miller
155e8336 f8a17175

+34 -28
+2 -1
include/net/route.h
··· 48 48 int rt_genid; 49 49 unsigned int rt_flags; 50 50 __u16 rt_type; 51 - __u16 rt_is_input; 51 + __u8 rt_is_input; 52 + __u8 rt_uses_gateway; 52 53 53 54 int rt_iif; 54 55
+2 -2
net/ipv4/inet_connection_sock.c
··· 406 406 rt = ip_route_output_flow(net, fl4, sk); 407 407 if (IS_ERR(rt)) 408 408 goto no_route; 409 - if (opt && opt->opt.is_strictroute && rt->rt_gateway) 409 + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 410 410 goto route_err; 411 411 return &rt->dst; 412 412 ··· 442 442 rt = ip_route_output_flow(net, fl4, sk); 443 443 if (IS_ERR(rt)) 444 444 goto no_route; 445 - if (opt && opt->opt.is_strictroute && rt->rt_gateway) 445 + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) 446 446 goto route_err; 447 447 rcu_read_unlock(); 448 448 return &rt->dst;
+1 -1
net/ipv4/ip_forward.c
··· 85 85 86 86 rt = skb_rtable(skb); 87 87 88 - if (opt->is_strictroute && rt->rt_gateway) 88 + if (opt->is_strictroute && rt->rt_uses_gateway) 89 89 goto sr_failed; 90 90 91 91 if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
+2 -2
net/ipv4/ip_output.c
··· 193 193 } 194 194 195 195 rcu_read_lock_bh(); 196 - nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr; 196 + nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); 197 197 neigh = __ipv4_neigh_lookup_noref(dev, nexthop); 198 198 if (unlikely(!neigh)) 199 199 neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); ··· 371 371 skb_dst_set_noref(skb, &rt->dst); 372 372 373 373 packet_routed: 374 - if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway) 374 + if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway) 375 375 goto no_route; 376 376 377 377 /* OK, we know where to send it, allocate and build IP header. */
+26 -22
net/ipv4/route.c
··· 1126 1126 mtu = dst->dev->mtu; 1127 1127 1128 1128 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { 1129 - if (rt->rt_gateway && mtu > 576) 1129 + if (rt->rt_uses_gateway && mtu > 576) 1130 1130 mtu = 576; 1131 1131 } 1132 1132 ··· 1177 1177 if (fnhe->fnhe_gw) { 1178 1178 rt->rt_flags |= RTCF_REDIRECTED; 1179 1179 rt->rt_gateway = fnhe->fnhe_gw; 1180 - } 1180 + rt->rt_uses_gateway = 1; 1181 + } else if (!rt->rt_gateway) 1182 + rt->rt_gateway = daddr; 1181 1183 1182 1184 orig = rcu_dereference(fnhe->fnhe_rth); 1183 1185 rcu_assign_pointer(fnhe->fnhe_rth, rt); ··· 1188 1186 1189 1187 fnhe->fnhe_stamp = jiffies; 1190 1188 ret = true; 1191 - } else { 1192 - /* Routes we intend to cache in nexthop exception have 1193 - * the DST_NOCACHE bit clear. However, if we are 1194 - * unsuccessful at storing this route into the cache 1195 - * we really need to set it. 1196 - */ 1197 - rt->dst.flags |= DST_NOCACHE; 1198 1189 } 1199 1190 spin_unlock_bh(&fnhe_lock); 1200 1191 ··· 1210 1215 if (prev == orig) { 1211 1216 if (orig) 1212 1217 rt_free(orig); 1213 - } else { 1214 - /* Routes we intend to cache in the FIB nexthop have 1215 - * the DST_NOCACHE bit clear. However, if we are 1216 - * unsuccessful at storing this route into the cache 1217 - * we really need to set it. 
1218 - */ 1219 - rt->dst.flags |= DST_NOCACHE; 1218 + } else 1220 1219 ret = false; 1221 - } 1222 1220 1223 1221 return ret; 1224 1222 } ··· 1272 1284 if (fi) { 1273 1285 struct fib_nh *nh = &FIB_RES_NH(*res); 1274 1286 1275 - if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) 1287 + if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) { 1276 1288 rt->rt_gateway = nh->nh_gw; 1289 + rt->rt_uses_gateway = 1; 1290 + } 1277 1291 dst_init_metrics(&rt->dst, fi->fib_metrics, true); 1278 1292 #ifdef CONFIG_IP_ROUTE_CLASSID 1279 1293 rt->dst.tclassid = nh->nh_tclassid; ··· 1284 1294 cached = rt_bind_exception(rt, fnhe, daddr); 1285 1295 else if (!(rt->dst.flags & DST_NOCACHE)) 1286 1296 cached = rt_cache_route(nh, rt); 1287 - } 1288 - if (unlikely(!cached)) 1297 + if (unlikely(!cached)) { 1298 + /* Routes we intend to cache in nexthop exception or 1299 + * FIB nexthop have the DST_NOCACHE bit clear. 1300 + * However, if we are unsuccessful at storing this 1301 + * route into the cache we really need to set it. 
1302 + */ 1303 + rt->dst.flags |= DST_NOCACHE; 1304 + if (!rt->rt_gateway) 1305 + rt->rt_gateway = daddr; 1306 + rt_add_uncached_list(rt); 1307 + } 1308 + } else 1289 1309 rt_add_uncached_list(rt); 1290 1310 1291 1311 #ifdef CONFIG_IP_ROUTE_CLASSID ··· 1363 1363 rth->rt_iif = 0; 1364 1364 rth->rt_pmtu = 0; 1365 1365 rth->rt_gateway = 0; 1366 + rth->rt_uses_gateway = 0; 1366 1367 INIT_LIST_HEAD(&rth->rt_uncached); 1367 1368 if (our) { 1368 1369 rth->dst.input= ip_local_deliver; ··· 1433 1432 return -EINVAL; 1434 1433 } 1435 1434 1436 - 1437 1435 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), 1438 1436 in_dev->dev, in_dev, &itag); 1439 1437 if (err < 0) { ··· 1488 1488 rth->rt_iif = 0; 1489 1489 rth->rt_pmtu = 0; 1490 1490 rth->rt_gateway = 0; 1491 + rth->rt_uses_gateway = 0; 1491 1492 INIT_LIST_HEAD(&rth->rt_uncached); 1492 1493 1493 1494 rth->dst.input = ip_forward; ··· 1659 1658 rth->rt_iif = 0; 1660 1659 rth->rt_pmtu = 0; 1661 1660 rth->rt_gateway = 0; 1661 + rth->rt_uses_gateway = 0; 1662 1662 INIT_LIST_HEAD(&rth->rt_uncached); 1663 1663 if (res.type == RTN_UNREACHABLE) { 1664 1664 rth->dst.input= ip_error; ··· 1828 1826 rth->rt_iif = orig_oif ? : 0; 1829 1827 rth->rt_pmtu = 0; 1830 1828 rth->rt_gateway = 0; 1829 + rth->rt_uses_gateway = 0; 1831 1830 INIT_LIST_HEAD(&rth->rt_uncached); 1832 1831 1833 1832 RT_CACHE_STAT_INC(out_slow_tot); ··· 2107 2104 rt->rt_flags = ort->rt_flags; 2108 2105 rt->rt_type = ort->rt_type; 2109 2106 rt->rt_gateway = ort->rt_gateway; 2107 + rt->rt_uses_gateway = ort->rt_uses_gateway; 2110 2108 2111 2109 INIT_LIST_HEAD(&rt->rt_uncached); 2112 2110 ··· 2186 2182 if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) 2187 2183 goto nla_put_failure; 2188 2184 } 2189 - if (rt->rt_gateway && 2185 + if (rt->rt_uses_gateway && 2190 2186 nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) 2191 2187 goto nla_put_failure; 2192 2188
+1
net/ipv4/xfrm4_policy.c
··· 91 91 RTCF_LOCAL); 92 92 xdst->u.rt.rt_type = rt->rt_type; 93 93 xdst->u.rt.rt_gateway = rt->rt_gateway; 94 + xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; 94 95 xdst->u.rt.rt_pmtu = rt->rt_pmtu; 95 96 INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); 96 97