Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv4: Maintain redirect and PMTU info in struct rtable again.

Maintaining redirect and PMTU information in the inetpeer entries was
not the right way to do this at all; keep it in struct rtable instead.

Signed-off-by: David S. Miller <davem@davemloft.net>

+41 -154
-4
include/net/inetpeer.h
··· 36 36 u32 metrics[RTAX_MAX]; 37 37 u32 rate_tokens; /* rate limiting for ICMP */ 38 38 unsigned long rate_last; 39 - unsigned long pmtu_expires; 40 - u32 pmtu_orig; 41 - u32 pmtu_learned; 42 - struct inetpeer_addr_base redirect_learned; 43 39 union { 44 40 struct list_head gc_list; 45 41 struct rcu_head gc_rcu;
+1 -1
include/net/route.h
··· 65 65 __be32 rt_gateway; 66 66 67 67 /* Miscellaneous cached information */ 68 - u32 rt_peer_genid; 68 + u32 rt_pmtu; 69 69 unsigned long _peer; /* long-living peer info */ 70 70 struct fib_info *fi; /* for client ref to shared metrics */ 71 71 };
-3
net/ipv4/inetpeer.c
··· 511 511 p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; 512 512 p->rate_tokens = 0; 513 513 p->rate_last = 0; 514 - p->pmtu_expires = 0; 515 - p->pmtu_orig = 0; 516 - memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); 517 514 INIT_LIST_HEAD(&p->gc_list); 518 515 519 516 /* Link the node. */
+39 -146
net/ipv4/route.c
··· 669 669 static inline int rt_valuable(struct rtable *rth) 670 670 { 671 671 return (rth->rt_flags & (RTCF_REDIRECTED | RTCF_NOTIFY)) || 672 - (rt_has_peer(rth) && rt_peer_ptr(rth)->pmtu_expires); 672 + rth->dst.expires; 673 673 } 674 674 675 675 static int rt_may_expire(struct rtable *rth, unsigned long tmo1, unsigned long tmo2) ··· 1242 1242 return rt; 1243 1243 } 1244 1244 1245 - static atomic_t __rt_peer_genid = ATOMIC_INIT(0); 1246 - 1247 - static u32 rt_peer_genid(void) 1248 - { 1249 - return atomic_read(&__rt_peer_genid); 1250 - } 1251 - 1252 1245 void rt_bind_peer(struct rtable *rt, __be32 daddr, int create) 1253 1246 { 1254 1247 struct inet_peer_base *base; ··· 1255 1262 if (peer) { 1256 1263 if (!rt_set_peer(rt, peer)) 1257 1264 inet_putpeer(peer); 1258 - else 1259 - rt->rt_peer_genid = rt_peer_genid(); 1260 1265 } 1261 1266 } 1262 1267 ··· 1314 1323 spin_unlock_bh(rt_hash_lock_addr(hash)); 1315 1324 } 1316 1325 1317 - static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer) 1318 - { 1319 - struct rtable *rt = (struct rtable *) dst; 1320 - __be32 orig_gw = rt->rt_gateway; 1321 - struct neighbour *n; 1322 - 1323 - dst_confirm(&rt->dst); 1324 - 1325 - rt->rt_gateway = peer->redirect_learned.a4; 1326 - 1327 - n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway); 1328 - if (!n) { 1329 - rt->rt_gateway = orig_gw; 1330 - return; 1331 - } 1332 - if (!(n->nud_state & NUD_VALID)) { 1333 - neigh_event_send(n, NULL); 1334 - } else { 1335 - rt->rt_flags |= RTCF_REDIRECTED; 1336 - call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); 1337 - } 1338 - neigh_release(n); 1339 - } 1340 - 1341 1326 /* called in rcu_read_lock() section */ 1342 1327 void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, 1343 1328 __be32 saddr, struct net_device *dev) ··· 1322 1355 struct in_device *in_dev = __in_dev_get_rcu(dev); 1323 1356 __be32 skeys[2] = { saddr, 0 }; 1324 1357 int ikeys[2] = { dev->ifindex, 0 }; 1325 - struct inet_peer *peer; 1326 1358 struct 
net *net; 1327 1359 1328 1360 if (!in_dev) ··· 1354 1388 rthp = &rt_hash_table[hash].chain; 1355 1389 1356 1390 while ((rt = rcu_dereference(*rthp)) != NULL) { 1391 + struct neighbour *n; 1392 + 1357 1393 rthp = &rt->dst.rt_next; 1358 1394 1359 1395 if (rt->rt_key_dst != daddr || ··· 1369 1401 rt->rt_gateway != old_gw) 1370 1402 continue; 1371 1403 1372 - peer = rt_get_peer_create(rt, rt->rt_dst); 1373 - if (peer) { 1374 - if (peer->redirect_learned.a4 != new_gw) { 1375 - peer->redirect_learned.a4 = new_gw; 1376 - atomic_inc(&__rt_peer_genid); 1404 + n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw); 1405 + if (n) { 1406 + if (!(n->nud_state & NUD_VALID)) { 1407 + neigh_event_send(n, NULL); 1408 + } else { 1409 + rt->rt_gateway = new_gw; 1410 + rt->rt_flags |= RTCF_REDIRECTED; 1411 + call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); 1377 1412 } 1378 - check_peer_redir(&rt->dst, peer); 1413 + neigh_release(n); 1379 1414 } 1380 1415 } 1381 1416 } ··· 1396 1425 ; 1397 1426 } 1398 1427 1399 - static bool peer_pmtu_expired(struct inet_peer *peer) 1400 - { 1401 - unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); 1402 - 1403 - return orig && 1404 - time_after_eq(jiffies, orig) && 1405 - cmpxchg(&peer->pmtu_expires, orig, 0) == orig; 1406 - } 1407 - 1408 - static bool peer_pmtu_cleaned(struct inet_peer *peer) 1409 - { 1410 - unsigned long orig = ACCESS_ONCE(peer->pmtu_expires); 1411 - 1412 - return orig && 1413 - cmpxchg(&peer->pmtu_expires, orig, 0) == orig; 1414 - } 1415 - 1416 1428 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst) 1417 1429 { 1418 1430 struct rtable *rt = (struct rtable *)dst; ··· 1405 1451 if (dst->obsolete > 0) { 1406 1452 ip_rt_put(rt); 1407 1453 ret = NULL; 1408 - } else if (rt->rt_flags & RTCF_REDIRECTED) { 1454 + } else if ((rt->rt_flags & RTCF_REDIRECTED) || 1455 + rt->dst.expires) { 1409 1456 unsigned int hash = rt_hash(rt->rt_key_dst, rt->rt_key_src, 1410 1457 rt->rt_oif, 1411 1458 rt_genid(dev_net(dst->dev))); 1412 1459 
rt_del(hash, rt); 1413 1460 ret = NULL; 1414 - } else if (rt_has_peer(rt)) { 1415 - struct inet_peer *peer = rt_peer_ptr(rt); 1416 - if (peer_pmtu_expired(peer)) 1417 - dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); 1418 1461 } 1419 1462 } 1420 1463 return ret; ··· 1555 1604 return 0; 1556 1605 } 1557 1606 1558 - static void check_peer_pmtu(struct dst_entry *dst, struct inet_peer *peer) 1559 - { 1560 - unsigned long expires = ACCESS_ONCE(peer->pmtu_expires); 1561 - 1562 - if (!expires) 1563 - return; 1564 - if (time_before(jiffies, expires)) { 1565 - u32 orig_dst_mtu = dst_mtu(dst); 1566 - if (peer->pmtu_learned < orig_dst_mtu) { 1567 - if (!peer->pmtu_orig) 1568 - peer->pmtu_orig = dst_metric_raw(dst, RTAX_MTU); 1569 - dst_metric_set(dst, RTAX_MTU, peer->pmtu_learned); 1570 - } 1571 - } else if (cmpxchg(&peer->pmtu_expires, expires, 0) == expires) 1572 - dst_metric_set(dst, RTAX_MTU, peer->pmtu_orig); 1573 - } 1574 - 1575 1607 static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) 1576 1608 { 1577 1609 struct rtable *rt = (struct rtable *) dst; 1578 - struct inet_peer *peer; 1579 1610 1580 1611 dst_confirm(dst); 1581 1612 1582 - peer = rt_get_peer_create(rt, rt->rt_dst); 1583 - if (peer) { 1584 - unsigned long pmtu_expires = ACCESS_ONCE(peer->pmtu_expires); 1613 + if (mtu < ip_rt_min_pmtu) 1614 + mtu = ip_rt_min_pmtu; 1585 1615 1586 - if (mtu < ip_rt_min_pmtu) 1587 - mtu = ip_rt_min_pmtu; 1588 - if (!pmtu_expires || mtu < peer->pmtu_learned) { 1589 - 1590 - pmtu_expires = jiffies + ip_rt_mtu_expires; 1591 - if (!pmtu_expires) 1592 - pmtu_expires = 1UL; 1593 - 1594 - peer->pmtu_learned = mtu; 1595 - peer->pmtu_expires = pmtu_expires; 1596 - 1597 - atomic_inc(&__rt_peer_genid); 1598 - rt->rt_peer_genid = rt_peer_genid(); 1599 - } 1600 - check_peer_pmtu(dst, peer); 1601 - } 1616 + rt->rt_pmtu = mtu; 1617 + dst_set_expires(&rt->dst, ip_rt_mtu_expires); 1602 1618 } 1603 1619 1604 1620 void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, ··· 1597 
1679 } 1598 1680 EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); 1599 1681 1600 - static void ipv4_validate_peer(struct rtable *rt) 1601 - { 1602 - if (rt->rt_peer_genid != rt_peer_genid()) { 1603 - struct inet_peer *peer = rt_get_peer(rt, rt->rt_dst); 1604 - 1605 - if (peer) { 1606 - check_peer_pmtu(&rt->dst, peer); 1607 - 1608 - if (peer->redirect_learned.a4 && 1609 - peer->redirect_learned.a4 != rt->rt_gateway) 1610 - check_peer_redir(&rt->dst, peer); 1611 - } 1612 - 1613 - rt->rt_peer_genid = rt_peer_genid(); 1614 - } 1615 - } 1616 - 1617 1682 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) 1618 1683 { 1619 1684 struct rtable *rt = (struct rtable *) dst; 1620 1685 1621 1686 if (rt_is_expired(rt)) 1622 1687 return NULL; 1623 - ipv4_validate_peer(rt); 1624 1688 return dst; 1625 1689 } 1626 1690 ··· 1628 1728 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); 1629 1729 1630 1730 rt = skb_rtable(skb); 1631 - if (rt && rt_has_peer(rt)) { 1632 - struct inet_peer *peer = rt_peer_ptr(rt); 1633 - if (peer_pmtu_cleaned(peer)) 1634 - dst_metric_set(&rt->dst, RTAX_MTU, peer->pmtu_orig); 1635 - } 1731 + if (rt) 1732 + dst_set_expires(&rt->dst, 0); 1636 1733 } 1637 1734 1638 1735 static int ip_rt_bug(struct sk_buff *skb) ··· 1709 1812 static unsigned int ipv4_mtu(const struct dst_entry *dst) 1710 1813 { 1711 1814 const struct rtable *rt = (const struct rtable *) dst; 1712 - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); 1815 + unsigned int mtu = rt->rt_pmtu; 1816 + 1817 + if (mtu && time_after_eq(jiffies, rt->dst.expires)) 1818 + mtu = 0; 1819 + 1820 + if (!mtu) 1821 + mtu = dst_metric_raw(dst, RTAX_MTU); 1713 1822 1714 1823 if (mtu && rt_is_output_route(rt)) 1715 1824 return mtu; ··· 1746 1843 peer = inet_getpeer_v4(base, rt->rt_dst, 0); 1747 1844 if (peer) { 1748 1845 __rt_set_peer(rt, peer); 1749 - rt->rt_peer_genid = rt_peer_genid(); 1750 1846 if (inet_metrics_new(peer)) 1751 1847 memcpy(peer->metrics, fi->fib_metrics, 1752 1848 sizeof(u32) * 
RTAX_MAX); 1753 1849 dst_init_metrics(&rt->dst, peer->metrics, false); 1754 - 1755 - check_peer_pmtu(&rt->dst, peer); 1756 - 1757 - if (peer->redirect_learned.a4 && 1758 - peer->redirect_learned.a4 != rt->rt_gateway) { 1759 - rt->rt_gateway = peer->redirect_learned.a4; 1760 - rt->rt_flags |= RTCF_REDIRECTED; 1761 - } 1762 1850 } else { 1763 1851 if (fi->fib_metrics != (u32 *) dst_default_metrics) { 1764 1852 rt->fi = fi; ··· 1849 1955 rth->rt_iif = dev->ifindex; 1850 1956 rth->rt_oif = 0; 1851 1957 rth->rt_mark = skb->mark; 1958 + rth->rt_pmtu = 0; 1852 1959 rth->rt_gateway = daddr; 1853 - rth->rt_peer_genid = 0; 1854 1960 rt_init_peer(rth, dev_net(dev)->ipv4.peers); 1855 1961 rth->fi = NULL; 1856 1962 if (our) { ··· 1975 2081 rth->rt_iif = in_dev->dev->ifindex; 1976 2082 rth->rt_oif = 0; 1977 2083 rth->rt_mark = skb->mark; 2084 + rth->rt_pmtu = 0; 1978 2085 rth->rt_gateway = daddr; 1979 - rth->rt_peer_genid = 0; 1980 2086 rt_init_peer(rth, &res->table->tb_peers); 1981 2087 rth->fi = NULL; 1982 2088 ··· 2154 2260 rth->rt_iif = dev->ifindex; 2155 2261 rth->rt_oif = 0; 2156 2262 rth->rt_mark = skb->mark; 2263 + rth->rt_pmtu = 0; 2157 2264 rth->rt_gateway = daddr; 2158 - rth->rt_peer_genid = 0; 2159 2265 rt_init_peer(rth, net->ipv4.peers); 2160 2266 rth->fi = NULL; 2161 2267 if (res.type == RTN_UNREACHABLE) { ··· 2231 2337 rth->rt_mark == skb->mark && 2232 2338 net_eq(dev_net(rth->dst.dev), net) && 2233 2339 !rt_is_expired(rth)) { 2234 - ipv4_validate_peer(rth); 2235 2340 if (noref) { 2236 2341 dst_use_noref(&rth->dst, jiffies); 2237 2342 skb_dst_set_noref(skb, &rth->dst); ··· 2352 2459 rth->rt_iif = orig_oif ? : dev_out->ifindex; 2353 2460 rth->rt_oif = orig_oif; 2354 2461 rth->rt_mark = fl4->flowi4_mark; 2462 + rth->rt_pmtu = 0; 2355 2463 rth->rt_gateway = fl4->daddr; 2356 - rth->rt_peer_genid = 0; 2357 2464 rt_init_peer(rth, (res->table ? 
2358 2465 &res->table->tb_peers : 2359 2466 dev_net(dev_out)->ipv4.peers)); ··· 2610 2717 (IPTOS_RT_MASK | RTO_ONLINK)) && 2611 2718 net_eq(dev_net(rth->dst.dev), net) && 2612 2719 !rt_is_expired(rth)) { 2613 - ipv4_validate_peer(rth); 2614 2720 dst_use(&rth->dst, jiffies); 2615 2721 RT_CACHE_STAT_INC(out_hit); 2616 2722 rcu_read_unlock_bh(); ··· 2686 2794 rt->rt_iif = ort->rt_iif; 2687 2795 rt->rt_oif = ort->rt_oif; 2688 2796 rt->rt_mark = ort->rt_mark; 2797 + rt->rt_pmtu = ort->rt_pmtu; 2689 2798 2690 2799 rt->rt_genid = rt_genid(net); 2691 2800 rt->rt_flags = ort->rt_flags; ··· 2789 2896 const struct inet_peer *peer = rt_peer_ptr(rt); 2790 2897 inet_peer_refcheck(peer); 2791 2898 id = atomic_read(&peer->ip_id_count) & 0xffff; 2792 - expires = ACCESS_ONCE(peer->pmtu_expires); 2793 - if (expires) { 2794 - if (time_before(jiffies, expires)) 2795 - expires -= jiffies; 2796 - else 2797 - expires = 0; 2798 - } 2899 + } 2900 + expires = rt->dst.expires; 2901 + if (expires) { 2902 + if (time_before(jiffies, expires)) 2903 + expires -= jiffies; 2904 + else 2905 + expires = 0; 2799 2906 } 2800 2907 2801 2908 if (rt_is_input_route(rt)) {
+1
net/ipv4/xfrm4_policy.c
··· 100 100 xdst->u.rt.rt_src = rt->rt_src; 101 101 xdst->u.rt.rt_dst = rt->rt_dst; 102 102 xdst->u.rt.rt_gateway = rt->rt_gateway; 103 + xdst->u.rt.rt_pmtu = rt->rt_pmtu; 103 104 104 105 return 0; 105 106 }