Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: Abstract default ADVMSS behind an accessor.

Make all RTAX_ADVMSS metric accesses go through a new helper function,
dst_metric_advmss().

Leave the actual default metric as "zero" in the real metric slot,
and compute the actual default value dynamically via a new dst_ops
AF specific callback.

For stacked IPSEC routes, we use the advmss of the path which
preserves existing behavior.

Unlike ipv4/ipv6, DECnet ties the advmss to the mtu and thus updates
advmss on pmtu updates. This inconsistency in advmss handling
results in more raw metric accesses than I wish we ended up with.

Signed-off-by: David S. Miller <davem@davemloft.net>

+75 -33
+1 -1
drivers/scsi/cxgbi/libcxgbi.c
··· 825 825 unsigned int idx; 826 826 struct dst_entry *dst = csk->dst; 827 827 828 - csk->advmss = dst_metric(dst, RTAX_ADVMSS); 828 + csk->advmss = dst_metric_advmss(dst); 829 829 830 830 if (csk->advmss > pmtu - 40) 831 831 csk->advmss = pmtu - 40;
+13 -1
include/net/dst.h
··· 112 112 static inline u32 113 113 dst_metric(const struct dst_entry *dst, const int metric) 114 114 { 115 - WARN_ON_ONCE(metric == RTAX_HOPLIMIT); 115 + WARN_ON_ONCE(metric == RTAX_HOPLIMIT || 116 + metric == RTAX_ADVMSS); 116 117 return dst_metric_raw(dst, metric); 118 + } 119 + 120 + static inline u32 121 + dst_metric_advmss(const struct dst_entry *dst) 122 + { 123 + u32 advmss = dst_metric_raw(dst, RTAX_ADVMSS); 124 + 125 + if (!advmss) 126 + advmss = dst->ops->default_advmss(dst); 127 + 128 + return advmss; 117 129 } 118 130 119 131 static inline void dst_metric_set(struct dst_entry *dst, int metric, u32 val)
+1
include/net/dst_ops.h
··· 16 16 17 17 int (*gc)(struct dst_ops *ops); 18 18 struct dst_entry * (*check)(struct dst_entry *, __u32 cookie); 19 + unsigned int (*default_advmss)(const struct dst_entry *); 19 20 void (*destroy)(struct dst_entry *); 20 21 void (*ifdown)(struct dst_entry *, 21 22 struct net_device *dev, int how);
+2 -2
net/decnet/af_decnet.c
··· 829 829 return -EINVAL; 830 830 831 831 scp->state = DN_CC; 832 - scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS); 832 + scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk)); 833 833 dn_send_conn_conf(sk, allocation); 834 834 835 835 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); ··· 958 958 sk->sk_route_caps = sk->sk_dst_cache->dev->features; 959 959 sock->state = SS_CONNECTING; 960 960 scp->state = DN_CI; 961 - scp->segsize_loc = dst_metric(sk->sk_dst_cache, RTAX_ADVMSS); 961 + scp->segsize_loc = dst_metric_advmss(sk->sk_dst_cache); 962 962 963 963 dn_nsp_send_conninit(sk, NSP_CI); 964 964 err = -EINPROGRESS;
+16 -6
net/decnet/dn_route.c
··· 110 110 111 111 static int dn_dst_gc(struct dst_ops *ops); 112 112 static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); 113 + static unsigned int dn_dst_default_advmss(const struct dst_entry *dst); 113 114 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); 114 115 static void dn_dst_link_failure(struct sk_buff *); 115 116 static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); ··· 130 129 .gc_thresh = 128, 131 130 .gc = dn_dst_gc, 132 131 .check = dn_dst_check, 132 + .default_advmss = dn_dst_default_advmss, 133 133 .negative_advice = dn_dst_negative_advice, 134 134 .link_failure = dn_dst_link_failure, 135 135 .update_pmtu = dn_dst_update_pmtu, ··· 247 245 } 248 246 if (!(dst_metric_locked(dst, RTAX_ADVMSS))) { 249 247 u32 mss = mtu - DN_MAX_NSP_DATA_HEADER; 250 - if (dst_metric(dst, RTAX_ADVMSS) > mss) 248 + u32 existing_mss = dst_metric_raw(dst, RTAX_ADVMSS); 249 + if (!existing_mss || existing_mss > mss) 251 250 dst_metric_set(dst, RTAX_ADVMSS, mss); 252 251 } 253 252 } ··· 798 795 return NET_RX_DROP; 799 796 } 800 797 798 + static unsigned int dn_dst_default_advmss(const struct dst_entry *dst) 799 + { 800 + return dn_mss_from_pmtu(dst->dev, dst_mtu(dst)); 801 + } 802 + 801 803 static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) 802 804 { 803 805 struct dn_fib_info *fi = res->fi; 804 806 struct net_device *dev = rt->dst.dev; 805 807 struct neighbour *n; 806 - unsigned mss; 808 + unsigned int metric; 807 809 808 810 if (fi) { 809 811 if (DN_FIB_RES_GW(*res) && ··· 828 820 if (dst_metric(&rt->dst, RTAX_MTU) == 0 || 829 821 dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu) 830 822 dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu); 831 - mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); 832 - if (dst_metric(&rt->dst, RTAX_ADVMSS) == 0 || 833 - dst_metric(&rt->dst, RTAX_ADVMSS) > mss) 834 - dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); 823 + metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS); 824 + if (metric) { 825 + unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst)); 826 + if (metric > mss) 827 + dst_metric_set(&rt->dst, RTAX_ADVMSS, mss); 828 + } 835 829 return 0; 836 830 }
+17 -7
net/ipv4/route.c
··· 139 139 */ 140 140 141 141 static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); 142 + static unsigned int ipv4_default_advmss(const struct dst_entry *dst); 142 143 static void ipv4_dst_destroy(struct dst_entry *dst); 143 144 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); 144 145 static void ipv4_link_failure(struct sk_buff *skb); ··· 156 155 .protocol = cpu_to_be16(ETH_P_IP), 157 156 .gc = rt_garbage_collect, 158 157 .check = ipv4_dst_check, 158 + .default_advmss = ipv4_default_advmss, 159 159 .destroy = ipv4_dst_destroy, 160 160 .ifdown = ipv4_dst_ifdown, 161 161 .negative_advice = ipv4_negative_advice, ··· 385 383 (__force u32)r->rt_gateway, 386 384 r->rt_flags, atomic_read(&r->dst.__refcnt), 387 385 r->dst.__use, 0, (__force u32)r->rt_src, 388 - (dst_metric(&r->dst, RTAX_ADVMSS) ? 389 - (int)dst_metric(&r->dst, RTAX_ADVMSS) + 40 : 0), 386 + dst_metric_advmss(&r->dst) + 40, 390 387 dst_metric(&r->dst, RTAX_WINDOW), 391 388 (int)((dst_metric(&r->dst, RTAX_RTT) >> 3) + 392 389 dst_metric(&r->dst, RTAX_RTTVAR)), ··· 1799 1798 } 1800 1799 #endif 1801 1800 1801 + static unsigned int ipv4_default_advmss(const struct dst_entry *dst) 1802 + { 1803 + unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS); 1804 + 1805 + if (advmss == 0) { 1806 + advmss = max_t(unsigned int, dst->dev->mtu - 40, 1807 + ip_rt_min_advmss); 1808 + if (advmss > 65535 - 40) 1809 + advmss = 65535 - 40; 1810 + } 1811 + return advmss; 1812 + } 1813 + 1802 1814 static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) 1803 1815 { 1804 1816 struct dst_entry *dst = &rt->dst; ··· 1837 1823 1838 1824 if (dst_mtu(dst) > IP_MAX_MTU) 1839 1825 dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); 1840 - if (dst_metric(dst, RTAX_ADVMSS) == 0) 1841 - dst_metric_set(dst, RTAX_ADVMSS, 1842 - max_t(unsigned int, dst->dev->mtu - 40, 1843 - ip_rt_min_advmss)); 1844 - if (dst_metric(dst, RTAX_ADVMSS) > 65535 - 40) 1826 + if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) 1845 1827 dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); 1846 1828 1847 1829 #ifdef CONFIG_NET_CLS_ROUTE
+1 -1
net/ipv4/tcp_ipv4.c
··· 1436 1436 1437 1437 tcp_mtup_init(newsk); 1438 1438 tcp_sync_mss(newsk, dst_mtu(dst)); 1439 - newtp->advmss = dst_metric(dst, RTAX_ADVMSS); 1439 + newtp->advmss = dst_metric_advmss(dst); 1440 1440 if (tcp_sk(sk)->rx_opt.user_mss && 1441 1441 tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) 1442 1442 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
+9 -5
net/ipv4/tcp_output.c
··· 119 119 struct dst_entry *dst = __sk_dst_get(sk); 120 120 int mss = tp->advmss; 121 121 122 - if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) { 123 - mss = dst_metric(dst, RTAX_ADVMSS); 124 - tp->advmss = mss; 122 + if (dst) { 123 + unsigned int metric = dst_metric_advmss(dst); 124 + 125 + if (metric < mss) { 126 + mss = metric; 127 + tp->advmss = mss; 128 + } 125 129 } 126 130 127 131 return (__u16)mss; ··· 2426 2422 2427 2423 skb_dst_set(skb, dst_clone(dst)); 2428 2424 2429 - mss = dst_metric(dst, RTAX_ADVMSS); 2425 + mss = dst_metric_advmss(dst); 2430 2426 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) 2431 2427 mss = tp->rx_opt.user_mss; 2432 2428 ··· 2560 2556 2561 2557 if (!tp->window_clamp) 2562 2558 tp->window_clamp = dst_metric(dst, RTAX_WINDOW); 2563 - tp->advmss = dst_metric(dst, RTAX_ADVMSS); 2559 + tp->advmss = dst_metric_advmss(dst); 2564 2560 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss) 2565 2561 tp->advmss = tp->rx_opt.user_mss; 2566 2562
+7 -9
net/ipv6/route.c
··· 76 76 77 77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort); 78 78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); 79 + static unsigned int ip6_default_advmss(const struct dst_entry *dst); 79 80 static struct dst_entry *ip6_negative_advice(struct dst_entry *); 80 81 static void ip6_dst_destroy(struct dst_entry *); 81 82 static void ip6_dst_ifdown(struct dst_entry *, ··· 104 103 .gc = ip6_dst_gc, 105 104 .gc_thresh = 1024, 106 105 .check = ip6_dst_check, 106 + .default_advmss = ip6_default_advmss, 107 107 .destroy = ip6_dst_destroy, 108 108 .ifdown = ip6_dst_ifdown, 109 109 .negative_advice = ip6_negative_advice, ··· 939 937 940 938 static int ipv6_get_mtu(struct net_device *dev); 941 939 942 - static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu) 940 + static unsigned int ip6_default_advmss(const struct dst_entry *dst) 943 941 { 942 + struct net_device *dev = dst->dev; 943 + unsigned int mtu = dst_mtu(dst); 944 + struct net *net = dev_net(dev); 945 + 944 946 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr); 945 947 946 948 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss) ··· 996 990 atomic_set(&rt->dst.__refcnt, 1); 997 991 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255); 998 992 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); 999 - dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); 1000 993 rt->dst.output = ip6_output; 1001 994 1002 995 #if 0 /* there's no chance to use these for ndisc */ ··· 1317 1312 1318 1313 if (!dst_mtu(&rt->dst)) 1319 1314 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(dev)); 1320 - if (!dst_metric(&rt->dst, RTAX_ADVMSS)) 1321 - dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); 1322 1315 rt->dst.dev = dev; 1323 1316 rt->rt6i_idev = idev; 1324 1317 rt->rt6i_table = table; ··· 1543 1540 nrt->rt6i_nexthop = neigh_clone(neigh); 1544 1541 /* Reset pmtu, it may be better */ 1545 1542 dst_metric_set(&nrt->dst, RTAX_MTU, ipv6_get_mtu(neigh->dev)); 1546 - dst_metric_set(&nrt->dst, RTAX_ADVMSS, ipv6_advmss(dev_net(neigh->dev), 1547 - dst_mtu(&nrt->dst))); 1548 1543 1549 1544 if (ip6_ins_rt(nrt)) 1550 1545 goto out; ··· 1972 1971 rt->rt6i_dev = net->loopback_dev; 1973 1972 rt->rt6i_idev = idev; 1974 1973 dst_metric_set(&rt->dst, RTAX_MTU, ipv6_get_mtu(rt->rt6i_dev)); 1975 - dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, dst_mtu(&rt->dst))); 1976 1974 dst_metric_set(&rt->dst, RTAX_HOPLIMIT, -1); 1977 1975 rt->dst.obsolete = -1; 1978 1976 ··· 2041 2041 { 2042 2042 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg; 2043 2043 struct inet6_dev *idev; 2044 - struct net *net = dev_net(arg->dev); 2045 2044 2046 2045 /* In IPv6 pmtu discovery is not optional, 2047 2046 so that RTAX_MTU lock cannot disable it. ··· 2072 2073 (dst_mtu(&rt->dst) < arg->mtu && 2073 2074 dst_mtu(&rt->dst) == idev->cnf.mtu6))) { 2074 2075 dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); 2075 - dst_metric_set(&rt->dst, RTAX_ADVMSS, ipv6_advmss(net, arg->mtu)); 2076 2076 } 2077 2077 return 0; 2078 2078 }
+1 -1
net/ipv6/tcp_ipv6.c
··· 1521 1521 1522 1522 tcp_mtup_init(newsk); 1523 1523 tcp_sync_mss(newsk, dst_mtu(dst)); 1524 - newtp->advmss = dst_metric(dst, RTAX_ADVMSS); 1524 + newtp->advmss = dst_metric_advmss(dst); 1525 1525 tcp_initialize_rcv_mss(newsk); 1526 1526 1527 1527 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
+7
net/xfrm/xfrm_policy.c
··· 2361 2361 return 1; 2362 2362 } 2363 2363 2364 + static unsigned int xfrm_default_advmss(const struct dst_entry *dst) 2365 + { 2366 + return dst_metric_advmss(dst->path); 2367 + } 2368 + 2364 2369 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) 2365 2370 { 2366 2371 struct net *net; ··· 2383 2378 dst_ops->kmem_cachep = xfrm_dst_cache; 2384 2379 if (likely(dst_ops->check == NULL)) 2385 2380 dst_ops->check = xfrm_dst_check; 2381 + if (likely(dst_ops->default_advmss == NULL)) 2382 + dst_ops->default_advmss = xfrm_default_advmss; 2386 2383 if (likely(dst_ops->negative_advice == NULL)) 2387 2384 dst_ops->negative_advice = xfrm_negative_advice; 2388 2385 if (likely(dst_ops->link_failure == NULL))