Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: add bool confirm_neigh parameter for dst_ops.update_pmtu

The MTU update code is supposed to be invoked in response to real
networking events that update the PMTU. In the IPv6 PMTU update function
__ip6_rt_update_pmtu(), we call dst_confirm_neigh() to update the
neighbor's confirmed time.

But the tunnel code calls the PMTU update before xmit, like:
- tnl_update_pmtu()
- skb_dst_update_pmtu()
- ip6_rt_update_pmtu()
- __ip6_rt_update_pmtu()
- dst_confirm_neigh()

If the tunnel remote dst MAC address has changed and we still do the neigh
confirm, we will not be able to update the neigh cache, and ping6 to the
remote will fail.

So for this ip_tunnel_xmit() case, _EVEN_ if the MTU is changed, we
should not be invoking dst_confirm_neigh() as we have no evidence
of successful two-way communication at this point.

On the other hand it is also important to keep the neigh reachability fresh
for TCP flows, so we cannot remove this dst_confirm_neigh() call.

To fix the issue, we have to add a new bool parameter to dst_ops.update_pmtu
to choose whether we should do the neigh confirm or not. This patch adds the
parameter and makes all callers pass true to preserve the previous behavior;
the tunnel code will be fixed one by one in later patches.

v5: No change.
v4: No change.
v3: Do not remove dst_confirm_neigh, but add a new bool parameter in
dst_ops.update_pmtu to control whether we should do neighbor confirm.
Also split the big patch to small ones for each area.
v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu.

Suggested-by: David Miller <davem@davemloft.net>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Hangbin Liu and committed by
David S. Miller
bd085ef6 ff43ae4b

+42 -25
+1 -1
drivers/net/gtp.c
··· 540 540 mtu = dst_mtu(&rt->dst); 541 541 } 542 542 543 - rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu); 543 + rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true); 544 544 545 545 if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && 546 546 mtu < ntohs(iph->tot_len)) {
+1 -1
include/net/dst.h
··· 516 516 struct dst_entry *dst = skb_dst(skb); 517 517 518 518 if (dst && dst->ops->update_pmtu) 519 - dst->ops->update_pmtu(dst, NULL, skb, mtu); 519 + dst->ops->update_pmtu(dst, NULL, skb, mtu, true); 520 520 } 521 521 522 522 static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
+2 -1
include/net/dst_ops.h
··· 27 27 struct dst_entry * (*negative_advice)(struct dst_entry *); 28 28 void (*link_failure)(struct sk_buff *); 29 29 void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, 30 - struct sk_buff *skb, u32 mtu); 30 + struct sk_buff *skb, u32 mtu, 31 + bool confirm_neigh); 31 32 void (*redirect)(struct dst_entry *dst, struct sock *sk, 32 33 struct sk_buff *skb); 33 34 int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb);
+2 -1
net/bridge/br_nf_core.c
··· 22 22 #endif 23 23 24 24 static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, 25 - struct sk_buff *skb, u32 mtu) 25 + struct sk_buff *skb, u32 mtu, 26 + bool confirm_neigh) 26 27 { 27 28 } 28 29
+4 -2
net/decnet/dn_route.c
··· 110 110 static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); 111 111 static void dn_dst_link_failure(struct sk_buff *); 112 112 static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, 113 - struct sk_buff *skb , u32 mtu); 113 + struct sk_buff *skb , u32 mtu, 114 + bool confirm_neigh); 114 115 static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, 115 116 struct sk_buff *skb); 116 117 static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, ··· 252 251 * advertise to the other end). 253 252 */ 254 253 static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, 255 - struct sk_buff *skb, u32 mtu) 254 + struct sk_buff *skb, u32 mtu, 255 + bool confirm_neigh) 256 256 { 257 257 struct dn_route *rt = (struct dn_route *) dst; 258 258 struct neighbour *n = rt->n;
+1 -1
net/ipv4/inet_connection_sock.c
··· 1086 1086 if (!dst) 1087 1087 goto out; 1088 1088 } 1089 - dst->ops->update_pmtu(dst, sk, NULL, mtu); 1089 + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); 1090 1090 1091 1091 dst = __sk_dst_check(sk, 0); 1092 1092 if (!dst)
+6 -3
net/ipv4/route.c
··· 139 139 static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); 140 140 static void ipv4_link_failure(struct sk_buff *skb); 141 141 static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 142 - struct sk_buff *skb, u32 mtu); 142 + struct sk_buff *skb, u32 mtu, 143 + bool confirm_neigh); 143 144 static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, 144 145 struct sk_buff *skb); 145 146 static void ipv4_dst_destroy(struct dst_entry *dst); ··· 1044 1043 } 1045 1044 1046 1045 static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 1047 - struct sk_buff *skb, u32 mtu) 1046 + struct sk_buff *skb, u32 mtu, 1047 + bool confirm_neigh) 1048 1048 { 1049 1049 struct rtable *rt = (struct rtable *) dst; 1050 1050 struct flowi4 fl4; ··· 2689 2687 } 2690 2688 2691 2689 static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, 2692 - struct sk_buff *skb, u32 mtu) 2690 + struct sk_buff *skb, u32 mtu, 2691 + bool confirm_neigh) 2693 2692 { 2694 2693 } 2695 2694
+3 -2
net/ipv4/xfrm4_policy.c
··· 100 100 } 101 101 102 102 static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, 103 - struct sk_buff *skb, u32 mtu) 103 + struct sk_buff *skb, u32 mtu, 104 + bool confirm_neigh) 104 105 { 105 106 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 106 107 struct dst_entry *path = xdst->route; 107 108 108 - path->ops->update_pmtu(path, sk, skb, mtu); 109 + path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); 109 110 } 110 111 111 112 static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
+1 -1
net/ipv6/inet6_connection_sock.c
··· 146 146 147 147 if (IS_ERR(dst)) 148 148 return NULL; 149 - dst->ops->update_pmtu(dst, sk, NULL, mtu); 149 + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); 150 150 151 151 dst = inet6_csk_route_socket(sk, &fl6); 152 152 return IS_ERR(dst) ? NULL : dst;
+1 -1
net/ipv6/ip6_gre.c
··· 1040 1040 1041 1041 /* TooBig packet may have updated dst->dev's mtu */ 1042 1042 if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) 1043 - dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); 1043 + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true); 1044 1044 1045 1045 err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, 1046 1046 NEXTHDR_GRE);
+15 -7
net/ipv6/route.c
··· 95 95 static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); 96 96 static void ip6_link_failure(struct sk_buff *skb); 97 97 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 98 - struct sk_buff *skb, u32 mtu); 98 + struct sk_buff *skb, u32 mtu, 99 + bool confirm_neigh); 99 100 static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, 100 101 struct sk_buff *skb); 101 102 static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, ··· 265 264 } 266 265 267 266 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, 268 - struct sk_buff *skb, u32 mtu) 267 + struct sk_buff *skb, u32 mtu, 268 + bool confirm_neigh) 269 269 { 270 270 } 271 271 ··· 2694 2692 } 2695 2693 2696 2694 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, 2697 - const struct ipv6hdr *iph, u32 mtu) 2695 + const struct ipv6hdr *iph, u32 mtu, 2696 + bool confirm_neigh) 2698 2697 { 2699 2698 const struct in6_addr *daddr, *saddr; 2700 2699 struct rt6_info *rt6 = (struct rt6_info *)dst; ··· 2713 2710 daddr = NULL; 2714 2711 saddr = NULL; 2715 2712 } 2716 - dst_confirm_neigh(dst, daddr); 2713 + 2714 + if (confirm_neigh) 2715 + dst_confirm_neigh(dst, daddr); 2716 + 2717 2717 mtu = max_t(u32, mtu, IPV6_MIN_MTU); 2718 2718 if (mtu >= dst_mtu(dst)) 2719 2719 return; ··· 2770 2764 } 2771 2765 2772 2766 static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, 2773 - struct sk_buff *skb, u32 mtu) 2767 + struct sk_buff *skb, u32 mtu, 2768 + bool confirm_neigh) 2774 2769 { 2775 - __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); 2770 + __ip6_rt_update_pmtu(dst, sk, skb ? 
ipv6_hdr(skb) : NULL, mtu, 2771 + confirm_neigh); 2776 2772 } 2777 2773 2778 2774 void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, ··· 2793 2785 2794 2786 dst = ip6_route_output(net, NULL, &fl6); 2795 2787 if (!dst->error) 2796 - __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); 2788 + __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true); 2797 2789 dst_release(dst); 2798 2790 } 2799 2791 EXPORT_SYMBOL_GPL(ip6_update_pmtu);
+3 -2
net/ipv6/xfrm6_policy.c
··· 98 98 } 99 99 100 100 static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, 101 - struct sk_buff *skb, u32 mtu) 101 + struct sk_buff *skb, u32 mtu, 102 + bool confirm_neigh) 102 103 { 103 104 struct xfrm_dst *xdst = (struct xfrm_dst *)dst; 104 105 struct dst_entry *path = xdst->route; 105 106 106 - path->ops->update_pmtu(path, sk, skb, mtu); 107 + path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); 107 108 } 108 109 109 110 static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
+1 -1
net/netfilter/ipvs/ip_vs_xmit.c
··· 208 208 struct rtable *ort = skb_rtable(skb); 209 209 210 210 if (!skb->dev && sk && sk_fullsock(sk)) 211 - ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); 211 + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true); 212 212 } 213 213 214 214 static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
+1 -1
net/sctp/transport.c
··· 263 263 264 264 pf->af->from_sk(&addr, sk); 265 265 pf->to_sk_daddr(&t->ipaddr, sk); 266 - dst->ops->update_pmtu(dst, sk, NULL, pmtu); 266 + dst->ops->update_pmtu(dst, sk, NULL, pmtu, true); 267 267 pf->to_sk_daddr(&addr, sk); 268 268 269 269 dst = sctp_transport_dst_check(t);