Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[IPSEC]: Add XFRM_STATE_NOPMTUDISC flag

This patch adds the flag XFRM_STATE_NOPMTUDISC for xfrm states. It is
similar to the nopmtudisc option on IPIP/GRE tunnels. It only has an effect
on IPv4 tunnel mode states. For these states, it ensures that the
DF flag is always cleared in the outer IP header.

This is primarily useful to work around ICMP blackholes.

In the future, this flag could also allow a larger MTU to be set within the
tunnel, just like IPIP/GRE tunnels. This could be useful for short-haul
tunnels where temporary fragmentation outside the tunnel is preferred over
smaller fragments inside the tunnel.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: James Morris <jmorris@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Herbert Xu and committed by David S. Miller
dd87147e d094cd83

+21 -2
+1
include/linux/pfkeyv2.h
··· 245 245 246 246 /* Security Association flags */ 247 247 #define SADB_SAFLAGS_PFS 1 248 + #define SADB_SAFLAGS_NOPMTUDISC 0x20000000 248 249 #define SADB_SAFLAGS_DECAP_DSCP 0x40000000 249 250 #define SADB_SAFLAGS_NOECN 0x80000000 250 251
+1
include/linux/xfrm.h
··· 196 196 __u8 flags; 197 197 #define XFRM_STATE_NOECN 1 198 198 #define XFRM_STATE_DECAP_DSCP 2 199 + #define XFRM_STATE_NOPMTUDISC 4 199 200 }; 200 201 201 202 struct xfrm_usersa_id {
+6 -2
net/ipv4/xfrm4_output.c
··· 33 33 struct dst_entry *dst = skb->dst; 34 34 struct xfrm_state *x = dst->xfrm; 35 35 struct iphdr *iph, *top_iph; 36 + int flags; 36 37 37 38 iph = skb->nh.iph; 38 39 skb->h.ipiph = iph; ··· 52 51 53 52 /* DS disclosed */ 54 53 top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos); 55 - if (x->props.flags & XFRM_STATE_NOECN) 54 + 55 + flags = x->props.flags; 56 + if (flags & XFRM_STATE_NOECN) 56 57 IP_ECN_clear(top_iph); 57 58 58 - top_iph->frag_off = iph->frag_off & htons(IP_DF); 59 + top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 60 + 0 : (iph->frag_off & htons(IP_DF)); 59 61 if (!top_iph->frag_off) 60 62 __ip_select_ident(top_iph, dst, 0); 61 63
+9
net/ipv4/xfrm4_state.c
··· 7 7 * 8 8 */ 9 9 10 + #include <net/ip.h> 10 11 #include <net/xfrm.h> 11 12 #include <linux/pfkeyv2.h> 12 13 #include <linux/ipsec.h> 13 14 14 15 static struct xfrm_state_afinfo xfrm4_state_afinfo; 16 + 17 + static int xfrm4_init_flags(struct xfrm_state *x) 18 + { 19 + if (ipv4_config.no_pmtu_disc) 20 + x->props.flags |= XFRM_STATE_NOPMTUDISC; 21 + return 0; 22 + } 15 23 16 24 static void 17 25 __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, ··· 117 109 static struct xfrm_state_afinfo xfrm4_state_afinfo = { 118 110 .family = AF_INET, 119 111 .lock = RW_LOCK_UNLOCKED, 112 + .init_flags = xfrm4_init_flags, 120 113 .init_tempsel = __xfrm4_init_tempsel, 121 114 .state_lookup = __xfrm4_state_lookup, 122 115 .find_acq = __xfrm4_find_acq,
+4
net/key/af_key.c
··· 690 690 sa->sadb_sa_flags |= SADB_SAFLAGS_NOECN; 691 691 if (x->props.flags & XFRM_STATE_DECAP_DSCP) 692 692 sa->sadb_sa_flags |= SADB_SAFLAGS_DECAP_DSCP; 693 + if (x->props.flags & XFRM_STATE_NOPMTUDISC) 694 + sa->sadb_sa_flags |= SADB_SAFLAGS_NOPMTUDISC; 693 695 694 696 /* hard time */ 695 697 if (hsc & 2) { ··· 976 974 x->props.flags |= XFRM_STATE_NOECN; 977 975 if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP) 978 976 x->props.flags |= XFRM_STATE_DECAP_DSCP; 977 + if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC) 978 + x->props.flags |= XFRM_STATE_NOPMTUDISC; 979 979 980 980 lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1]; 981 981 if (lifetime != NULL) {