Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mpls: allow TTL propagation from IP packets to be configured

Allow TTL propagation from IP packets to MPLS packets to be
configured. Add a new optional LWT attribute, MPLS_IPTUNNEL_TTL, which
allows the TTL to be set in the resulting MPLS packet, with the value
of 0 having the semantics of enabling propagation of the TTL from the
IP header (i.e. non-zero values disable propagation).

Also allow the configuration to be overridden globally by reusing the
same sysctl to control whether the TTL is propagated from IP packets
into the MPLS header. If the per-LWT attribute is set then it
overrides the global configuration. If the TTL isn't propagated then a
default TTL value is used which can be configured via a new sysctl,
"net.mpls.default_ttl". This is kept separate from the configuration
of whether IP TTL propagation is enabled as it can be used in the
future when non-IP payloads are supported (i.e. where there is no
payload TTL that can be propagated).

Signed-off-by: Robert Shearman <rshearma@brocade.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Tested-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Robert Shearman and committed by David S. Miller
a59166e4 5b441ac8

+84 -13
+8
Documentation/networking/mpls-sysctl.txt
··· 30 30 0 - disabled / RFC 3443 [Short] Pipe Model 31 31 1 - enabled / RFC 3443 Uniform Model (default) 32 32 33 + default_ttl - INTEGER 34 + Default TTL value to use for MPLS packets where it cannot be 35 + propagated from an IP header, either because one isn't present 36 + or ip_ttl_propagate has been disabled. 37 + 38 + Possible values: 1 - 255 39 + Default: 255 40 + 33 41 conf/<interface>/input - BOOL 34 42 Control whether packets can be input on this interface. 35 43
+2
include/net/mpls_iptunnel.h
··· 19 19 struct mpls_iptunnel_encap { 20 20 u32 label[MAX_NEW_LABELS]; 21 21 u8 labels; 22 + u8 ttl_propagate; 23 + u8 default_ttl; 22 24 }; 23 25 24 26 static inline struct mpls_iptunnel_encap *mpls_lwtunnel_encap(struct lwtunnel_state *lwtstate)
+1
include/net/netns/mpls.h
··· 10 10 11 11 struct netns_mpls { 12 12 int ip_ttl_propagate; 13 + int default_ttl; 13 14 size_t platform_labels; 14 15 struct mpls_route __rcu * __rcu *platform_label; 15 16
+2
include/uapi/linux/mpls_iptunnel.h
··· 16 16 /* MPLS tunnel attributes 17 17 * [RTA_ENCAP] = { 18 18 * [MPLS_IPTUNNEL_DST] 19 + * [MPLS_IPTUNNEL_TTL] 19 20 * } 20 21 */ 21 22 enum { 22 23 MPLS_IPTUNNEL_UNSPEC, 23 24 MPLS_IPTUNNEL_DST, 25 + MPLS_IPTUNNEL_TTL, 24 26 __MPLS_IPTUNNEL_MAX, 25 27 }; 26 28 #define MPLS_IPTUNNEL_MAX (__MPLS_IPTUNNEL_MAX - 1)
+11
net/mpls/af_mpls.c
··· 34 34 static int zero = 0; 35 35 static int one = 1; 36 36 static int label_limit = (1 << 20) - 1; 37 + static int ttl_max = 255; 37 38 38 39 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, 39 40 struct nlmsghdr *nlh, struct net *net, u32 portid, ··· 2043 2042 .extra1 = &zero, 2044 2043 .extra2 = &one, 2045 2044 }, 2045 + { 2046 + .procname = "default_ttl", 2047 + .data = MPLS_NS_SYSCTL_OFFSET(mpls.default_ttl), 2048 + .maxlen = sizeof(int), 2049 + .mode = 0644, 2050 + .proc_handler = proc_dointvec_minmax, 2051 + .extra1 = &one, 2052 + .extra2 = &ttl_max, 2053 + }, 2046 2054 { } 2047 2055 }; 2048 2056 ··· 2063 2053 net->mpls.platform_labels = 0; 2064 2054 net->mpls.platform_label = NULL; 2065 2055 net->mpls.ip_ttl_propagate = 1; 2056 + net->mpls.default_ttl = 255; 2066 2057 2067 2058 table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL); 2068 2059 if (table == NULL)
+60 -13
net/mpls/mpls_iptunnel.c
··· 29 29 30 30 static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = { 31 31 [MPLS_IPTUNNEL_DST] = { .type = NLA_U32 }, 32 + [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 }, 32 33 }; 33 34 34 35 static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) ··· 50 49 struct rtable *rt = NULL; 51 50 struct rt6_info *rt6 = NULL; 52 51 struct mpls_dev *out_mdev; 52 + struct net *net; 53 53 int err = 0; 54 54 bool bos; 55 55 int i; ··· 58 56 59 57 /* Find the output device */ 60 58 out_dev = dst->dev; 61 - 62 - /* Obtain the ttl */ 63 - if (dst->ops->family == AF_INET) { 64 - ttl = ip_hdr(skb)->ttl; 65 - rt = (struct rtable *)dst; 66 - } else if (dst->ops->family == AF_INET6) { 67 - ttl = ipv6_hdr(skb)->hop_limit; 68 - rt6 = (struct rt6_info *)dst; 69 - } else { 70 - goto drop; 71 - } 59 + net = dev_net(out_dev); 72 60 73 61 skb_orphan(skb); 74 62 ··· 69 77 skb_forward_csum(skb); 70 78 71 79 tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate); 80 + 81 + /* Obtain the ttl using the following set of rules. 
82 + * 83 + * LWT ttl propagation setting: 84 + * - disabled => use default TTL value from LWT 85 + * - enabled => use TTL value from IPv4/IPv6 header 86 + * - default => 87 + * Global ttl propagation setting: 88 + * - disabled => use default TTL value from global setting 89 + * - enabled => use TTL value from IPv4/IPv6 header 90 + */ 91 + if (dst->ops->family == AF_INET) { 92 + if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) 93 + ttl = tun_encap_info->default_ttl; 94 + else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT && 95 + !net->mpls.ip_ttl_propagate) 96 + ttl = net->mpls.default_ttl; 97 + else 98 + ttl = ip_hdr(skb)->ttl; 99 + rt = (struct rtable *)dst; 100 + } else if (dst->ops->family == AF_INET6) { 101 + if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DISABLED) 102 + ttl = tun_encap_info->default_ttl; 103 + else if (tun_encap_info->ttl_propagate == MPLS_TTL_PROP_DEFAULT && 104 + !net->mpls.ip_ttl_propagate) 105 + ttl = net->mpls.default_ttl; 106 + else 107 + ttl = ipv6_hdr(skb)->hop_limit; 108 + rt6 = (struct rt6_info *)dst; 109 + } else { 110 + goto drop; 111 + } 72 112 73 113 /* Verify the destination can hold the packet */ 74 114 new_header_size = mpls_encap_size(tun_encap_info); ··· 184 160 &tun_encap_info->labels, tun_encap_info->label); 185 161 if (ret) 186 162 goto errout; 163 + 164 + tun_encap_info->ttl_propagate = MPLS_TTL_PROP_DEFAULT; 165 + 166 + if (tb[MPLS_IPTUNNEL_TTL]) { 167 + tun_encap_info->default_ttl = nla_get_u8(tb[MPLS_IPTUNNEL_TTL]); 168 + /* TTL 0 implies propagate from IP header */ 169 + tun_encap_info->ttl_propagate = tun_encap_info->default_ttl ? 
170 + MPLS_TTL_PROP_DISABLED : 171 + MPLS_TTL_PROP_ENABLED; 172 + } 173 + 187 174 newts->type = LWTUNNEL_ENCAP_MPLS; 188 175 newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT; 189 176 newts->headroom = mpls_encap_size(tun_encap_info); ··· 221 186 tun_encap_info->label)) 222 187 goto nla_put_failure; 223 188 189 + if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT && 190 + nla_put_u8(skb, MPLS_IPTUNNEL_TTL, tun_encap_info->default_ttl)) 191 + goto nla_put_failure; 192 + 224 193 return 0; 225 194 226 195 nla_put_failure: ··· 234 195 static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate) 235 196 { 236 197 struct mpls_iptunnel_encap *tun_encap_info; 198 + int nlsize; 237 199 238 200 tun_encap_info = mpls_lwtunnel_encap(lwtstate); 239 201 240 - return nla_total_size(tun_encap_info->labels * 4); 202 + nlsize = nla_total_size(tun_encap_info->labels * 4); 203 + 204 + if (tun_encap_info->ttl_propagate != MPLS_TTL_PROP_DEFAULT) 205 + nlsize += nla_total_size(1); 206 + 207 + return nlsize; 241 208 } 242 209 243 210 static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) ··· 252 207 struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b); 253 208 int l; 254 209 255 - if (a_hdr->labels != b_hdr->labels) 210 + if (a_hdr->labels != b_hdr->labels || 211 + a_hdr->ttl_propagate != b_hdr->ttl_propagate || 212 + a_hdr->default_ttl != b_hdr->default_ttl) 256 213 return 1; 257 214 258 215 for (l = 0; l < MAX_NEW_LABELS; l++)