Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv6: ioam: Support for IOAM injection with lwtunnels

Add support for IOAM inline insertion (only for the host-to-host use case),
configured per route using lightweight tunnels. The target is iproute2
and the patch is ready. It will be posted as soon as this patchset is merged.
Here is an overview:

$ ip -6 ro ad fc00::1/128 encap ioam6 trace type 0x800000 ns 1 size 12 dev eth0

This example attaches an IOAM Pre-allocated Trace option to the
fc00::1/128 prefix. The IOAM namespace (ns) is 1, the size of the pre-allocated
trace data block is 12 octets (size), and the trace type — represented as a
bitfield — selects only the first IOAM data field (bit 0: hop_limit + node id)
for inclusion in the trace.

The reason why the in-transit (IPv6-in-IPv6 encapsulation) use case is not
implemented is explained on the patchset cover.

Signed-off-by: Justin Iurman <justin.iurman@uliege.be>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Justin Iurman and committed by
David S. Miller
3edede08 8c6f6fa6

+358 -12
+13
include/linux/ioam6_iptunnel.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ */ 2 + /* 3 + * IPv6 IOAM Lightweight Tunnel API 4 + * 5 + * Author: 6 + * Justin Iurman <justin.iurman@uliege.be> 7 + */ 8 + #ifndef _LINUX_IOAM6_IPTUNNEL_H 9 + #define _LINUX_IOAM6_IPTUNNEL_H 10 + 11 + #include <uapi/linux/ioam6_iptunnel.h> 12 + 13 + #endif /* _LINUX_IOAM6_IPTUNNEL_H */
+3
include/net/ioam6.h
··· 61 61 int ioam6_init(void); 62 62 void ioam6_exit(void); 63 63 64 + int ioam6_iptunnel_init(void); 65 + void ioam6_iptunnel_exit(void); 66 + 64 67 #endif /* _NET_IOAM6_H */
+1
include/uapi/linux/ioam6.h
··· 126 126 #error "Please fix <asm/byteorder.h>" 127 127 #endif 128 128 129 + #define IOAM6_TRACE_DATA_SIZE_MAX 244 129 130 __u8 data[0]; 130 131 } __attribute__((packed)); 131 132
+20
include/uapi/linux/ioam6_iptunnel.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ 2 + /* 3 + * IPv6 IOAM Lightweight Tunnel API 4 + * 5 + * Author: 6 + * Justin Iurman <justin.iurman@uliege.be> 7 + */ 8 + 9 + #ifndef _UAPI_LINUX_IOAM6_IPTUNNEL_H 10 + #define _UAPI_LINUX_IOAM6_IPTUNNEL_H 11 + 12 + enum { 13 + IOAM6_IPTUNNEL_UNSPEC, 14 + IOAM6_IPTUNNEL_TRACE, /* struct ioam6_trace_hdr */ 15 + __IOAM6_IPTUNNEL_MAX, 16 + }; 17 + 18 + #define IOAM6_IPTUNNEL_MAX (__IOAM6_IPTUNNEL_MAX - 1) 19 + 20 + #endif /* _UAPI_LINUX_IOAM6_IPTUNNEL_H */
+1
include/uapi/linux/lwtunnel.h
··· 14 14 LWTUNNEL_ENCAP_BPF, 15 15 LWTUNNEL_ENCAP_SEG6_LOCAL, 16 16 LWTUNNEL_ENCAP_RPL, 17 + LWTUNNEL_ENCAP_IOAM6, 17 18 __LWTUNNEL_ENCAP_MAX, 18 19 }; 19 20
+2
net/core/lwtunnel.c
··· 43 43 return "SEG6LOCAL"; 44 44 case LWTUNNEL_ENCAP_RPL: 45 45 return "RPL"; 46 + case LWTUNNEL_ENCAP_IOAM6: 47 + return "IOAM6"; 46 48 case LWTUNNEL_ENCAP_IP6: 47 49 case LWTUNNEL_ENCAP_IP: 48 50 case LWTUNNEL_ENCAP_NONE:
+11
net/ipv6/Kconfig
··· 328 328 329 329 If unsure, say N. 330 330 331 + config IPV6_IOAM6_LWTUNNEL 332 + bool "IPv6: IOAM Pre-allocated Trace insertion support" 333 + depends on IPV6 334 + select LWTUNNEL 335 + help 336 + Support for the inline insertion of IOAM Pre-allocated 337 + Trace Header (only on locally generated packets), using 338 + the lightweight tunnels mechanism. 339 + 340 + If unsure, say N. 341 + 331 342 endif # IPV6
+1
net/ipv6/Makefile
··· 27 27 ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o 28 28 ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o 29 29 ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o 30 + ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o 30 31 31 32 ipv6-objs += $(ipv6-y) 32 33
+32 -12
net/ipv6/ioam6.c
··· 648 648 if (skb->dev) 649 649 byte--; 650 650 651 - raw32 = dev_net(skb->dev)->ipv6.sysctl.ioam6_id; 651 + raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id; 652 652 653 653 *(__be32 *)data = cpu_to_be32((byte << 24) | raw32); 654 654 data += sizeof(__be32); ··· 675 675 676 676 /* timestamp seconds */ 677 677 if (trace->type.bit2) { 678 - if (!skb->tstamp) 679 - __net_timestamp(skb); 678 + if (!skb->dev) { 679 + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); 680 + } else { 681 + if (!skb->tstamp) 682 + __net_timestamp(skb); 680 683 681 - skb_get_new_timestamp(skb, &ts); 682 - 683 - *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); 684 + skb_get_new_timestamp(skb, &ts); 685 + *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec); 686 + } 684 687 data += sizeof(__be32); 685 688 } 686 689 687 690 /* timestamp subseconds */ 688 691 if (trace->type.bit3) { 689 - if (!skb->tstamp) 690 - __net_timestamp(skb); 692 + if (!skb->dev) { 693 + *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE); 694 + } else { 695 + if (!skb->tstamp) 696 + __net_timestamp(skb); 691 697 692 - if (!trace->type.bit2) 693 - skb_get_new_timestamp(skb, &ts); 698 + if (!trace->type.bit2) 699 + skb_get_new_timestamp(skb, &ts); 694 700 695 - *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec); 701 + *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec); 702 + } 696 703 data += sizeof(__be32); 697 704 } 698 705 ··· 733 726 if (skb->dev) 734 727 byte--; 735 728 736 - raw64 = dev_net(skb->dev)->ipv6.sysctl.ioam6_id_wide; 729 + raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide; 737 730 738 731 *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64); 739 732 data += sizeof(__be64); ··· 881 874 if (err) 882 875 goto out_unregister_pernet_subsys; 883 876 877 + #ifdef CONFIG_IPV6_IOAM6_LWTUNNEL 878 + err = ioam6_iptunnel_init(); 879 + if (err) 880 + goto out_unregister_genl; 881 + #endif 882 + 884 883 pr_info("In-situ OAM (IOAM) with IPv6\n"); 885 884 886 885 out: 887 886 return err; 887 + #ifdef 
CONFIG_IPV6_IOAM6_LWTUNNEL 888 + out_unregister_genl: 889 + genl_unregister_family(&ioam6_genl_family); 890 + #endif 888 891 out_unregister_pernet_subsys: 889 892 unregister_pernet_subsys(&ioam6_net_ops); 890 893 goto out; ··· 902 885 903 886 void ioam6_exit(void) 904 887 { 888 + #ifdef CONFIG_IPV6_IOAM6_LWTUNNEL 889 + ioam6_iptunnel_exit(); 890 + #endif 905 891 genl_unregister_family(&ioam6_genl_family); 906 892 unregister_pernet_subsys(&ioam6_net_ops); 907 893 }
+274
net/ipv6/ioam6_iptunnel.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* 3 + * IPv6 IOAM Lightweight Tunnel implementation 4 + * 5 + * Author: 6 + * Justin Iurman <justin.iurman@uliege.be> 7 + */ 8 + 9 + #include <linux/kernel.h> 10 + #include <linux/skbuff.h> 11 + #include <linux/net.h> 12 + #include <linux/netlink.h> 13 + #include <linux/in6.h> 14 + #include <linux/ioam6.h> 15 + #include <linux/ioam6_iptunnel.h> 16 + #include <net/dst.h> 17 + #include <net/sock.h> 18 + #include <net/lwtunnel.h> 19 + #include <net/ioam6.h> 20 + 21 + #define IOAM6_MASK_SHORT_FIELDS 0xff100000 22 + #define IOAM6_MASK_WIDE_FIELDS 0xe00000 23 + 24 + struct ioam6_lwt_encap { 25 + struct ipv6_hopopt_hdr eh; 26 + u8 pad[2]; /* 2-octet padding for 4n-alignment */ 27 + struct ioam6_hdr ioamh; 28 + struct ioam6_trace_hdr traceh; 29 + } __packed; 30 + 31 + struct ioam6_lwt { 32 + struct ioam6_lwt_encap tuninfo; 33 + }; 34 + 35 + static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt) 36 + { 37 + return (struct ioam6_lwt *)lwt->data; 38 + } 39 + 40 + static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt) 41 + { 42 + return &ioam6_lwt_state(lwt)->tuninfo; 43 + } 44 + 45 + static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt) 46 + { 47 + return &(ioam6_lwt_state(lwt)->tuninfo.traceh); 48 + } 49 + 50 + static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = { 51 + [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)), 52 + }; 53 + 54 + static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype, 55 + struct ioam6_trace_hdr *trace) 56 + { 57 + struct ioam6_trace_hdr *data; 58 + struct nlattr *nla; 59 + int len; 60 + 61 + len = sizeof(*trace); 62 + 63 + nla = nla_reserve(skb, attrtype, len); 64 + if (!nla) 65 + return -EMSGSIZE; 66 + 67 + data = nla_data(nla); 68 + memcpy(data, trace, len); 69 + 70 + return 0; 71 + } 72 + 73 + static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace) 74 + { 75 + u32 fields; 
76 + 77 + if (!trace->type_be32 || !trace->remlen || 78 + trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4) 79 + return false; 80 + 81 + trace->nodelen = 0; 82 + fields = be32_to_cpu(trace->type_be32); 83 + 84 + trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS) 85 + * (sizeof(__be32) / 4); 86 + trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS) 87 + * (sizeof(__be64) / 4); 88 + 89 + return true; 90 + } 91 + 92 + static int ioam6_build_state(struct net *net, struct nlattr *nla, 93 + unsigned int family, const void *cfg, 94 + struct lwtunnel_state **ts, 95 + struct netlink_ext_ack *extack) 96 + { 97 + struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1]; 98 + struct ioam6_lwt_encap *tuninfo; 99 + struct ioam6_trace_hdr *trace; 100 + struct lwtunnel_state *s; 101 + int len_aligned; 102 + int len, err; 103 + 104 + if (family != AF_INET6) 105 + return -EINVAL; 106 + 107 + err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla, 108 + ioam6_iptunnel_policy, extack); 109 + if (err < 0) 110 + return err; 111 + 112 + if (!tb[IOAM6_IPTUNNEL_TRACE]) { 113 + NL_SET_ERR_MSG(extack, "missing trace"); 114 + return -EINVAL; 115 + } 116 + 117 + trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]); 118 + if (!ioam6_validate_trace_hdr(trace)) { 119 + NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE], 120 + "invalid trace validation"); 121 + return -EINVAL; 122 + } 123 + 124 + len = sizeof(*tuninfo) + trace->remlen * 4; 125 + len_aligned = ALIGN(len, 8); 126 + 127 + s = lwtunnel_state_alloc(len_aligned); 128 + if (!s) 129 + return -ENOMEM; 130 + 131 + tuninfo = ioam6_lwt_info(s); 132 + tuninfo->eh.hdrlen = (len_aligned >> 3) - 1; 133 + tuninfo->pad[0] = IPV6_TLV_PADN; 134 + tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC; 135 + tuninfo->ioamh.opt_type = IPV6_TLV_IOAM; 136 + tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace) 137 + + trace->remlen * 4; 138 + 139 + memcpy(&tuninfo->traceh, trace, sizeof(*trace)); 140 + 141 + len = len_aligned - len; 142 + if (len == 1) { 143 + 
tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1; 144 + } else if (len > 0) { 145 + tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN; 146 + tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2; 147 + } 148 + 149 + s->type = LWTUNNEL_ENCAP_IOAM6; 150 + s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; 151 + 152 + *ts = s; 153 + 154 + return 0; 155 + } 156 + 157 + static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo) 158 + { 159 + struct ioam6_trace_hdr *trace; 160 + struct ipv6hdr *oldhdr, *hdr; 161 + struct ioam6_namespace *ns; 162 + int hdrlen, err; 163 + 164 + hdrlen = (tuninfo->eh.hdrlen + 1) << 3; 165 + 166 + err = skb_cow_head(skb, hdrlen + skb->mac_len); 167 + if (unlikely(err)) 168 + return err; 169 + 170 + oldhdr = ipv6_hdr(skb); 171 + skb_pull(skb, sizeof(*oldhdr)); 172 + skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr)); 173 + 174 + skb_push(skb, sizeof(*oldhdr) + hdrlen); 175 + skb_reset_network_header(skb); 176 + skb_mac_header_rebuild(skb); 177 + 178 + hdr = ipv6_hdr(skb); 179 + memmove(hdr, oldhdr, sizeof(*oldhdr)); 180 + tuninfo->eh.nexthdr = hdr->nexthdr; 181 + 182 + skb_set_transport_header(skb, sizeof(*hdr)); 183 + skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen); 184 + 185 + memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen); 186 + 187 + hdr->nexthdr = NEXTHDR_HOP; 188 + hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr)); 189 + 190 + trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb) 191 + + sizeof(struct ipv6_hopopt_hdr) + 2 192 + + sizeof(struct ioam6_hdr)); 193 + 194 + ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id); 195 + if (ns) 196 + ioam6_fill_trace_data(skb, ns, trace); 197 + 198 + return 0; 199 + } 200 + 201 + static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) 202 + { 203 + struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate; 204 + int err = -EINVAL; 205 + 206 + if (skb->protocol != htons(ETH_P_IPV6)) 207 + goto 
drop; 208 + 209 + /* Only for packets we send and 210 + * that do not contain a Hop-by-Hop yet 211 + */ 212 + if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) 213 + goto out; 214 + 215 + err = ioam6_do_inline(skb, ioam6_lwt_info(lwt)); 216 + if (unlikely(err)) 217 + goto drop; 218 + 219 + err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev)); 220 + if (unlikely(err)) 221 + goto drop; 222 + 223 + out: 224 + return lwt->orig_output(net, sk, skb); 225 + 226 + drop: 227 + kfree_skb(skb); 228 + return err; 229 + } 230 + 231 + static int ioam6_fill_encap_info(struct sk_buff *skb, 232 + struct lwtunnel_state *lwtstate) 233 + { 234 + struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); 235 + 236 + if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace)) 237 + return -EMSGSIZE; 238 + 239 + return 0; 240 + } 241 + 242 + static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate) 243 + { 244 + struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate); 245 + 246 + return nla_total_size(sizeof(*trace)); 247 + } 248 + 249 + static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) 250 + { 251 + struct ioam6_trace_hdr *a_hdr = ioam6_trace(a); 252 + struct ioam6_trace_hdr *b_hdr = ioam6_trace(b); 253 + 254 + return (a_hdr->namespace_id != b_hdr->namespace_id); 255 + } 256 + 257 + static const struct lwtunnel_encap_ops ioam6_iptun_ops = { 258 + .build_state = ioam6_build_state, 259 + .output = ioam6_output, 260 + .fill_encap = ioam6_fill_encap_info, 261 + .get_encap_size = ioam6_encap_nlsize, 262 + .cmp_encap = ioam6_encap_cmp, 263 + .owner = THIS_MODULE, 264 + }; 265 + 266 + int __init ioam6_iptunnel_init(void) 267 + { 268 + return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); 269 + } 270 + 271 + void ioam6_iptunnel_exit(void) 272 + { 273 + lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6); 274 + }