Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

seg6: add support for SRv6 H.Encaps.Red behavior

The SRv6 H.Encaps.Red behavior described in [1] is an optimization of
the SRv6 H.Encaps behavior [2].

H.Encaps.Red reduces the length of the SRH by excluding the first
segment (SID) from the SRH of the pushed IPv6 header. The first SID is
placed only in the IPv6 Destination Address field of the pushed IPv6
header.
When the SRv6 Policy contains only one SID, the SRH is omitted entirely
unless there is an HMAC TLV to be carried.

[1] - https://datatracker.ietf.org/doc/html/rfc8986#section-5.2
[2] - https://datatracker.ietf.org/doc/html/rfc8986#section-5.1

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Signed-off-by: Anton Makarov <anton.makarov11235@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Andrea Mayer and committed by
David S. Miller
b07c8cdb 5b91884b

+128 -1
+1
include/uapi/linux/seg6_iptunnel.h
··· 35 35 SEG6_IPTUN_MODE_INLINE, 36 36 SEG6_IPTUN_MODE_ENCAP, 37 37 SEG6_IPTUN_MODE_L2ENCAP, 38 + SEG6_IPTUN_MODE_ENCAP_RED, 38 39 }; 39 40 40 41 #endif
+127 -1
net/ipv6/seg6_iptunnel.c
··· 36 36 case SEG6_IPTUN_MODE_INLINE: 37 37 break; 38 38 case SEG6_IPTUN_MODE_ENCAP: 39 + case SEG6_IPTUN_MODE_ENCAP_RED: 39 40 head = sizeof(struct ipv6hdr); 40 41 break; 41 42 case SEG6_IPTUN_MODE_L2ENCAP: ··· 198 197 } 199 198 EXPORT_SYMBOL_GPL(seg6_do_srh_encap); 200 199 200 + /* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */ 201 + static int seg6_do_srh_encap_red(struct sk_buff *skb, 202 + struct ipv6_sr_hdr *osrh, int proto) 203 + { 204 + __u8 first_seg = osrh->first_segment; 205 + struct dst_entry *dst = skb_dst(skb); 206 + struct net *net = dev_net(dst->dev); 207 + struct ipv6hdr *hdr, *inner_hdr; 208 + int hdrlen = ipv6_optlen(osrh); 209 + int red_tlv_offset, tlv_offset; 210 + struct ipv6_sr_hdr *isrh; 211 + bool skip_srh = false; 212 + __be32 flowlabel; 213 + int tot_len, err; 214 + int red_hdrlen; 215 + int tlvs_len; 216 + 217 + if (first_seg > 0) { 218 + red_hdrlen = hdrlen - sizeof(struct in6_addr); 219 + } else { 220 + /* NOTE: if tag/flags and/or other TLVs are introduced in the 221 + * seg6_iptunnel infrastructure, they should be considered when 222 + * deciding to skip the SRH. 223 + */ 224 + skip_srh = !sr_has_hmac(osrh); 225 + 226 + red_hdrlen = skip_srh ? 
0 : hdrlen; 227 + } 228 + 229 + tot_len = red_hdrlen + sizeof(struct ipv6hdr); 230 + 231 + err = skb_cow_head(skb, tot_len + skb->mac_len); 232 + if (unlikely(err)) 233 + return err; 234 + 235 + inner_hdr = ipv6_hdr(skb); 236 + flowlabel = seg6_make_flowlabel(net, skb, inner_hdr); 237 + 238 + skb_push(skb, tot_len); 239 + skb_reset_network_header(skb); 240 + skb_mac_header_rebuild(skb); 241 + hdr = ipv6_hdr(skb); 242 + 243 + /* based on seg6_do_srh_encap() */ 244 + if (skb->protocol == htons(ETH_P_IPV6)) { 245 + ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), 246 + flowlabel); 247 + hdr->hop_limit = inner_hdr->hop_limit; 248 + } else { 249 + ip6_flow_hdr(hdr, 0, flowlabel); 250 + hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); 251 + 252 + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); 253 + IP6CB(skb)->iif = skb->skb_iif; 254 + } 255 + 256 + /* no matter if we have to skip the SRH or not, the first segment 257 + * always comes in the pushed IPv6 header. 258 + */ 259 + hdr->daddr = osrh->segments[first_seg]; 260 + 261 + if (skip_srh) { 262 + hdr->nexthdr = proto; 263 + 264 + set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); 265 + goto out; 266 + } 267 + 268 + /* we cannot skip the SRH, slow path */ 269 + 270 + hdr->nexthdr = NEXTHDR_ROUTING; 271 + isrh = (void *)hdr + sizeof(struct ipv6hdr); 272 + 273 + if (unlikely(!first_seg)) { 274 + /* this is a very rare case; we have only one SID but 275 + * we cannot skip the SRH since we are carrying some 276 + * other info. 
277 + */ 278 + memcpy(isrh, osrh, hdrlen); 279 + goto srcaddr; 280 + } 281 + 282 + tlv_offset = sizeof(*osrh) + (first_seg + 1) * sizeof(struct in6_addr); 283 + red_tlv_offset = tlv_offset - sizeof(struct in6_addr); 284 + 285 + memcpy(isrh, osrh, red_tlv_offset); 286 + 287 + tlvs_len = hdrlen - tlv_offset; 288 + if (unlikely(tlvs_len > 0)) { 289 + const void *s = (const void *)osrh + tlv_offset; 290 + void *d = (void *)isrh + red_tlv_offset; 291 + 292 + memcpy(d, s, tlvs_len); 293 + } 294 + 295 + --isrh->first_segment; 296 + isrh->hdrlen -= 2; 297 + 298 + srcaddr: 299 + isrh->nexthdr = proto; 300 + set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr); 301 + 302 + #ifdef CONFIG_IPV6_SEG6_HMAC 303 + if (unlikely(!skip_srh && sr_has_hmac(isrh))) { 304 + err = seg6_push_hmac(net, &hdr->saddr, isrh); 305 + if (unlikely(err)) 306 + return err; 307 + } 308 + #endif 309 + 310 + out: 311 + hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); 312 + 313 + skb_postpush_rcsum(skb, hdr, tot_len); 314 + 315 + return 0; 316 + } 317 + 201 318 /* insert an SRH within an IPv6 packet, just after the IPv6 header */ 202 319 int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) 203 320 { ··· 388 269 return err; 389 270 break; 390 271 case SEG6_IPTUN_MODE_ENCAP: 272 + case SEG6_IPTUN_MODE_ENCAP_RED: 391 273 err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6); 392 274 if (err) 393 275 return err; ··· 400 280 else 401 281 return -EINVAL; 402 282 403 - err = seg6_do_srh_encap(skb, tinfo->srh, proto); 283 + if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP) 284 + err = seg6_do_srh_encap(skb, tinfo->srh, proto); 285 + else 286 + err = seg6_do_srh_encap_red(skb, tinfo->srh, proto); 287 + 404 288 if (err) 405 289 return err; 406 290 ··· 640 516 case SEG6_IPTUN_MODE_ENCAP: 641 517 break; 642 518 case SEG6_IPTUN_MODE_L2ENCAP: 519 + break; 520 + case SEG6_IPTUN_MODE_ENCAP_RED: 643 521 break; 644 522 default: 645 523 return -EINVAL;