Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv6: implement dataplane support for rthdr type 4 (Segment Routing Header)

Implement minimal support for processing of SR-enabled packets
as described in
https://tools.ietf.org/html/draft-ietf-6man-segment-routing-header-02.

This patch implements the following operations:
- Intermediate segment endpoint: advance to the next segment (decrement Segments Left, rewrite the destination address) and reroute the packet.
- Egress for SR-encapsulated packets: decapsulation of outer IPv6 header + SRH
and routing of inner packet.
- Cleanup flag support for SR-inlined packets: removal of SRH if we are the
penultimate segment endpoint.

A per-interface sysctl seg6_enabled is provided, to accept/deny SR-enabled
packets. Default is deny.

This patch does not provide support for HMAC-signed packets.

Signed-off-by: David Lebrun <david.lebrun@uclouvain.be>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

David Lebrun and committed by
David S. Miller
1ababeba dc0b2c9c

+284
+1
include/linux/ipv6.h
··· 64 64 } stable_secret; 65 65 __s32 use_oif_addrs_only; 66 66 __s32 keep_addr_on_down; 67 + __s32 seg6_enabled; 67 68 68 69 struct ctl_table_header *sysctl_header; 69 70 };
+6
include/linux/seg6.h
··· 1 + #ifndef _LINUX_SEG6_H 2 + #define _LINUX_SEG6_H 3 + 4 + #include <uapi/linux/seg6.h> 5 + 6 + #endif
+36
include/net/seg6.h
··· 1 + /* 2 + * SR-IPv6 implementation 3 + * 4 + * Author: 5 + * David Lebrun <david.lebrun@uclouvain.be> 6 + * 7 + * 8 + * This program is free software; you can redistribute it and/or 9 + * modify it under the terms of the GNU General Public License 10 + * as published by the Free Software Foundation; either version 11 + * 2 of the License, or (at your option) any later version. 12 + */ 13 + 14 + #ifndef _NET_SEG6_H 15 + #define _NET_SEG6_H 16 + 17 + static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, 18 + __be32 to) 19 + { 20 + __be32 diff[] = { ~from, to }; 21 + 22 + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); 23 + } 24 + 25 + static inline void update_csum_diff16(struct sk_buff *skb, __be32 *from, 26 + __be32 *to) 27 + { 28 + __be32 diff[] = { 29 + ~from[0], ~from[1], ~from[2], ~from[3], 30 + to[0], to[1], to[2], to[3], 31 + }; 32 + 33 + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); 34 + } 35 + 36 + #endif
+2
include/uapi/linux/ipv6.h
··· 39 39 #define IPV6_SRCRT_STRICT 0x01 /* Deprecated; will be removed */ 40 40 #define IPV6_SRCRT_TYPE_0 0 /* Deprecated; will be removed */ 41 41 #define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ 42 + #define IPV6_SRCRT_TYPE_4 4 /* Segment Routing with IPv6 */ 42 43 43 44 /* 44 45 * routing header ··· 179 178 DEVCONF_DROP_UNSOLICITED_NA, 180 179 DEVCONF_KEEP_ADDR_ON_DOWN, 181 180 DEVCONF_RTR_SOLICIT_MAX_INTERVAL, 181 + DEVCONF_SEG6_ENABLED, 182 182 DEVCONF_MAX 183 183 }; 184 184
+54
include/uapi/linux/seg6.h
/*
 *  SR-IPv6 implementation
 *
 *  Author:
 *  David Lebrun <david.lebrun@uclouvain.be>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#ifndef _UAPI_LINUX_SEG6_H
#define _UAPI_LINUX_SEG6_H

/* Make the header self-contained for userspace consumers. */
#include <linux/types.h>	/* __u8 */
#include <linux/in6.h>		/* struct in6_addr */

/*
 * SRH
 *
 * Fixed eight-byte part of the Segment Routing Header, followed by the
 * segment list.  flag_1 carries the SR6_FLAG1_* bits defined below.
 */
struct ipv6_sr_hdr {
	__u8	nexthdr;
	__u8	hdrlen;
	__u8	type;
	__u8	segments_left;
	__u8	first_segment;
	__u8	flag_1;
	__u8	flag_2;
	__u8	reserved;

	struct in6_addr segments[0];
};

/* Bits of ipv6_sr_hdr.flag_1 */
#define SR6_FLAG1_CLEANUP	(1 << 7)
#define SR6_FLAG1_PROTECTED	(1 << 6)
#define SR6_FLAG1_OAM		(1 << 5)
#define SR6_FLAG1_ALERT		(1 << 4)
#define SR6_FLAG1_HMAC		(1 << 3)

/* Values of sr6_tlv.type */
#define SR6_TLV_INGRESS		1
#define SR6_TLV_EGRESS		2
#define SR6_TLV_OPAQUE		3
#define SR6_TLV_PADDING		4
#define SR6_TLV_HMAC		5

/* Non-zero when the corresponding flag_1 bit is set in *srh. */
#define sr_has_cleanup(srh) ((srh)->flag_1 & SR6_FLAG1_CLEANUP)
#define sr_has_hmac(srh) ((srh)->flag_1 & SR6_FLAG1_HMAC)

/* Generic type/length/value element; data[] holds the value bytes. */
struct sr6_tlv {
	__u8 type;
	__u8 len;
	__u8 data[0];
};

#endif
+10
net/ipv6/addrconf.c
··· 238 238 .use_oif_addrs_only = 0, 239 239 .ignore_routes_with_linkdown = 0, 240 240 .keep_addr_on_down = 0, 241 + .seg6_enabled = 0, 241 242 }; 242 243 243 244 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { ··· 285 284 .use_oif_addrs_only = 0, 286 285 .ignore_routes_with_linkdown = 0, 287 286 .keep_addr_on_down = 0, 287 + .seg6_enabled = 0, 288 288 }; 289 289 290 290 /* Check if a valid qdisc is available */ ··· 4946 4944 array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; 4947 4945 array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; 4948 4946 array[DEVCONF_KEEP_ADDR_ON_DOWN] = cnf->keep_addr_on_down; 4947 + array[DEVCONF_SEG6_ENABLED] = cnf->seg6_enabled; 4949 4948 } 4950 4949 4951 4950 static inline size_t inet6_ifla6_size(void) ··· 6037 6034 .mode = 0644, 6038 6035 .proc_handler = proc_dointvec, 6039 6036 6037 + }, 6038 + { 6039 + .procname = "seg6_enabled", 6040 + .data = &ipv6_devconf.seg6_enabled, 6041 + .maxlen = sizeof(int), 6042 + .mode = 0644, 6043 + .proc_handler = proc_dointvec, 6040 6044 }, 6041 6045 { 6042 6046 /* sentinel */
+175
net/ipv6/exthdrs.c
··· 47 47 #if IS_ENABLED(CONFIG_IPV6_MIP6) 48 48 #include <net/xfrm.h> 49 49 #endif 50 + #include <linux/seg6.h> 51 + #include <net/seg6.h> 50 52 51 53 #include <linux/uaccess.h> 52 54 ··· 288 286 return -1; 289 287 } 290 288 289 + static void seg6_update_csum(struct sk_buff *skb) 290 + { 291 + struct ipv6_sr_hdr *hdr; 292 + struct in6_addr *addr; 293 + __be32 from, to; 294 + 295 + /* srh is at transport offset and seg_left is already decremented 296 + * but daddr is not yet updated with next segment 297 + */ 298 + 299 + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); 300 + addr = hdr->segments + hdr->segments_left; 301 + 302 + hdr->segments_left++; 303 + from = *(__be32 *)hdr; 304 + 305 + hdr->segments_left--; 306 + to = *(__be32 *)hdr; 307 + 308 + /* update skb csum with diff resulting from seg_left decrement */ 309 + 310 + update_csum_diff4(skb, from, to); 311 + 312 + /* compute csum diff between current and next segment and update */ 313 + 314 + update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr), 315 + (__be32 *)addr); 316 + } 317 + 318 + static int ipv6_srh_rcv(struct sk_buff *skb) 319 + { 320 + struct inet6_skb_parm *opt = IP6CB(skb); 321 + struct net *net = dev_net(skb->dev); 322 + struct ipv6_sr_hdr *hdr; 323 + struct inet6_dev *idev; 324 + struct in6_addr *addr; 325 + bool cleanup = false; 326 + int accept_seg6; 327 + 328 + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); 329 + 330 + idev = __in6_dev_get(skb->dev); 331 + 332 + accept_seg6 = net->ipv6.devconf_all->seg6_enabled; 333 + if (accept_seg6 > idev->cnf.seg6_enabled) 334 + accept_seg6 = idev->cnf.seg6_enabled; 335 + 336 + if (!accept_seg6) { 337 + kfree_skb(skb); 338 + return -1; 339 + } 340 + 341 + looped_back: 342 + if (hdr->segments_left > 0) { 343 + if (hdr->nexthdr != NEXTHDR_IPV6 && hdr->segments_left == 1 && 344 + sr_has_cleanup(hdr)) 345 + cleanup = true; 346 + } else { 347 + if (hdr->nexthdr == NEXTHDR_IPV6) { 348 + int offset = (hdr->hdrlen + 1) << 3; 349 + 350 + 
skb_postpull_rcsum(skb, skb_network_header(skb), 351 + skb_network_header_len(skb)); 352 + 353 + if (!pskb_pull(skb, offset)) { 354 + kfree_skb(skb); 355 + return -1; 356 + } 357 + skb_postpull_rcsum(skb, skb_transport_header(skb), 358 + offset); 359 + 360 + skb_reset_network_header(skb); 361 + skb_reset_transport_header(skb); 362 + skb->encapsulation = 0; 363 + 364 + __skb_tunnel_rx(skb, skb->dev, net); 365 + 366 + netif_rx(skb); 367 + return -1; 368 + } 369 + 370 + opt->srcrt = skb_network_header_len(skb); 371 + opt->lastopt = opt->srcrt; 372 + skb->transport_header += (hdr->hdrlen + 1) << 3; 373 + opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); 374 + 375 + return 1; 376 + } 377 + 378 + if (hdr->segments_left >= (hdr->hdrlen >> 1)) { 379 + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 380 + IPSTATS_MIB_INHDRERRORS); 381 + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, 382 + ((&hdr->segments_left) - 383 + skb_network_header(skb))); 384 + kfree_skb(skb); 385 + return -1; 386 + } 387 + 388 + if (skb_cloned(skb)) { 389 + if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { 390 + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 391 + IPSTATS_MIB_OUTDISCARDS); 392 + kfree_skb(skb); 393 + return -1; 394 + } 395 + } 396 + 397 + hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); 398 + 399 + hdr->segments_left--; 400 + addr = hdr->segments + hdr->segments_left; 401 + 402 + skb_push(skb, sizeof(struct ipv6hdr)); 403 + 404 + if (skb->ip_summed == CHECKSUM_COMPLETE) 405 + seg6_update_csum(skb); 406 + 407 + ipv6_hdr(skb)->daddr = *addr; 408 + 409 + if (cleanup) { 410 + int srhlen = (hdr->hdrlen + 1) << 3; 411 + int nh = hdr->nexthdr; 412 + 413 + skb_pull_rcsum(skb, sizeof(struct ipv6hdr) + srhlen); 414 + memmove(skb_network_header(skb) + srhlen, 415 + skb_network_header(skb), 416 + (unsigned char *)hdr - skb_network_header(skb)); 417 + skb->network_header += srhlen; 418 + ipv6_hdr(skb)->nexthdr = nh; 419 + ipv6_hdr(skb)->payload_len = htons(skb->len - 420 + sizeof(struct 
ipv6hdr)); 421 + skb_push_rcsum(skb, sizeof(struct ipv6hdr)); 422 + } 423 + 424 + skb_dst_drop(skb); 425 + 426 + ip6_route_input(skb); 427 + 428 + if (skb_dst(skb)->error) { 429 + dst_input(skb); 430 + return -1; 431 + } 432 + 433 + if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { 434 + if (ipv6_hdr(skb)->hop_limit <= 1) { 435 + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), 436 + IPSTATS_MIB_INHDRERRORS); 437 + icmpv6_send(skb, ICMPV6_TIME_EXCEED, 438 + ICMPV6_EXC_HOPLIMIT, 0); 439 + kfree_skb(skb); 440 + return -1; 441 + } 442 + ipv6_hdr(skb)->hop_limit--; 443 + 444 + /* be sure that srh is still present before reinjecting */ 445 + if (!cleanup) { 446 + skb_pull(skb, sizeof(struct ipv6hdr)); 447 + goto looped_back; 448 + } 449 + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); 450 + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); 451 + } 452 + 453 + dst_input(skb); 454 + 455 + return -1; 456 + } 457 + 291 458 /******************************** 292 459 Routing header. 293 460 ********************************/ ··· 496 325 kfree_skb(skb); 497 326 return -1; 498 327 } 328 + 329 + /* segment routing */ 330 + if (hdr->type == IPV6_SRCRT_TYPE_4) 331 + return ipv6_srh_rcv(skb); 499 332 500 333 looped_back: 501 334 if (hdr->segments_left == 0) {