Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ipv6: RFC 4884 partial support for SIT/GRE tunnels

When receiving an ICMPv4 message containing extensions as
defined in RFC 4884, and translating it to ICMPv6 at a SIT
or GRE tunnel, we need some extra manipulation in order
to properly forward the extensions.

This patch only takes care of Time Exceeded messages as they
are the ones that typically carry information from various
routers in a fabric during a traceroute session.

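It also avoids complex skb logic: if data_len is not a multiple
of 8 (or is below 128, or exceeds the skb length), the extensions
are ignored and the message is translated as before.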

RFC 4884 states:

The "original datagram" field MUST contain at least 128 octets.
If the original datagram did not contain 128 octets, the
"original datagram" field MUST be zero padded to 128 octets.

In practice routers use 128 bytes of original datagram, not more.

Initial translation was added in commit ca15a078bd90
("sit: generate icmpv6 error when receiving icmpv4 error")

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Oussama Ghorbel <ghorbel@pivasoftware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Eric Dumazet and committed by
David S. Miller
20e1954f 9b8c6d7b

+34 -7
+2 -1
include/linux/icmpv6.h
··· 18 18 const struct in6_addr *force_saddr); 19 19 extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); 20 20 extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); 21 - int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type); 21 + int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, 22 + unsigned int data_len); 22 23 23 24 #else 24 25
+1
include/uapi/linux/icmp.h
··· 79 79 __be16 __unused; 80 80 __be16 mtu; 81 81 } frag; 82 + __u8 reserved[4]; 82 83 } un; 83 84 }; 84 85
+4 -1
net/ipv4/ip_gre.c
··· 144 144 const struct iphdr *iph; 145 145 const int type = icmp_hdr(skb)->type; 146 146 const int code = icmp_hdr(skb)->code; 147 + unsigned int data_len = 0; 147 148 struct ip_tunnel *t; 148 149 149 150 switch (type) { ··· 170 169 case ICMP_TIME_EXCEEDED: 171 170 if (code != ICMP_EXC_TTL) 172 171 return; 172 + data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ 173 173 break; 174 174 175 175 case ICMP_REDIRECT: ··· 191 189 192 190 #if IS_ENABLED(CONFIG_IPV6) 193 191 if (tpi->proto == htons(ETH_P_IPV6) && 194 - !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, type)) 192 + !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, 193 + type, data_len)) 195 194 return; 196 195 #endif 197 196
+24 -4
net/ipv6/icmp.c
··· 564 564 * Either an IPv4 header for SIT encap 565 565 * an IPv4 header + GRE header for GRE encap 566 566 */ 567 - int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type) 567 + int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, 568 + unsigned int data_len) 568 569 { 569 570 struct in6_addr temp_saddr; 570 571 struct rt6_info *rt; 571 572 struct sk_buff *skb2; 573 + u32 info = 0; 572 574 573 575 if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8)) 574 576 return 1; 575 577 576 - skb2 = skb_clone(skb, GFP_ATOMIC); 578 + /* RFC 4884 (partial) support for ICMP extensions */ 579 + if (data_len < 128 || (data_len & 7) || skb->len < data_len) 580 + data_len = 0; 581 + 582 + skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC); 577 583 578 584 if (!skb2) 579 585 return 1; ··· 594 588 skb2->dev = rt->dst.dev; 595 589 596 590 ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr); 591 + 592 + if (data_len) { 593 + /* RFC 4884 (partial) support : 594 + * insert 0 padding at the end, before the extensions 595 + */ 596 + __skb_push(skb2, nhs); 597 + skb_reset_network_header(skb2); 598 + memmove(skb2->data, skb2->data + nhs, data_len - nhs); 599 + memset(skb2->data + data_len - nhs, 0, nhs); 600 + /* RFC 4884 4.5 : Length is measured in 64-bit words, 601 + * and stored in reserved[0] 602 + */ 603 + info = (data_len/8) << 24; 604 + } 597 605 if (type == ICMP_TIME_EXCEEDED) 598 606 icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 599 - 0, &temp_saddr); 607 + info, &temp_saddr); 600 608 else 601 609 icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 602 - 0, &temp_saddr); 610 + info, &temp_saddr); 603 611 if (rt) 604 612 ip6_rt_put(rt); 605 613
+3 -1
net/ipv6/sit.c
··· 484 484 const struct iphdr *iph = (const struct iphdr *)skb->data; 485 485 const int type = icmp_hdr(skb)->type; 486 486 const int code = icmp_hdr(skb)->code; 487 + unsigned int data_len = 0; 487 488 struct ip_tunnel *t; 488 489 int err; 489 490 ··· 509 508 case ICMP_TIME_EXCEEDED: 510 509 if (code != ICMP_EXC_TTL) 511 510 return 0; 511 + data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ 512 512 break; 513 513 case ICMP_REDIRECT: 514 514 break; ··· 538 536 } 539 537 540 538 err = 0; 541 - if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type)) 539 + if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) 542 540 goto out; 543 541 544 542 if (t->parms.iph.daddr == 0)