Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: icmp: pass zeroed opts from icmp{,v6}_ndo_send before sending

The icmp{,v6}_send functions make all sorts of use of skb->cb, casting
it with IPCB or IP6CB, assuming the skb to have come directly from the
inet layer. But when the packet comes from the ndo layer, especially
when forwarded, there's no telling what might be in skb->cb at that
point. As a result, the icmp sending code risks reading bogus memory
contents, which can result in nasty stack overflows such as this one
reported by a user:

panic+0x108/0x2ea
__stack_chk_fail+0x14/0x20
__icmp_send+0x5bd/0x5c0
icmp_ndo_send+0x148/0x160

In icmp_send, skb->cb is cast with IPCB and an ip_options struct is read
from it. The optlen parameter there is of particular note, as it can
induce writes beyond bounds. There are quite a few ways that can happen
in __ip_options_echo. For example:

// sptr/skb are attacker-controlled skb bytes
sptr = skb_network_header(skb);
// dptr/dopt points to stack memory allocated by __icmp_send
dptr = dopt->__data;
// sopt is the corrupt skb->cb in question
if (sopt->rr) {
optlen = sptr[sopt->rr+1]; // corrupt skb->cb + skb->data
soffset = sptr[sopt->rr+2]; // corrupt skb->cb + skb->data
// this now writes potentially attacker-controlled data,
// overflowing the stack:
memcpy(dptr, sptr+sopt->rr, optlen);
}

In the icmpv6_send case, the story is similar, but not as dire, as only
IP6CB(skb)->iif and IP6CB(skb)->dsthao are used. The dsthao case is
worse than the iif case, but it is passed to ipv6_find_tlv, which does
a bit of bounds checking on the value.

This is easy to simulate by doing a `memset(skb->cb, 0x41,
sizeof(skb->cb));` before calling icmp{,v6}_ndo_send, and it's only by
good fortune and the rarity of icmp sending from that context that we've
avoided reports like this until now. For example, in KASAN:

BUG: KASAN: stack-out-of-bounds in __ip_options_echo+0xa0e/0x12b0
Write of size 38 at addr ffff888006f1f80e by task ping/89
CPU: 2 PID: 89 Comm: ping Not tainted 5.10.0-rc7-debug+ #5
Call Trace:
dump_stack+0x9a/0xcc
print_address_description.constprop.0+0x1a/0x160
__kasan_report.cold+0x20/0x38
kasan_report+0x32/0x40
check_memory_region+0x145/0x1a0
memcpy+0x39/0x60
__ip_options_echo+0xa0e/0x12b0
__icmp_send+0x744/0x1700

Out of the 4 drivers that call icmp{,v6}_ndo_send, only gtp zeroed the cb for
the v4 case, while the rest did not. So this commit actually removes the
gtp-specific zeroing, while putting the code where it belongs in the
shared infrastructure of icmp{,v6}_ndo_send.

This commit fixes the issue by passing an empty IPCB or IP6CB along to
the functions that actually do the work. For the icmp_send, this was
already trivial, thanks to __icmp_send providing the plumbing function.
For icmpv6_send, this required a tiny bit of refactoring to make it
behave like the v4 case, after which it was straightforward.

Fixes: a2b78e9b2cac ("sunvnet: generate ICMP PTMUD messages for smaller port MTUs")
Reported-by: SinYu <liuxyon@gmail.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/netdev/CAF=yD-LOF116aHub6RMe8vB8ZpnrrnoTdqhobEx+bvoA8AsP0w@mail.gmail.com/T/
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Link: https://lore.kernel.org/r/20210223131858.72082-1-Jason@zx2c4.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Jason A. Donenfeld and committed by
Jakub Kicinski
ee576c47 42870a1a

+44 -25
-1
drivers/net/gtp.c
··· 543 543 if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && 544 544 mtu < ntohs(iph->tot_len)) { 545 545 netdev_dbg(dev, "packet too big, fragmentation needed\n"); 546 - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); 547 546 icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, 548 547 htonl(mtu)); 549 548 goto err_rt;
+20 -6
include/linux/icmpv6.h
··· 3 3 #define _LINUX_ICMPV6_H 4 4 5 5 #include <linux/skbuff.h> 6 + #include <linux/ipv6.h> 6 7 #include <uapi/linux/icmpv6.h> 7 8 8 9 static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) ··· 16 15 #if IS_ENABLED(CONFIG_IPV6) 17 16 18 17 typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, 19 - const struct in6_addr *force_saddr); 18 + const struct in6_addr *force_saddr, 19 + const struct inet6_skb_parm *parm); 20 20 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, 21 - const struct in6_addr *force_saddr); 21 + const struct in6_addr *force_saddr, 22 + const struct inet6_skb_parm *parm); 22 23 #if IS_BUILTIN(CONFIG_IPV6) 23 - static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) 24 + static inline void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, 25 + const struct inet6_skb_parm *parm) 24 26 { 25 - icmp6_send(skb, type, code, info, NULL); 27 + icmp6_send(skb, type, code, info, NULL, parm); 26 28 } 27 29 static inline int inet6_register_icmp_sender(ip6_icmp_send_t *fn) 28 30 { ··· 38 34 return 0; 39 35 } 40 36 #else 41 - extern void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info); 37 + extern void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, 38 + const struct inet6_skb_parm *parm); 42 39 extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); 43 40 extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); 44 41 #endif 42 + 43 + static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) 44 + { 45 + __icmpv6_send(skb, type, code, info, IP6CB(skb)); 46 + } 45 47 46 48 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, 47 49 unsigned int data_len); ··· 55 45 #if IS_ENABLED(CONFIG_NF_NAT) 56 46 void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); 57 47 #else 58 - #define icmpv6_ndo_send icmpv6_send 48 + static inline void icmpv6_ndo_send(struct sk_buff 
*skb_in, u8 type, u8 code, __u32 info) 49 + { 50 + struct inet6_skb_parm parm = { 0 }; 51 + __icmpv6_send(skb_in, type, code, info, &parm); 52 + } 59 53 #endif 60 54 61 55 #else
-1
include/linux/ipv6.h
··· 85 85 __s32 autoconf; 86 86 }; 87 87 extern struct ipv6_params ipv6_defaults; 88 - #include <linux/icmpv6.h> 89 88 #include <linux/tcp.h> 90 89 #include <linux/udp.h> 91 90
+5 -1
include/net/icmp.h
··· 46 46 #if IS_ENABLED(CONFIG_NF_NAT) 47 47 void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info); 48 48 #else 49 - #define icmp_ndo_send icmp_send 49 + static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) 50 + { 51 + struct ip_options opts = { 0 }; 52 + __icmp_send(skb_in, type, code, info, &opts); 53 + } 50 54 #endif 51 55 52 56 int icmp_rcv(struct sk_buff *skb);
+3 -2
net/ipv4/icmp.c
··· 775 775 void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) 776 776 { 777 777 struct sk_buff *cloned_skb = NULL; 778 + struct ip_options opts = { 0 }; 778 779 enum ip_conntrack_info ctinfo; 779 780 struct nf_conn *ct; 780 781 __be32 orig_ip; 781 782 782 783 ct = nf_ct_get(skb_in, &ctinfo); 783 784 if (!ct || !(ct->status & IPS_SRC_NAT)) { 784 - icmp_send(skb_in, type, code, info); 785 + __icmp_send(skb_in, type, code, info, &opts); 785 786 return; 786 787 } 787 788 ··· 797 796 798 797 orig_ip = ip_hdr(skb_in)->saddr; 799 798 ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip; 800 - icmp_send(skb_in, type, code, info); 799 + __icmp_send(skb_in, type, code, info, &opts); 801 800 ip_hdr(skb_in)->saddr = orig_ip; 802 801 out: 803 802 consume_skb(cloned_skb);
+9 -9
net/ipv6/icmp.c
··· 331 331 } 332 332 333 333 #if IS_ENABLED(CONFIG_IPV6_MIP6) 334 - static void mip6_addr_swap(struct sk_buff *skb) 334 + static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) 335 335 { 336 336 struct ipv6hdr *iph = ipv6_hdr(skb); 337 - struct inet6_skb_parm *opt = IP6CB(skb); 338 337 struct ipv6_destopt_hao *hao; 339 338 struct in6_addr tmp; 340 339 int off; ··· 350 351 } 351 352 } 352 353 #else 353 - static inline void mip6_addr_swap(struct sk_buff *skb) {} 354 + static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {} 354 355 #endif 355 356 356 357 static struct dst_entry *icmpv6_route_lookup(struct net *net, ··· 445 446 * Send an ICMP message in response to a packet in error 446 447 */ 447 448 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, 448 - const struct in6_addr *force_saddr) 449 + const struct in6_addr *force_saddr, 450 + const struct inet6_skb_parm *parm) 449 451 { 450 452 struct inet6_dev *idev = NULL; 451 453 struct ipv6hdr *hdr = ipv6_hdr(skb); ··· 542 542 if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type)) 543 543 goto out_bh_enable; 544 544 545 - mip6_addr_swap(skb); 545 + mip6_addr_swap(skb, parm); 546 546 547 547 sk = icmpv6_xmit_lock(net); 548 548 if (!sk) ··· 559 559 /* select a more meaningful saddr from input if */ 560 560 struct net_device *in_netdev; 561 561 562 - in_netdev = dev_get_by_index(net, IP6CB(skb)->iif); 562 + in_netdev = dev_get_by_index(net, parm->iif); 563 563 if (in_netdev) { 564 564 ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr, 565 565 inet6_sk(sk)->srcprefs, ··· 640 640 */ 641 641 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) 642 642 { 643 - icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL); 643 + icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb)); 644 644 kfree_skb(skb); 645 645 } 646 646 ··· 697 697 } 698 698 if (type == ICMP_TIME_EXCEEDED) 699 699 icmp6_send(skb2, ICMPV6_TIME_EXCEED, 
ICMPV6_EXC_HOPLIMIT, 700 - info, &temp_saddr); 700 + info, &temp_saddr, IP6CB(skb2)); 701 701 else 702 702 icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 703 - info, &temp_saddr); 703 + info, &temp_saddr, IP6CB(skb2)); 704 704 if (rt) 705 705 ip6_rt_put(rt); 706 706
+7 -5
net/ipv6/ip6_icmp.c
··· 33 33 } 34 34 EXPORT_SYMBOL(inet6_unregister_icmp_sender); 35 35 36 - void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) 36 + void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, 37 + const struct inet6_skb_parm *parm) 37 38 { 38 39 ip6_icmp_send_t *send; 39 40 40 41 rcu_read_lock(); 41 42 send = rcu_dereference(ip6_icmp_send); 42 43 if (send) 43 - send(skb, type, code, info, NULL); 44 + send(skb, type, code, info, NULL, parm); 44 45 rcu_read_unlock(); 45 46 } 46 - EXPORT_SYMBOL(icmpv6_send); 47 + EXPORT_SYMBOL(__icmpv6_send); 47 48 #endif 48 49 49 50 #if IS_ENABLED(CONFIG_NF_NAT) 50 51 #include <net/netfilter/nf_conntrack.h> 51 52 void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) 52 53 { 54 + struct inet6_skb_parm parm = { 0 }; 53 55 struct sk_buff *cloned_skb = NULL; 54 56 enum ip_conntrack_info ctinfo; 55 57 struct in6_addr orig_ip; ··· 59 57 60 58 ct = nf_ct_get(skb_in, &ctinfo); 61 59 if (!ct || !(ct->status & IPS_SRC_NAT)) { 62 - icmpv6_send(skb_in, type, code, info); 60 + __icmpv6_send(skb_in, type, code, info, &parm); 63 61 return; 64 62 } 65 63 ··· 74 72 75 73 orig_ip = ipv6_hdr(skb_in)->saddr; 76 74 ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6; 77 - icmpv6_send(skb_in, type, code, info); 75 + __icmpv6_send(skb_in, type, code, info, &parm); 78 76 ipv6_hdr(skb_in)->saddr = orig_ip; 79 77 out: 80 78 consume_skb(cloned_skb);