Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xfrm: Reinject transport-mode packets through tasklet

This is an old bugbear of mine:

https://www.mail-archive.com/netdev@vger.kernel.org/msg03894.html

By crafting special packets, it is possible to cause recursion
in our kernel when processing transport-mode packets at levels
that are only limited by packet size.

The easiest one is with DNAT, but an even worse one is where
UDP encapsulation is used in which case you just have to insert
a UDP encapsulation header in between each level of recursion.

This patch avoids this problem by reinjecting transport-mode packets
through a tasklet.

Fixes: b05e106698d9 ("[IPV4/6]: Netfilter IPsec input hooks")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>

authored by

Herbert Xu and committed by
Steffen Klassert
acf568ee d2950278

+80 -2
+3
include/net/xfrm.h
··· 1570 1570 int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); 1571 1571 int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); 1572 1572 int xfrm_input_resume(struct sk_buff *skb, int nexthdr); 1573 + int xfrm_trans_queue(struct sk_buff *skb, 1574 + int (*finish)(struct net *, struct sock *, 1575 + struct sk_buff *)); 1573 1576 int xfrm_output_resume(struct sk_buff *skb, int err); 1574 1577 int xfrm_output(struct sock *sk, struct sk_buff *skb); 1575 1578 int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
+11 -1
net/ipv4/xfrm4_input.c
··· 23 23 return xfrm4_extract_header(skb); 24 24 } 25 25 26 + static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, 27 + struct sk_buff *skb) 28 + { 29 + return dst_input(skb); 30 + } 31 + 26 32 static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk, 27 33 struct sk_buff *skb) 28 34 { ··· 39 33 iph->tos, skb->dev)) 40 34 goto drop; 41 35 } 42 - return dst_input(skb); 36 + 37 + if (xfrm_trans_queue(skb, xfrm4_rcv_encap_finish2)) 38 + goto drop; 39 + 40 + return 0; 43 41 drop: 44 42 kfree_skb(skb); 45 43 return NET_RX_DROP;
+9 -1
net/ipv6/xfrm6_input.c
··· 32 32 } 33 33 EXPORT_SYMBOL(xfrm6_rcv_spi); 34 34 35 + static int xfrm6_transport_finish2(struct net *net, struct sock *sk, 36 + struct sk_buff *skb) 37 + { 38 + if (xfrm_trans_queue(skb, ip6_rcv_finish)) 39 + __kfree_skb(skb); 40 + return -1; 41 + } 42 + 35 43 int xfrm6_transport_finish(struct sk_buff *skb, int async) 36 44 { 37 45 struct xfrm_offload *xo = xfrm_offload(skb); ··· 64 56 65 57 NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, 66 58 dev_net(skb->dev), NULL, skb, skb->dev, NULL, 67 - ip6_rcv_finish); 59 + xfrm6_transport_finish2); 68 60 return -1; 69 61 } 70 62
+57
net/xfrm/xfrm_input.c
··· 8 8 * 9 9 */ 10 10 11 + #include <linux/bottom_half.h> 12 + #include <linux/interrupt.h> 11 13 #include <linux/slab.h> 12 14 #include <linux/module.h> 13 15 #include <linux/netdevice.h> 16 + #include <linux/percpu.h> 14 17 #include <net/dst.h> 15 18 #include <net/ip.h> 16 19 #include <net/xfrm.h> 17 20 #include <net/ip_tunnels.h> 18 21 #include <net/ip6_tunnel.h> 22 + 23 + struct xfrm_trans_tasklet { 24 + struct tasklet_struct tasklet; 25 + struct sk_buff_head queue; 26 + }; 27 + 28 + struct xfrm_trans_cb { 29 + int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb); 30 + }; 31 + 32 + #define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0])) 19 33 20 34 static struct kmem_cache *secpath_cachep __read_mostly; 21 35 ··· 38 24 39 25 static struct gro_cells gro_cells; 40 26 static struct net_device xfrm_napi_dev; 27 + 28 + static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet); 41 29 42 30 int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo) 43 31 { ··· 493 477 } 494 478 EXPORT_SYMBOL(xfrm_input_resume); 495 479 480 + static void xfrm_trans_reinject(unsigned long data) 481 + { 482 + struct xfrm_trans_tasklet *trans = (void *)data; 483 + struct sk_buff_head queue; 484 + struct sk_buff *skb; 485 + 486 + __skb_queue_head_init(&queue); 487 + skb_queue_splice_init(&trans->queue, &queue); 488 + 489 + while ((skb = __skb_dequeue(&queue))) 490 + XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb); 491 + } 492 + 493 + int xfrm_trans_queue(struct sk_buff *skb, 494 + int (*finish)(struct net *, struct sock *, 495 + struct sk_buff *)) 496 + { 497 + struct xfrm_trans_tasklet *trans; 498 + 499 + trans = this_cpu_ptr(&xfrm_trans_tasklet); 500 + 501 + if (skb_queue_len(&trans->queue) >= netdev_max_backlog) 502 + return -ENOBUFS; 503 + 504 + XFRM_TRANS_SKB_CB(skb)->finish = finish; 505 + skb_queue_tail(&trans->queue, skb); 506 + tasklet_schedule(&trans->tasklet); 507 + return 0; 508 + } 509 + 
EXPORT_SYMBOL(xfrm_trans_queue); 510 + 496 511 void __init xfrm_input_init(void) 497 512 { 498 513 int err; 514 + int i; 499 515 500 516 init_dummy_netdev(&xfrm_napi_dev); 501 517 err = gro_cells_init(&gro_cells, &xfrm_napi_dev); ··· 538 490 sizeof(struct sec_path), 539 491 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 540 492 NULL); 493 + 494 + for_each_possible_cpu(i) { 495 + struct xfrm_trans_tasklet *trans; 496 + 497 + trans = &per_cpu(xfrm_trans_tasklet, i); 498 + __skb_queue_head_init(&trans->queue); 499 + tasklet_init(&trans->tasklet, xfrm_trans_reinject, 500 + (unsigned long)trans); 501 + } 541 502 }