Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nft_chain_route: re-route before skb is queued to userspace

Imagine the following situation: a user adds the nft rules below and
queues the packets to userspace for further checking:
# ip rule add fwmark 0x0/0x1 lookup eth0
# ip rule add fwmark 0x1/0x1 lookup eth1
# nft add table filter
# nft add chain filter output {type route hook output priority 0 \;}
# nft add rule filter output mark set 0x1
# nft add rule filter output queue num 0

But after we reinject the skbuff, the packet is sent via the wrong
route; i.e., in this case, the packet is routed via the eth0 table,
not the eth1 table. This happens because we skip the re-route when the
verdict is NF_QUEUE, even if the mark was changed.

Actually, we should not touch the sk_buff only if the verdict is NF_DROP
or NF_STOLEN, and when the re-route fails, we should return NF_DROP with
the error code. This is consistent with the mangle table in iptables.

Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Liping Zhang and committed by
Pablo Neira Ayuso
d1a6cba5 5210d393

+14 -7
+7 -4
net/ipv4/netfilter/nft_chain_route_ipv4.c
··· 31 31 __be32 saddr, daddr; 32 32 u_int8_t tos; 33 33 const struct iphdr *iph; 34 + int err; 34 35 35 36 /* root is playing with raw sockets. */ 36 37 if (skb->len < sizeof(struct iphdr) || ··· 47 46 tos = iph->tos; 48 47 49 48 ret = nft_do_chain(&pkt, priv); 50 - if (ret != NF_DROP && ret != NF_QUEUE) { 49 + if (ret != NF_DROP && ret != NF_STOLEN) { 51 50 iph = ip_hdr(skb); 52 51 53 52 if (iph->saddr != saddr || 54 53 iph->daddr != daddr || 55 54 skb->mark != mark || 56 - iph->tos != tos) 57 - if (ip_route_me_harder(state->net, skb, RTN_UNSPEC)) 58 - ret = NF_DROP; 55 + iph->tos != tos) { 56 + err = ip_route_me_harder(state->net, skb, RTN_UNSPEC); 57 + if (err < 0) 58 + ret = NF_DROP_ERR(err); 59 + } 59 60 } 60 61 return ret; 61 62 }
+7 -3
net/ipv6/netfilter/nft_chain_route_ipv6.c
··· 31 31 struct in6_addr saddr, daddr; 32 32 u_int8_t hop_limit; 33 33 u32 mark, flowlabel; 34 + int err; 34 35 35 36 /* malformed packet, drop it */ 36 37 if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) ··· 47 46 flowlabel = *((u32 *)ipv6_hdr(skb)); 48 47 49 48 ret = nft_do_chain(&pkt, priv); 50 - if (ret != NF_DROP && ret != NF_QUEUE && 49 + if (ret != NF_DROP && ret != NF_STOLEN && 51 50 (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || 52 51 memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || 53 52 skb->mark != mark || 54 53 ipv6_hdr(skb)->hop_limit != hop_limit || 55 - flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) 56 - return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP; 54 + flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) { 55 + err = ip6_route_me_harder(state->net, skb); 56 + if (err < 0) 57 + ret = NF_DROP_ERR(err); 58 + } 57 59 58 60 return ret; 59 61 }