Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: ipv4: propagate routing errors from ip_route_me_harder()

When ip_route_me_harder() fails, drop the packet with NF_DROP_ERR() so that
the routing error is propagated. Userspace then receives the actual error
instead of EPERM for every failure.

Example:

# ip r a unreachable default table 100
# ip ru add fwmark 0x1 lookup 100
# iptables -t mangle -A OUTPUT -d 8.8.8.8 -j MARK --set-mark 0x1

Current behaviour (output of "ping 8.8.8.8"):

PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
ping: sendmsg: Operation not permitted
ping: sendmsg: Operation not permitted
ping: sendmsg: Operation not permitted

New behaviour:

PING 8.8.8.8 (8.8.8.8) 56(84) bytes of data.
ping: sendmsg: Network is unreachable
ping: sendmsg: Network is unreachable
ping: sendmsg: Network is unreachable
ping: sendmsg: Network is unreachable

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

Authored by Patrick McHardy and committed by Pablo Neira Ayuso.
c9e1673a 6b0ee8c0

+14 -9
+4 -4
net/ipv4/netfilter.c
··· 40 40 fl4.flowi4_flags = flags; 41 41 rt = ip_route_output_key(net, &fl4); 42 42 if (IS_ERR(rt)) 43 - return -1; 43 + return PTR_ERR(rt); 44 44 45 45 /* Drop old route. */ 46 46 skb_dst_drop(skb); 47 47 skb_dst_set(skb, &rt->dst); 48 48 49 49 if (skb_dst(skb)->error) 50 - return -1; 50 + return skb_dst(skb)->error; 51 51 52 52 #ifdef CONFIG_XFRM 53 53 if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && ··· 56 56 skb_dst_set(skb, NULL); 57 57 dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0); 58 58 if (IS_ERR(dst)) 59 - return -1; 59 + return PTR_ERR(dst);; 60 60 skb_dst_set(skb, dst); 61 61 } 62 62 #endif ··· 66 66 if (skb_headroom(skb) < hh_len && 67 67 pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), 68 68 0, GFP_ATOMIC)) 69 - return -1; 69 + return -ENOMEM; 70 70 71 71 return 0; 72 72 }
+6 -3
net/ipv4/netfilter/iptable_mangle.c
··· 44 44 u_int8_t tos; 45 45 __be32 saddr, daddr; 46 46 u_int32_t mark; 47 + int err; 47 48 48 49 /* root is playing with raw sockets. */ 49 50 if (skb->len < sizeof(struct iphdr) || ··· 67 66 if (iph->saddr != saddr || 68 67 iph->daddr != daddr || 69 68 skb->mark != mark || 70 - iph->tos != tos) 71 - if (ip_route_me_harder(skb, RTN_UNSPEC)) 72 - ret = NF_DROP; 69 + iph->tos != tos) { 70 + err = ip_route_me_harder(skb, RTN_UNSPEC); 71 + if (err < 0) 72 + ret = NF_DROP_ERR(err); 73 + } 73 74 } 74 75 75 76 return ret;
+4 -2
net/ipv4/netfilter/iptable_nat.c
··· 213 213 const struct nf_conn *ct; 214 214 enum ip_conntrack_info ctinfo; 215 215 unsigned int ret; 216 + int err; 216 217 217 218 /* root is playing with raw sockets. */ 218 219 if (skb->len < sizeof(struct iphdr) || ··· 227 226 228 227 if (ct->tuplehash[dir].tuple.dst.u3.ip != 229 228 ct->tuplehash[!dir].tuple.src.u3.ip) { 230 - if (ip_route_me_harder(skb, RTN_UNSPEC)) 231 - ret = NF_DROP; 229 + err = ip_route_me_harder(skb, RTN_UNSPEC); 230 + if (err < 0) 231 + ret = NF_DROP_ERR(err); 232 232 } 233 233 #ifdef CONFIG_XFRM 234 234 else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&