Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Fix broken listing of set elements when table has an owner.

2) Fix conntrack refcount leak in ctnetlink with related conntrack
entries, from Hangyu Hua.

3) Fix use-after-free/double-free in ctnetlink conntrack insert path,
from Florian Westphal.

4) Fix ip6t_rpfilter with VRF, from Phil Sutter.

5) Fix use-after-free in ebtables reported by syzbot, also from Florian.

6) Use skb->len in xt_length to deal with IPv6 jumbo packets,
from Xin Long.

7) Fix NETLINK_LISTEN_ALL_NSID with ctnetlink, from Florian Westphal.

8) Fix memleak in {ip_,ip6_,arp_}tables in ENOMEM error case,
from Pavel Tikhomirov.

* git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
netfilter: x_tables: fix percpu counter block leak on error path when creating new netns
netfilter: ctnetlink: make event listener tracking global
netfilter: xt_length: use skb len to match in length_mt6
netfilter: ebtables: fix table blob use-after-free
netfilter: ip6t_rpfilter: Fix regression with VRF interfaces
netfilter: conntrack: fix rmmod double-free race
netfilter: ctnetlink: fix possible refcount leak in ctnetlink_create_conntrack()
netfilter: nf_tables: allow to fetch set elements when table has an owner
====================

Link: https://lore.kernel.org/r/20230222092137.88637-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+79 -36
+5
include/linux/netfilter.h
··· 491 491 */ 492 492 DECLARE_PER_CPU(bool, nf_skb_duplicated); 493 493 494 + /** 495 + * Contains bitmask of ctnetlink event subscribers, if any. 496 + * Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag. 497 + */ 498 + extern u8 nf_ctnetlink_has_listener; 494 499 #endif /*__LINUX_NETFILTER_H*/
-1
include/net/netns/conntrack.h
··· 95 95 96 96 struct netns_ct { 97 97 #ifdef CONFIG_NF_CONNTRACK_EVENTS 98 - u8 ctnetlink_has_listener; 99 98 bool ecache_dwork_pending; 100 99 #endif 101 100 u8 sysctl_log_invalid; /* Log invalid packets */
+1 -1
net/bridge/netfilter/ebtables.c
··· 1090 1090 1091 1091 audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries, 1092 1092 AUDIT_XT_OP_REPLACE, GFP_KERNEL); 1093 - return ret; 1093 + return 0; 1094 1094 1095 1095 free_unlock: 1096 1096 mutex_unlock(&ebt_mutex);
+4
net/ipv4/netfilter/arp_tables.c
··· 1525 1525 1526 1526 new_table = xt_register_table(net, table, &bootstrap, newinfo); 1527 1527 if (IS_ERR(new_table)) { 1528 + struct arpt_entry *iter; 1529 + 1530 + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) 1531 + cleanup_entry(iter, net); 1528 1532 xt_free_table_info(newinfo); 1529 1533 return PTR_ERR(new_table); 1530 1534 }
+5 -2
net/ipv4/netfilter/ip_tables.c
··· 1045 1045 struct xt_counters *counters; 1046 1046 struct ipt_entry *iter; 1047 1047 1048 - ret = 0; 1049 1048 counters = xt_counters_alloc(num_counters); 1050 1049 if (!counters) { 1051 1050 ret = -ENOMEM; ··· 1090 1091 net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n"); 1091 1092 } 1092 1093 vfree(counters); 1093 - return ret; 1094 + return 0; 1094 1095 1095 1096 put_module: 1096 1097 module_put(t->me); ··· 1741 1742 1742 1743 new_table = xt_register_table(net, table, &bootstrap, newinfo); 1743 1744 if (IS_ERR(new_table)) { 1745 + struct ipt_entry *iter; 1746 + 1747 + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) 1748 + cleanup_entry(iter, net); 1744 1749 xt_free_table_info(newinfo); 1745 1750 return PTR_ERR(new_table); 1746 1751 }
+5 -2
net/ipv6/netfilter/ip6_tables.c
··· 1062 1062 struct xt_counters *counters; 1063 1063 struct ip6t_entry *iter; 1064 1064 1065 - ret = 0; 1066 1065 counters = xt_counters_alloc(num_counters); 1067 1066 if (!counters) { 1068 1067 ret = -ENOMEM; ··· 1107 1108 net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n"); 1108 1109 } 1109 1110 vfree(counters); 1110 - return ret; 1111 + return 0; 1111 1112 1112 1113 put_module: 1113 1114 module_put(t->me); ··· 1750 1751 1751 1752 new_table = xt_register_table(net, table, &bootstrap, newinfo); 1752 1753 if (IS_ERR(new_table)) { 1754 + struct ip6t_entry *iter; 1755 + 1756 + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) 1757 + cleanup_entry(iter, net); 1753 1758 xt_free_table_info(newinfo); 1754 1759 return PTR_ERR(new_table); 1755 1760 }
+3 -1
net/ipv6/netfilter/ip6t_rpfilter.c
··· 72 72 goto out; 73 73 } 74 74 75 - if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE)) 75 + if (rt->rt6i_idev->dev == dev || 76 + l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex || 77 + (flags & XT_RPFILTER_LOOSE)) 76 78 ret = true; 77 79 out: 78 80 ip6_rt_put(rt);
+3
net/netfilter/core.c
··· 669 669 EXPORT_SYMBOL_GPL(nf_ct_hook); 670 670 671 671 #if IS_ENABLED(CONFIG_NF_CONNTRACK) 672 + u8 nf_ctnetlink_has_listener; 673 + EXPORT_SYMBOL_GPL(nf_ctnetlink_has_listener); 674 + 672 675 const struct nf_nat_hook __rcu *nf_nat_hook __read_mostly; 673 676 EXPORT_SYMBOL_GPL(nf_nat_hook); 674 677
-1
net/netfilter/nf_conntrack_bpf.c
··· 381 381 struct nf_conn *nfct = (struct nf_conn *)nfct_i; 382 382 int err; 383 383 384 - nfct->status |= IPS_CONFIRMED; 385 384 err = nf_conntrack_hash_check_insert(nfct); 386 385 if (err < 0) { 387 386 nf_conntrack_free(nfct);
+15 -10
net/netfilter/nf_conntrack_core.c
··· 884 884 885 885 zone = nf_ct_zone(ct); 886 886 887 - if (!nf_ct_ext_valid_pre(ct->ext)) { 888 - NF_CT_STAT_INC_ATOMIC(net, insert_failed); 889 - return -ETIMEDOUT; 890 - } 887 + if (!nf_ct_ext_valid_pre(ct->ext)) 888 + return -EAGAIN; 891 889 892 890 local_bh_disable(); 893 891 do { ··· 920 922 goto chaintoolong; 921 923 } 922 924 925 + /* If genid has changed, we can't insert anymore because ct 926 + * extensions could have stale pointers and nf_ct_iterate_destroy 927 + * might have completed its table scan already. 928 + * 929 + * Increment of the ext genid right after this check is fine: 930 + * nf_ct_iterate_destroy blocks until locks are released. 931 + */ 932 + if (!nf_ct_ext_valid_post(ct->ext)) { 933 + err = -EAGAIN; 934 + goto out; 935 + } 936 + 937 + ct->status |= IPS_CONFIRMED; 923 938 smp_wmb(); 924 939 /* The caller holds a reference to this object */ 925 940 refcount_set(&ct->ct_general.use, 2); ··· 940 929 nf_conntrack_double_unlock(hash, reply_hash); 941 930 NF_CT_STAT_INC(net, insert); 942 931 local_bh_enable(); 943 - 944 - if (!nf_ct_ext_valid_post(ct->ext)) { 945 - nf_ct_kill(ct); 946 - NF_CT_STAT_INC_ATOMIC(net, drop); 947 - return -ETIMEDOUT; 948 - } 949 932 950 933 return 0; 951 934 chaintoolong:
+1 -1
net/netfilter/nf_conntrack_ecache.c
··· 309 309 break; 310 310 return true; 311 311 case 2: /* autodetect: no event listener, don't allocate extension. */ 312 - if (!READ_ONCE(net->ct.ctnetlink_has_listener)) 312 + if (!READ_ONCE(nf_ctnetlink_has_listener)) 313 313 return true; 314 314 fallthrough; 315 315 case 1:
+4 -4
net/netfilter/nf_conntrack_netlink.c
··· 2316 2316 nfct_seqadj_ext_add(ct); 2317 2317 nfct_synproxy_ext_add(ct); 2318 2318 2319 - /* we must add conntrack extensions before confirmation. */ 2320 - ct->status |= IPS_CONFIRMED; 2321 - 2322 2319 if (cda[CTA_STATUS]) { 2323 2320 err = ctnetlink_change_status(ct, cda); 2324 2321 if (err < 0) ··· 2372 2375 2373 2376 err = nf_conntrack_hash_check_insert(ct); 2374 2377 if (err < 0) 2375 - goto err2; 2378 + goto err3; 2376 2379 2377 2380 rcu_read_unlock(); 2378 2381 2379 2382 return ct; 2380 2383 2384 + err3: 2385 + if (ct->master) 2386 + nf_ct_put(ct->master); 2381 2387 err2: 2382 2388 rcu_read_unlock(); 2383 2389 err1:
+1 -1
net/netfilter/nf_tables_api.c
··· 5507 5507 int rem, err = 0; 5508 5508 5509 5509 table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, 5510 - genmask, NETLINK_CB(skb).portid); 5510 + genmask, 0); 5511 5511 if (IS_ERR(table)) { 5512 5512 NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); 5513 5513 return PTR_ERR(table);
+5 -4
net/netfilter/nfnetlink.c
··· 29 29 30 30 #include <net/netlink.h> 31 31 #include <net/netns/generic.h> 32 + #include <linux/netfilter.h> 32 33 #include <linux/netfilter/nfnetlink.h> 33 34 34 35 MODULE_LICENSE("GPL"); ··· 686 685 group_bit = (1 << group); 687 686 688 687 spin_lock(&nfnl_grp_active_lock); 689 - v = READ_ONCE(net->ct.ctnetlink_has_listener); 688 + v = READ_ONCE(nf_ctnetlink_has_listener); 690 689 if ((v & group_bit) == 0) { 691 690 v |= group_bit; 692 691 693 692 /* read concurrently without nfnl_grp_active_lock held. */ 694 - WRITE_ONCE(net->ct.ctnetlink_has_listener, v); 693 + WRITE_ONCE(nf_ctnetlink_has_listener, v); 695 694 } 696 695 697 696 spin_unlock(&nfnl_grp_active_lock); ··· 745 744 746 745 spin_lock(&nfnl_grp_active_lock); 747 746 if (!nfnetlink_has_listeners(net, group)) { 748 - u8 v = READ_ONCE(net->ct.ctnetlink_has_listener); 747 + u8 v = READ_ONCE(nf_ctnetlink_has_listener); 749 748 750 749 v &= ~group_bit; 751 750 752 751 /* read concurrently without nfnl_grp_active_lock held. */ 753 - WRITE_ONCE(net->ct.ctnetlink_has_listener, v); 752 + WRITE_ONCE(nf_ctnetlink_has_listener, v); 754 753 } 755 754 spin_unlock(&nfnl_grp_active_lock); 756 755 #endif
+1 -2
net/netfilter/xt_length.c
··· 30 30 length_mt6(const struct sk_buff *skb, struct xt_action_param *par) 31 31 { 32 32 const struct xt_length_info *info = par->matchinfo; 33 - const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) + 34 - sizeof(struct ipv6hdr); 33 + u32 pktlen = skb->len; 35 34 36 35 return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; 37 36 }
+26 -6
tools/testing/selftests/netfilter/rpath.sh
··· 62 62 ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad 63 63 64 64 # firewall matches to test 65 - [ -n "$iptables" ] && ip netns exec "$ns2" \ 66 - "$iptables" -t raw -A PREROUTING -s 192.168.0.0/16 -m rpfilter 67 - [ -n "$ip6tables" ] && ip netns exec "$ns2" \ 68 - "$ip6tables" -t raw -A PREROUTING -s fec0::/16 -m rpfilter 65 + [ -n "$iptables" ] && { 66 + common='-t raw -A PREROUTING -s 192.168.0.0/16' 67 + ip netns exec "$ns2" "$iptables" $common -m rpfilter 68 + ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert 69 + } 70 + [ -n "$ip6tables" ] && { 71 + common='-t raw -A PREROUTING -s fec0::/16' 72 + ip netns exec "$ns2" "$ip6tables" $common -m rpfilter 73 + ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert 74 + } 69 75 [ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF 70 76 table inet t { 71 77 chain c { ··· 95 89 [ -n "$1" ] || return 0 96 90 ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0" 97 91 } 92 + ipt_zero_reverse_rule() { # (command) 93 + [ -n "$1" ] || return 0 94 + ip netns exec "$ns2" "$1" -t raw -vS | \ 95 + grep -q -- "-m rpfilter --invert -c 0 0" 96 + } 98 97 nft_zero_rule() { # (family) 99 98 [ -n "$nft" ] || return 0 100 99 ip netns exec "$ns2" "$nft" list chain inet t c | \ ··· 112 101 ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null 113 102 } 114 103 115 - testrun() { 116 - # clear counters first 104 + clear_counters() { 117 105 [ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z 118 106 [ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z 119 107 if [ -n "$nft" ]; then ··· 121 111 ip netns exec "$ns2" $nft -s list table inet t; 122 112 ) | ip netns exec "$ns2" $nft -f - 123 113 fi 114 + } 115 + 116 + testrun() { 117 + clear_counters 124 118 125 119 # test 1: martian traffic should fail rpfilter matches 126 120 netns_ping "$ns1" -I v0 192.168.42.1 && \ ··· 134 120 135 121 ipt_zero_rule "$iptables" || die "iptables matched martian" 136 122 ipt_zero_rule "$ip6tables" || die "ip6tables matched martian" 123 + ipt_zero_reverse_rule "$iptables" && die "iptables not matched martian" 124 + ipt_zero_reverse_rule "$ip6tables" && die "ip6tables not matched martian" 137 125 nft_zero_rule ip || die "nft IPv4 matched martian" 138 126 nft_zero_rule ip6 || die "nft IPv6 matched martian" 127 + 128 + clear_counters 139 129 140 130 # test 2: rpfilter match should pass for regular traffic 141 131 netns_ping "$ns1" 192.168.23.1 || \ ··· 149 131 150 132 ipt_zero_rule "$iptables" && die "iptables match not effective" 151 133 ipt_zero_rule "$ip6tables" && die "ip6tables match not effective" 134 + ipt_zero_reverse_rule "$iptables" || die "iptables match over-effective" 135 + ipt_zero_reverse_rule "$ip6tables" || die "ip6tables match over-effective" 152 136 nft_zero_rule ip && die "nft IPv4 match not effective" 153 137 nft_zero_rule ip6 && die "nft IPv6 match not effective" 154 138