Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Remove leftovers from flowtable modules, from Geert Uytterhoeven.

2) Missing refcount increment of conntrack template in nft_ct,
from Florian Westphal.

3) Reduce nft_zone selftest time, also from Florian.

4) Add selftest to cover stateless NAT on fragments, from Florian Westphal.

5) Do not set net_device for reject packets from the bridge path,
from Phil Sutter.

6) Cancel register tracking info on nft_byteorder operations.

7) Extend nft_concat_range selftest to cover set reload with no elements,
from Florian Westphal.

8) Remove useless update of pointer in chain blob builder, reported
by kbuild test robot.

* git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf:
netfilter: nf_tables: remove assignment with no effect in chain blob builder
selftests: nft_concat_range: add test for reload with no element add/del
netfilter: nft_byteorder: track register operations
netfilter: nft_reject_bridge: Fix for missing reply from prerouting
selftests: netfilter: check stateless nat udp checksum fixup
selftests: netfilter: reduce zone stress test running time
netfilter: nft_ct: fix use after free when attaching zone template
netfilter: Remove flowtable relics
====================

Link: https://lore.kernel.org/r/20220127235235.656931-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+249 -24
+4 -4
net/bridge/netfilter/nft_reject_bridge.c
··· 49 49 { 50 50 struct sk_buff *nskb; 51 51 52 - nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook); 52 + nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, NULL, hook); 53 53 if (!nskb) 54 54 return; 55 55 ··· 65 65 { 66 66 struct sk_buff *nskb; 67 67 68 - nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code); 68 + nskb = nf_reject_skb_v4_unreach(net, oldskb, NULL, hook, code); 69 69 if (!nskb) 70 70 return; 71 71 ··· 81 81 { 82 82 struct sk_buff *nskb; 83 83 84 - nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook); 84 + nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, NULL, hook); 85 85 if (!nskb) 86 86 return; 87 87 ··· 98 98 { 99 99 struct sk_buff *nskb; 100 100 101 - nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code); 101 + nskb = nf_reject_skb_v6_unreach(net, oldskb, NULL, hook, code); 102 102 if (!nskb) 103 103 return; 104 104
-4
net/ipv4/netfilter/Kconfig
··· 58 58 59 59 endif # NF_TABLES 60 60 61 - config NF_FLOW_TABLE_IPV4 62 - tristate 63 - select NF_FLOW_TABLE_INET 64 - 65 61 config NF_DUP_IPV4 66 62 tristate "Netfilter IPv4 packet duplication to alternate destination" 67 63 depends on !NF_CONNTRACK || NF_CONNTRACK
-4
net/ipv6/netfilter/Kconfig
··· 47 47 endif # NF_TABLES_IPV6 48 48 endif # NF_TABLES 49 49 50 - config NF_FLOW_TABLE_IPV6 51 - tristate 52 - select NF_FLOW_TABLE_INET 53 - 54 50 config NF_DUP_IPV6 55 51 tristate "Netfilter IPv6 packet duplication to alternate destination" 56 52 depends on !NF_CONNTRACK || NF_CONNTRACK
-3
net/ipv6/netfilter/Makefile
··· 28 28 obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o 29 29 obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o 30 30 31 - # flow table support 32 - obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o 33 - 34 31 # matches 35 32 obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o 36 33 obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
net/ipv6/netfilter/nf_flow_table_ipv6.c
-1
net/netfilter/nf_tables_api.c
··· 2011 2011 2012 2012 prule = (struct nft_rule_dp *)ptr; 2013 2013 prule->is_last = 1; 2014 - ptr += offsetof(struct nft_rule_dp, data); 2015 2014 /* blob size does not include the trailer rule */ 2016 2015 } 2017 2016
+12
net/netfilter/nft_byteorder.c
··· 167 167 return -1; 168 168 } 169 169 170 + static bool nft_byteorder_reduce(struct nft_regs_track *track, 171 + const struct nft_expr *expr) 172 + { 173 + struct nft_byteorder *priv = nft_expr_priv(expr); 174 + 175 + track->regs[priv->dreg].selector = NULL; 176 + track->regs[priv->dreg].bitwise = NULL; 177 + 178 + return false; 179 + } 180 + 170 181 static const struct nft_expr_ops nft_byteorder_ops = { 171 182 .type = &nft_byteorder_type, 172 183 .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), 173 184 .eval = nft_byteorder_eval, 174 185 .init = nft_byteorder_init, 175 186 .dump = nft_byteorder_dump, 187 + .reduce = nft_byteorder_reduce, 176 188 }; 177 189 178 190 struct nft_expr_type nft_byteorder_type __read_mostly = {
+4 -1
net/netfilter/nft_ct.c
··· 260 260 ct = this_cpu_read(nft_ct_pcpu_template); 261 261 262 262 if (likely(refcount_read(&ct->ct_general.use) == 1)) { 263 + refcount_inc(&ct->ct_general.use); 263 264 nf_ct_zone_add(ct, &zone); 264 265 } else { 265 - /* previous skb got queued to userspace */ 266 + /* previous skb got queued to userspace, allocate temporary 267 + * one until percpu template can be reused. 268 + */ 266 269 ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC); 267 270 if (!ct) { 268 271 regs->verdict.code = NF_DROP;
+71 -1
tools/testing/selftests/netfilter/nft_concat_range.sh
··· 27 27 net6_port_net6_port net_port_mac_proto_net" 28 28 29 29 # Reported bugs, also described by TYPE_ variables below 30 - BUGS="flush_remove_add" 30 + BUGS="flush_remove_add reload" 31 31 32 32 # List of possible paths to pktgen script from kernel tree for performance tests 33 33 PKTGEN_SCRIPT_PATHS=" ··· 352 352 # display display text for test report 353 353 TYPE_flush_remove_add=" 354 354 display Add two elements, flush, re-add 355 + " 356 + 357 + TYPE_reload=" 358 + display net,mac with reload 359 + type_spec ipv4_addr . ether_addr 360 + chain_spec ip daddr . ether saddr 361 + dst addr4 362 + src mac 363 + start 1 364 + count 1 365 + src_delta 2000 366 + tools sendip nc bash 367 + proto udp 368 + 369 + race_repeat 0 370 + 371 + perf_duration 0 355 372 " 356 373 357 374 # Set template for all tests, types and rules are filled in depending on test ··· 1487 1470 nft flush set t s 2>/dev/null || return 1 1488 1471 nft add element t s ${elem2} 2>/dev/null || return 1 1489 1472 done 1473 + nft flush ruleset 1474 + } 1475 + 1476 + # - add ranged element, check that packets match it 1477 + # - reload the set, check packets still match 1478 + test_bug_reload() { 1479 + setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} 1480 + rstart=${start} 1481 + 1482 + range_size=1 1483 + for i in $(seq "${start}" $((start + count))); do 1484 + end=$((start + range_size)) 1485 + 1486 + # Avoid negative or zero-sized port ranges 1487 + if [ $((end / 65534)) -gt $((start / 65534)) ]; then 1488 + start=${end} 1489 + end=$((end + 1)) 1490 + fi 1491 + srcstart=$((start + src_delta)) 1492 + srcend=$((end + src_delta)) 1493 + 1494 + add "$(format)" || return 1 1495 + range_size=$((range_size + 1)) 1496 + start=$((end + range_size)) 1497 + done 1498 + 1499 + # check kernel does allocate pcpu sctrach map 1500 + # for reload with no elemet add/delete 1501 + ( echo flush set inet filter test ; 1502 + nft list set inet filter test ) | nft -f - 1503 + 1504 + start=${rstart} 1505 + 
range_size=1 1506 + 1507 + for i in $(seq "${start}" $((start + count))); do 1508 + end=$((start + range_size)) 1509 + 1510 + # Avoid negative or zero-sized port ranges 1511 + if [ $((end / 65534)) -gt $((start / 65534)) ]; then 1512 + start=${end} 1513 + end=$((end + 1)) 1514 + fi 1515 + srcstart=$((start + src_delta)) 1516 + srcend=$((end + src_delta)) 1517 + 1518 + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do 1519 + send_match "${j}" $((j + src_delta)) || return 1 1520 + done 1521 + 1522 + range_size=$((range_size + 1)) 1523 + start=$((end + range_size)) 1524 + done 1525 + 1490 1526 nft flush ruleset 1491 1527 } 1492 1528
+152
tools/testing/selftests/netfilter/nft_nat.sh
··· 899 899 ip netns exec "$ns0" nft delete table $family nat 900 900 } 901 901 902 + test_stateless_nat_ip() 903 + { 904 + local lret=0 905 + 906 + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null 907 + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null 908 + 909 + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 910 + if [ $? -ne 0 ] ; then 911 + echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules" 912 + return 1 913 + fi 914 + 915 + ip netns exec "$ns0" nft -f /dev/stdin <<EOF 916 + table ip stateless { 917 + map xlate_in { 918 + typeof meta iifname . ip saddr . ip daddr : ip daddr 919 + elements = { 920 + "veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2, 921 + } 922 + } 923 + map xlate_out { 924 + typeof meta iifname . ip saddr . ip daddr : ip daddr 925 + elements = { 926 + "veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99 927 + } 928 + } 929 + 930 + chain prerouting { 931 + type filter hook prerouting priority -400; policy accept; 932 + ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in 933 + ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out 934 + } 935 + } 936 + EOF 937 + if [ $? -ne 0 ]; then 938 + echo "SKIP: Could not add ip statless rules" 939 + return $ksft_skip 940 + fi 941 + 942 + reset_counters 943 + 944 + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 945 + if [ $? -ne 0 ] ; then 946 + echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules" 947 + lret=1 948 + fi 949 + 950 + # ns1 should have seen packets from .2.2, due to stateless rewrite. 951 + expect="packets 1 bytes 84" 952 + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") 953 + if [ $? -ne 0 ]; then 954 + bad_counter "$ns1" ns0insl "$expect" "test_stateless 1" 955 + lret=1 956 + fi 957 + 958 + for dir in "in" "out" ; do 959 + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") 960 + if [ $? 
-ne 0 ]; then 961 + bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2" 962 + lret=1 963 + fi 964 + done 965 + 966 + # ns1 should not have seen packets from ns2, due to masquerade 967 + expect="packets 0 bytes 0" 968 + for dir in "in" "out" ; do 969 + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect") 970 + if [ $? -ne 0 ]; then 971 + bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3" 972 + lret=1 973 + fi 974 + 975 + cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect") 976 + if [ $? -ne 0 ]; then 977 + bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4" 978 + lret=1 979 + fi 980 + done 981 + 982 + reset_counters 983 + 984 + socat -h > /dev/null 2>&1 985 + if [ $? -ne 0 ];then 986 + echo "SKIP: Could not run stateless nat frag test without socat tool" 987 + if [ $lret -eq 0 ]; then 988 + return $ksft_skip 989 + fi 990 + 991 + ip netns exec "$ns0" nft delete table ip stateless 992 + return $lret 993 + fi 994 + 995 + local tmpfile=$(mktemp) 996 + dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null 997 + 998 + local outfile=$(mktemp) 999 + ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null & 1000 + sc_r=$! 1001 + 1002 + sleep 1 1003 + # re-do with large ping -> ip fragmentation 1004 + ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null 1005 + if [ $? -ne 0 ] ; then 1006 + echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2 1007 + lret=1 1008 + fi 1009 + 1010 + wait 1011 + 1012 + cmp "$tmpfile" "$outfile" 1013 + if [ $? -ne 0 ]; then 1014 + ls -l "$tmpfile" "$outfile" 1015 + echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2 1016 + lret=1 1017 + fi 1018 + 1019 + rm -f "$tmpfile" "$outfile" 1020 + 1021 + # ns1 should have seen packets from 2.2, due to stateless rewrite. 
1022 + expect="packets 3 bytes 4164" 1023 + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") 1024 + if [ $? -ne 0 ]; then 1025 + bad_counter "$ns1" ns0insl "$expect" "test_stateless 5" 1026 + lret=1 1027 + fi 1028 + 1029 + ip netns exec "$ns0" nft delete table ip stateless 1030 + if [ $? -ne 0 ]; then 1031 + echo "ERROR: Could not delete table ip stateless" 1>&2 1032 + lret=1 1033 + fi 1034 + 1035 + test $lret -eq 0 && echo "PASS: IP statless for $ns2" 1036 + 1037 + return $lret 1038 + } 1039 + 902 1040 # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 903 1041 for i in 0 1 2; do 904 1042 ip netns exec ns$i-$sfx nft -f /dev/stdin <<EOF ··· 1103 965 EOF 1104 966 done 1105 967 968 + # special case for stateless nat check, counter needs to 969 + # be done before (input) ip defragmentation 970 + ip netns exec ns1-$sfx nft -f /dev/stdin <<EOF 971 + table inet filter { 972 + counter ns0insl {} 973 + 974 + chain pre { 975 + type filter hook prerouting priority -400; policy accept; 976 + ip saddr 10.0.2.2 counter name "ns0insl" 977 + } 978 + } 979 + EOF 980 + 1106 981 sleep 3 1107 982 # test basic connectivity 1108 983 for i in 1 2; do ··· 1170 1019 $test_inet_nat && test_redirect6 inet 1171 1020 1172 1021 test_port_shadowing 1022 + test_stateless_nat_ip 1173 1023 1174 1024 if [ $ret -ne 0 ];then 1175 1025 echo -n "FAIL: "
+6 -6
tools/testing/selftests/netfilter/nft_zones_many.sh
··· 9 9 # Kselftest framework requirement - SKIP code is 4. 10 10 ksft_skip=4 11 11 12 - zones=20000 12 + zones=2000 13 13 have_ct_tool=0 14 14 ret=0 15 15 ··· 75 75 76 76 while [ $i -lt $max_zones ]; do 77 77 local start=$(date +%s%3N) 78 - i=$((i + 10000)) 78 + i=$((i + 1000)) 79 79 j=$((j + 1)) 80 80 # nft rule in output places each packet in a different zone. 81 - dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 81 + dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345 82 82 if [ $? -ne 0 ] ;then 83 83 ret=1 84 84 break ··· 86 86 87 87 stop=$(date +%s%3N) 88 88 local duration=$((stop-start)) 89 - echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)" 89 + echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)" 90 90 done 91 91 92 92 if [ $have_ct_tool -eq 1 ]; then ··· 128 128 break 129 129 fi 130 130 131 - if [ $((i%10000)) -eq 0 ];then 131 + if [ $((i%1000)) -eq 0 ];then 132 132 stop=$(date +%s%3N) 133 133 134 134 local duration=$((stop-start)) 135 - echo "PASS: added 10000 entries in $duration ms (now $i total)" 135 + echo "PASS: added 1000 entries in $duration ms (now $i total)" 136 136 start=$stop 137 137 fi 138 138 done