Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests: netfilter: add selftest for directional zone support

Add a script to exercise NAT port clash resolution with directional zones.

Add net namespaces that use the same IP address and connect them to a
gateway.

Gateway uses policy routing based on iif/mark and conntrack zones to
isolate the client namespaces. In server direction, same zone with NAT
to single address is used.

Then, connect to a server from each client netns, using identical
connection id, i.e. saddr:sport -> daddr:dport.

Expectation is for all connections to succeed: NAT gateway is
supposed to do port reallocation for each of the (clashing) connections.

This is based on the description/use case provided in the commit message of
deedb59039f111 ("netfilter: nf_conntrack: add direction support for zones").

Cc: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Florian Westphal and committed by
Pablo Neira Ayuso
0f1148ab d2966dc7

+309
+309
tools/testing/selftests/netfilter/nft_nat_zones.sh
··· 1 + #!/bin/bash 2 + # 3 + # Test connection tracking zone and NAT source port reallocation support. 4 + # 5 + 6 + # Kselftest framework requirement - SKIP code is 4. 7 + ksft_skip=4 8 + 9 + # Don't increase too much, 2000 clients should work 10 + # just fine but script can then take several minutes with 11 + # KASAN/debug builds. 12 + maxclients=100 13 + 14 + have_iperf=1 15 + ret=0 16 + 17 + # client1---. 18 + # veth1-. 19 + # | 20 + # NAT Gateway --veth0--> Server 21 + # | | 22 + # veth2-' | 23 + # client2---' | 24 + # .... | 25 + # clientX----vethX---' 26 + 27 + # All clients share identical IP address. 28 + # NAT Gateway uses policy routing and conntrack zones to isolate client 29 + # namespaces. Each client connects to Server, each with colliding tuples: 30 + # clientsaddr:10000 -> serveraddr:dport 31 + # NAT Gateway is supposed to do port reallocation for each of the 32 + # connections. 33 + 34 + sfx=$(mktemp -u "XXXXXXXX") 35 + gw="ns-gw-$sfx" 36 + cl1="ns-cl1-$sfx" 37 + cl2="ns-cl2-$sfx" 38 + srv="ns-srv-$sfx" 39 + 40 + v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null) 41 + v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null) 42 + v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null) 43 + v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null) 44 + v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null) 45 + v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null) 46 + 47 + cleanup() 48 + { 49 + ip netns del $gw 50 + ip netns del $srv 51 + for i in $(seq 1 $maxclients); do 52 + ip netns del ns-cl$i-$sfx 2>/dev/null 53 + done 54 + 55 + sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null 56 + sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null 57 + sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null 58 + sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null 59 + sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null 60 + sysctl -q 
net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null 61 + } 62 + 63 + nft --version > /dev/null 2>&1 64 + if [ $? -ne 0 ];then 65 + echo "SKIP: Could not run test without nft tool" 66 + exit $ksft_skip 67 + fi 68 + 69 + ip -Version > /dev/null 2>&1 70 + if [ $? -ne 0 ];then 71 + echo "SKIP: Could not run test without ip tool" 72 + exit $ksft_skip 73 + fi 74 + 75 + conntrack -V > /dev/null 2>&1 76 + if [ $? -ne 0 ];then 77 + echo "SKIP: Could not run test without conntrack tool" 78 + exit $ksft_skip 79 + fi 80 + 81 + iperf3 -v >/dev/null 2>&1 82 + if [ $? -ne 0 ];then 83 + have_iperf=0 84 + fi 85 + 86 + ip netns add "$gw" 87 + if [ $? -ne 0 ];then 88 + echo "SKIP: Could not create net namespace $gw" 89 + exit $ksft_skip 90 + fi 91 + ip -net "$gw" link set lo up 92 + 93 + trap cleanup EXIT 94 + 95 + ip netns add "$srv" 96 + if [ $? -ne 0 ];then 97 + echo "SKIP: Could not create server netns $srv" 98 + exit $ksft_skip 99 + fi 100 + 101 + ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv" 102 + ip -net "$gw" link set veth0 up 103 + ip -net "$srv" link set lo up 104 + ip -net "$srv" link set eth0 up 105 + 106 + sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null 107 + sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null 108 + sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null 109 + sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null 110 + sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null 111 + sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null 112 + 113 + for i in $(seq 1 $maxclients);do 114 + cl="ns-cl$i-$sfx" 115 + 116 + ip netns add "$cl" 117 + if [ $? -ne 0 ];then 118 + echo "SKIP: Could not create client netns $cl" 119 + exit $ksft_skip 120 + fi 121 + ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1 122 + if [ $? 
-ne 0 ];then 123 + echo "SKIP: No virtual ethernet pair device support in kernel" 124 + exit $ksft_skip 125 + fi 126 + done 127 + 128 + for i in $(seq 1 $maxclients);do 129 + cl="ns-cl$i-$sfx" 130 + echo netns exec "$cl" ip link set lo up 131 + echo netns exec "$cl" ip link set eth0 up 132 + echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2 133 + echo netns exec "$gw" ip link set veth$i up 134 + echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2 135 + echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0 136 + 137 + # clients have same IP addresses. 138 + echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0 139 + echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0 140 + echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0 141 + echo netns exec "$cl" ip route add default via dead:1::2 dev eth0 142 + 143 + # NB: same addresses on client-facing interfaces. 144 + echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i 145 + echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i 146 + 147 + # gw: policy routing 148 + echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i)) 149 + echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i)) 150 + echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i)) 151 + echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i)) 152 + echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i)) 153 + done | ip -batch /dev/stdin 154 + 155 + ip -net "$gw" addr add 10.3.0.1/24 dev veth0 156 + ip -net "$gw" addr add dead:3::1/64 dev veth0 157 + 158 + ip -net "$srv" addr add 10.3.0.99/24 dev eth0 159 + ip -net "$srv" addr add dead:3::99/64 dev eth0 160 + 161 + ip netns exec $gw nft -f /dev/stdin<<EOF 162 + table inet raw { 163 + map iiftomark { 164 + type ifname : mark 165 + } 166 + 167 + map iiftozone { 168 + typeof iifname : ct zone 169 + } 170 + 171 + set inicmp { 172 + flags dynamic 173 + type ipv4_addr . 
ifname . ipv4_addr 174 + } 175 + set inflows { 176 + flags dynamic 177 + type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service 178 + } 179 + 180 + set inflows6 { 181 + flags dynamic 182 + type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service 183 + } 184 + 185 + chain prerouting { 186 + type filter hook prerouting priority -64000; policy accept; 187 + ct original zone set meta iifname map @iiftozone 188 + meta mark set meta iifname map @iiftomark 189 + 190 + tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter } 191 + add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter } 192 + ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter } 193 + } 194 + 195 + chain nat_postrouting { 196 + type nat hook postrouting priority 0; policy accept; 197 + ct mark set meta mark meta oifname veth0 masquerade 198 + } 199 + 200 + chain mangle_prerouting { 201 + type filter hook prerouting priority -100; policy accept; 202 + ct direction reply meta mark set ct mark 203 + } 204 + } 205 + EOF 206 + 207 + ( echo add element inet raw iiftomark \{ 208 + for i in $(seq 1 $((maxclients-1))); do 209 + echo \"veth$i\" : $i, 210 + done 211 + echo \"veth$maxclients\" : $maxclients \} 212 + echo add element inet raw iiftozone \{ 213 + for i in $(seq 1 $((maxclients-1))); do 214 + echo \"veth$i\" : $i, 215 + done 216 + echo \"veth$maxclients\" : $maxclients \} 217 + ) | ip netns exec $gw nft -f /dev/stdin 218 + 219 + ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null 220 + ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null 221 + ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null 222 + 223 + # useful for debugging: allows to use 'ping' from clients to gateway. 
224 + ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null 225 + ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null 226 + 227 + for i in $(seq 1 $maxclients); do 228 + cl="ns-cl$i-$sfx" 229 + ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 & 230 + if [ $? -ne 0 ]; then 231 + echo FAIL: Ping failure from $cl 1>&2 232 + ret=1 233 + break 234 + fi 235 + done 236 + 237 + wait 238 + 239 + for i in $(seq 1 $maxclients); do 240 + ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }" 241 + if [ $? -ne 0 ];then 242 + ret=1 243 + echo "FAIL: counter icmp mismatch for veth$i" 1>&2 244 + ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2 245 + break 246 + fi 247 + done 248 + 249 + ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }" 250 + if [ $? -ne 0 ];then 251 + ret=1 252 + echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }" 253 + ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2 254 + fi 255 + 256 + if [ $ret -eq 0 ]; then 257 + echo "PASS: ping test from all $maxclients namespaces" 258 + fi 259 + 260 + if [ $have_iperf -eq 0 ];then 261 + echo "SKIP: iperf3 not installed" 262 + if [ $ret -ne 0 ];then 263 + exit $ret 264 + fi 265 + exit $ksft_skip 266 + fi 267 + 268 + ip netns exec $srv iperf3 -s > /dev/null 2>&1 & 269 + iperfpid=$! 270 + sleep 1 271 + 272 + for i in $(seq 1 $maxclients); do 273 + if [ $ret -ne 0 ]; then 274 + break 275 + fi 276 + cl="ns-cl$i-$sfx" 277 + ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null 278 + if [ $? 
-ne 0 ]; then 279 + echo FAIL: Failure to connect for $cl 1>&2 280 + ip netns exec $gw conntrack -S 1>&2 281 + ret=1 282 + fi 283 + done 284 + if [ $ret -eq 0 ];then 285 + echo "PASS: iperf3 connections for all $maxclients net namespaces" 286 + fi 287 + 288 + kill $iperfpid 289 + wait 290 + 291 + for i in $(seq 1 $maxclients); do 292 + ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null 293 + if [ $? -ne 0 ];then 294 + ret=1 295 + echo "FAIL: can't find expected tcp entry for veth$i" 1>&2 296 + break 297 + fi 298 + done 299 + if [ $ret -eq 0 ];then 300 + echo "PASS: Found client connection for all $maxclients net namespaces" 301 + fi 302 + 303 + ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null 304 + if [ $? -ne 0 ];then 305 + ret=1 306 + echo "FAIL: cannot find return entry on veth0" 1>&2 307 + fi 308 + 309 + exit $ret