Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests: netfilter: add selftest for ipip pmtu discovery with enabled connection tracking

Convert Christian's bug description into a reproducer.

Cc: Shuah Khan <shuah@kernel.org>
Reported-by: Christian Perle <christian.perle@secunet.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Florian Westphal and committed by
Jakub Kicinski
9e7a67de f3562f5e

+208 -1
+2 -1
tools/testing/selftests/netfilter/Makefile
··· 4 4 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ 5 5 conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ 6 6 nft_concat_range.sh nft_conntrack_helper.sh \ 7 - nft_queue.sh nft_meta.sh 7 + nft_queue.sh nft_meta.sh \ 8 + ipip-conntrack-mtu.sh 8 9 9 10 LDLIBS = -lmnl 10 11 TEST_GEN_FILES = nf-queue
+206
tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4

# Conntrack needs to reassemble fragments in order to have complete
# packets for rule matching. Reassembly can lead to packet loss.

# Consider the following setup:
#            +--------+       +---------+       +--------+
#            |Router A|-------|Wanrouter|-------|Router B|
#            |        |.IPIP..|         |..IPIP.|        |
#            +--------+       +---------+       +--------+
#           /                  mtu 1400                   \
#          /                                               \
#+--------+                                                 +--------+
#|Client A|                                                 |Client B|
#|        |                                                 |        |
#+--------+                                                 +--------+

# Router A and Router B use IPIP tunnel interfaces to tunnel traffic
# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
# on its interfaces.

# Random suffix so that the namespace names do not clash with existing ones.
rnd=$(mktemp -u XXXXXXXX)
# Temporary file that collects whatever the receiving netcat gets.
rx=$(mktemp)

r_a="ns-ra-$rnd"
r_b="ns-rb-$rnd"
r_w="ns-rw-$rnd"
c_a="ns-ca-$rnd"
c_b="ns-cb-$rnd"

# Remove all test namespaces and the temporary receive file.
# Errors from 'ip netns del' are discarded because this may run before
# (some of) the namespaces have been created.
cleanup() {
	local n

	for n in "${r_a}" "${r_b}" "${r_w}" "${c_a}" "${c_b}"; do
		ip netns del "${n}" 2>/dev/null
	done
	rm -f "${rx}"
}

# Install the handler before any check can exit, so that the temporary
# file is removed on the SKIP paths as well.
trap cleanup EXIT

# Run a probe command; skip the whole test if it fails.
# Arguments: $1 - command line to run (deliberately word-split)
#            $2 - text appended to "SKIP: Could not " on failure
checktool (){
	if ! $1 > /dev/null 2>&1; then
		echo "SKIP: Could not $2"
		exit $ksft_skip
	fi
}

checktool "iptables --version" "run test without iptables"
checktool "ip -Version" "run test without ip tool"
checktool "command -v nc" "run test without nc (netcat)"
checktool "ip netns add ${r_a}" "create net namespace"

for n in "${r_b}" "${r_w}" "${c_a}" "${c_b}"; do
	ip netns add "${n}"
done

# Send 1400 byte UDP payloads from Client A to a netcat listener on
# Client B and verify that exactly 1400 bytes were received.
# Arguments: $1 - word used in the result message ("with"/"without")
# Exits the script with status 1 on failure.
test_path() {
	local msg="$1"
	local listener bytes i

	ip netns exec "${c_b}" nc -n -w 3 -q 3 -u -l -p 5000 > "${rx}" < /dev/null &
	listener=$!

	# Give the listener time to start before sending.
	sleep 1
	# NOTE(review): three datagrams are sent but only 1400 bytes are
	# expected below; presumably the listener only accepts data from the
	# first sender - confirm against the nc variant in use.
	for i in 1 2 3; do
		head -c1400 /dev/zero | tr "\000" "a" | \
			ip netns exec "${c_a}" nc -n -w 1 -u 192.168.20.2 5000
	done

	wait "${listener}"

	bytes=$(wc -c < "${rx}")

	if [ "$bytes" -eq 1400 ]; then
		echo "OK: PMTU $msg connection tracking"
	else
		echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
		exit 1
	fi
}

# Detailed setup for Router A
# ---------------------------
# Interfaces (veth0/veth1 take the place of eth0/eth1 in this script):
# eth0: 10.2.2.1/24
# eth1: 192.168.10.1/24
# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
# Routes:
# 192.168.20.0/24 dev ipip0    (192.168.20.0/24 is subnet of Client B)
# 10.4.4.1 via 10.2.2.254      (Router B via Wanrouter)
# No iptables rules at all.

ip link add veth0 netns "${r_a}" type veth peer name veth0 netns "${r_w}"
ip link add veth1 netns "${r_a}" type veth peer name veth0 netns "${c_a}"

l_addr="10.2.2.1"
r_addr="10.4.4.1"
if ! ip netns exec "${r_a}" ip link add ipip0 type ipip local "${l_addr}" remote "${r_addr}" mode ipip; then
	echo "SKIP: Could not create ipip tunnel on Router A"
	exit $ksft_skip
fi

for dev in lo veth0 veth1 ipip0; do
	ip -net "${r_a}" link set "$dev" up
done

ip -net "${r_a}" addr add 10.2.2.1/24 dev veth0
ip -net "${r_a}" addr add 192.168.10.1/24 dev veth1

ip -net "${r_a}" route add 192.168.20.0/24 dev ipip0
ip -net "${r_a}" route add 10.4.4.0/24 via 10.2.2.254

ip netns exec "${r_a}" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null

# Detailed setup for Router B
# ---------------------------
# Interfaces (veth0/veth1 take the place of eth0/eth1 in this script):
# eth0: 10.4.4.1/24
# eth1: 192.168.20.1/24
# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
# Routes:
# 192.168.10.0/24 dev ipip0    (192.168.10.0/24 is subnet of Client A)
# 10.2.2.1 via 10.4.4.254      (Router A via Wanrouter)
# No iptables rules at all.

ip link add veth0 netns "${r_b}" type veth peer name veth1 netns "${r_w}"
ip link add veth1 netns "${r_b}" type veth peer name veth0 netns "${c_b}"

l_addr="10.4.4.1"
r_addr="10.2.2.1"

if ! ip netns exec "${r_b}" ip link add ipip0 type ipip local "${l_addr}" remote "${r_addr}" mode ipip; then
	echo "SKIP: Could not create ipip tunnel on Router B"
	exit $ksft_skip
fi

for dev in lo veth0 veth1 ipip0; do
	ip -net "${r_b}" link set "$dev" up
done

ip -net "${r_b}" addr add 10.4.4.1/24 dev veth0
ip -net "${r_b}" addr add 192.168.20.1/24 dev veth1

ip -net "${r_b}" route add 192.168.10.0/24 dev ipip0
ip -net "${r_b}" route add 10.2.2.0/24 via 10.4.4.254
ip netns exec "${r_b}" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null

# Client A
ip -net "${c_a}" addr add 192.168.10.2/24 dev veth0
ip -net "${c_a}" link set dev lo up
ip -net "${c_a}" link set dev veth0 up
ip -net "${c_a}" route add default via 192.168.10.1

# Client B
ip -net "${c_b}" addr add 192.168.20.2/24 dev veth0
ip -net "${c_b}" link set dev veth0 up
ip -net "${c_b}" link set dev lo up
ip -net "${c_b}" route add default via 192.168.20.1

# Wan
ip -net "${r_w}" addr add 10.2.2.254/24 dev veth0
ip -net "${r_w}" addr add 10.4.4.254/24 dev veth1

ip -net "${r_w}" link set dev lo up
ip -net "${r_w}" link set dev veth0 up mtu 1400
ip -net "${r_w}" link set dev veth1 up mtu 1400

# The router ends of the WAN links get the same reduced MTU.
ip -net "${r_a}" link set dev veth0 mtu 1400
ip -net "${r_b}" link set dev veth0 mtu 1400

ip netns exec "${r_w}" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null

# Path MTU discovery
# ------------------
# Running tracepath from Client A to Client B shows PMTU discovery is working
# as expected:
#
# clienta:~# tracepath 192.168.20.2
#  1?: [LOCALHOST]                      pmtu 1500
#  1:  192.168.10.1                     0.867ms
#  1:  192.168.10.1                     0.302ms
#  2:  192.168.10.1                     0.312ms pmtu 1480
#  2:  no reply
#  3:  192.168.10.1                     0.510ms pmtu 1380
#  3:  192.168.20.2                     2.320ms reached
#      Resume: pmtu 1380 hops 3 back 3

# ip netns exec ${c_a} traceroute --mtu 192.168.20.2

# Router A has learned PMTU (1400) to Router B from Wanrouter.
# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B
# from Router A.

#Send large UDP packet
#---------------------
#Now we send a 1400 bytes UDP packet from Client A to Client B:

# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | nc -u 192.168.20.2 5000
test_path "without"

# The IPv4 stack on Client A already knows the PMTU to Client B, so the
# UDP packet is sent as two fragments (1380 + 20). Router A forwards the
# fragments between eth1 and ipip0. The fragments fit into the tunnel and
# reach their destination.

#When sending the large UDP packet again, Router A now reassembles the
#fragments before routing the packet over ipip0. The resulting IPIP
#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is
#dropped on Router A before sending.

# The rule has no target: it only adds a conntrack match on Router A so
# that the second run is the "with connection tracking" case.
ip netns exec "${r_a}" iptables -A FORWARD -m conntrack --ctstate NEW
test_path "with"