Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter/IPVS fixes for net

The following patchset contains Netfilter/IPVS fixes for your net tree:

1) Fix crash when dumping rules after conversion to RCU,
from Florian Westphal.

2) Fix incorrect hook reinjection from nf_queue in case of NF_REPEAT,
from Jagdish Motwani.

3) Fix check for route existence in fib extension, from Phil Sutter.

4) Fix use after free in ip_vs_in() hook, from YueHaibing.

5) Check for veth existence in netfilter selftests,
from Jeffrin Jose T.

6) Fix checksum corruption in UDP NAT helpers due to a typo,
from Florian Westphal.

7) Pass up packets to classic forwarding path regardless of
IPv4 DF bit, patch for the flowtable infrastructure from Florian.

8) Set liberal TCP tracking for flows that are placed in the
flowtable, in case they need to go back to classic forwarding path,
also from Florian.

9) Don't add flow with sequence adjustment to flowtable, from Florian.

10) Skip the IPv4 options check in the IPv6 datapath of the flowtable, from Florian.

11) Add selftest for the flowtable infrastructure, from Florian.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+375 -63
+1 -1
include/net/netfilter/nft_fib.h
··· 34 34 const struct nft_pktinfo *pkt); 35 35 36 36 void nft_fib_store_result(void *reg, const struct nft_fib *priv, 37 - const struct nft_pktinfo *pkt, int index); 37 + const struct net_device *dev); 38 38 #endif
+3 -20
net/ipv4/netfilter/nft_fib_ipv4.c
··· 58 58 } 59 59 EXPORT_SYMBOL_GPL(nft_fib4_eval_type); 60 60 61 - static int get_ifindex(const struct net_device *dev) 62 - { 63 - return dev ? dev->ifindex : 0; 64 - } 65 - 66 61 void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, 67 62 const struct nft_pktinfo *pkt) 68 63 { ··· 89 94 90 95 if (nft_hook(pkt) == NF_INET_PRE_ROUTING && 91 96 nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { 92 - nft_fib_store_result(dest, priv, pkt, 93 - nft_in(pkt)->ifindex); 97 + nft_fib_store_result(dest, priv, nft_in(pkt)); 94 98 return; 95 99 } 96 100 ··· 102 108 if (ipv4_is_zeronet(iph->saddr)) { 103 109 if (ipv4_is_lbcast(iph->daddr) || 104 110 ipv4_is_local_multicast(iph->daddr)) { 105 - nft_fib_store_result(dest, priv, pkt, 106 - get_ifindex(pkt->skb->dev)); 111 + nft_fib_store_result(dest, priv, pkt->skb->dev); 107 112 return; 108 113 } 109 114 } ··· 143 150 found = oif; 144 151 } 145 152 146 - switch (priv->result) { 147 - case NFT_FIB_RESULT_OIF: 148 - *dest = found->ifindex; 149 - break; 150 - case NFT_FIB_RESULT_OIFNAME: 151 - strncpy((char *)dest, found->name, IFNAMSIZ); 152 - break; 153 - default: 154 - WARN_ON_ONCE(1); 155 - break; 156 - } 153 + nft_fib_store_result(dest, priv, found); 157 154 } 158 155 EXPORT_SYMBOL_GPL(nft_fib4_eval); 159 156
+2 -14
net/ipv6/netfilter/nft_fib_ipv6.c
··· 169 169 170 170 if (nft_hook(pkt) == NF_INET_PRE_ROUTING && 171 171 nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { 172 - nft_fib_store_result(dest, priv, pkt, 173 - nft_in(pkt)->ifindex); 172 + nft_fib_store_result(dest, priv, nft_in(pkt)); 174 173 return; 175 174 } 176 175 ··· 186 187 if (oif && oif != rt->rt6i_idev->dev) 187 188 goto put_rt_err; 188 189 189 - switch (priv->result) { 190 - case NFT_FIB_RESULT_OIF: 191 - *dest = rt->rt6i_idev->dev->ifindex; 192 - break; 193 - case NFT_FIB_RESULT_OIFNAME: 194 - strncpy((char *)dest, rt->rt6i_idev->dev->name, IFNAMSIZ); 195 - break; 196 - default: 197 - WARN_ON_ONCE(1); 198 - break; 199 - } 200 - 190 + nft_fib_store_result(dest, priv, rt->rt6i_idev->dev); 201 191 put_rt_err: 202 192 ip6_rt_put(rt); 203 193 }
+1 -1
net/netfilter/ipvs/ip_vs_core.c
··· 2312 2312 { 2313 2313 struct netns_ipvs *ipvs = net_ipvs(net); 2314 2314 2315 - nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); 2316 2315 ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */ 2317 2316 ip_vs_conn_net_cleanup(ipvs); 2318 2317 ip_vs_app_net_cleanup(ipvs); ··· 2326 2327 { 2327 2328 struct netns_ipvs *ipvs = net_ipvs(net); 2328 2329 EnterFunction(2); 2330 + nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); 2329 2331 ipvs->enable = 0; /* Disable packet reception */ 2330 2332 smp_wmb(); 2331 2333 ip_vs_sync_net_cleanup(ipvs);
+1 -2
net/netfilter/nf_flow_table_ip.c
··· 244 244 rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache; 245 245 outdev = rt->dst.dev; 246 246 247 - if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) && 248 - (ip_hdr(skb)->frag_off & htons(IP_DF)) != 0) 247 + if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu))) 249 248 return NF_ACCEPT; 250 249 251 250 if (skb_try_make_writable(skb, sizeof(*iph)))
+1 -1
net/netfilter/nf_nat_helper.c
··· 170 170 if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) 171 171 return true; 172 172 173 - nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_TCP, 173 + nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_UDP, 174 174 udph, &udph->check, datalen, oldlen); 175 175 176 176 return true;
+1
net/netfilter/nf_queue.c
··· 255 255 repeat: 256 256 verdict = nf_hook_entry_hookfn(hook, skb, state); 257 257 if (verdict != NF_ACCEPT) { 258 + *index = i; 258 259 if (verdict != NF_REPEAT) 259 260 return verdict; 260 261 goto repeat;
+11 -9
net/netfilter/nf_tables_api.c
··· 2270 2270 u32 flags, int family, 2271 2271 const struct nft_table *table, 2272 2272 const struct nft_chain *chain, 2273 - const struct nft_rule *rule) 2273 + const struct nft_rule *rule, 2274 + const struct nft_rule *prule) 2274 2275 { 2275 2276 struct nlmsghdr *nlh; 2276 2277 struct nfgenmsg *nfmsg; 2277 2278 const struct nft_expr *expr, *next; 2278 2279 struct nlattr *list; 2279 - const struct nft_rule *prule; 2280 2280 u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event); 2281 2281 2282 2282 nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags); ··· 2296 2296 NFTA_RULE_PAD)) 2297 2297 goto nla_put_failure; 2298 2298 2299 - if ((event != NFT_MSG_DELRULE) && (rule->list.prev != &chain->rules)) { 2300 - prule = list_prev_entry(rule, list); 2299 + if (event != NFT_MSG_DELRULE && prule) { 2301 2300 if (nla_put_be64(skb, NFTA_RULE_POSITION, 2302 2301 cpu_to_be64(prule->handle), 2303 2302 NFTA_RULE_PAD)) ··· 2343 2344 2344 2345 err = nf_tables_fill_rule_info(skb, ctx->net, ctx->portid, ctx->seq, 2345 2346 event, 0, ctx->family, ctx->table, 2346 - ctx->chain, rule); 2347 + ctx->chain, rule, NULL); 2347 2348 if (err < 0) { 2348 2349 kfree_skb(skb); 2349 2350 goto err; ··· 2368 2369 const struct nft_chain *chain) 2369 2370 { 2370 2371 struct net *net = sock_net(skb->sk); 2372 + const struct nft_rule *rule, *prule; 2371 2373 unsigned int s_idx = cb->args[0]; 2372 - const struct nft_rule *rule; 2373 2374 2375 + prule = NULL; 2374 2376 list_for_each_entry_rcu(rule, &chain->rules, list) { 2375 2377 if (!nft_is_active(net, rule)) 2376 - goto cont; 2378 + goto cont_skip; 2377 2379 if (*idx < s_idx) 2378 2380 goto cont; 2379 2381 if (*idx > s_idx) { ··· 2386 2386 NFT_MSG_NEWRULE, 2387 2387 NLM_F_MULTI | NLM_F_APPEND, 2388 2388 table->family, 2389 - table, chain, rule) < 0) 2389 + table, chain, rule, prule) < 0) 2390 2390 return 1; 2391 2391 2392 2392 nl_dump_check_consistent(cb, nlmsg_hdr(skb)); 2393 2393 cont: 2394 + prule = rule; 2395 + cont_skip: 2394 
2396 (*idx)++; 2395 2397 } 2396 2398 return 0; ··· 2548 2546 2549 2547 err = nf_tables_fill_rule_info(skb2, net, NETLINK_CB(skb).portid, 2550 2548 nlh->nlmsg_seq, NFT_MSG_NEWRULE, 0, 2551 - family, table, chain, rule); 2549 + family, table, chain, rule, NULL); 2552 2550 if (err < 0) 2553 2551 goto err; 2554 2552
+3 -3
net/netfilter/nft_fib.c
··· 135 135 EXPORT_SYMBOL_GPL(nft_fib_dump); 136 136 137 137 void nft_fib_store_result(void *reg, const struct nft_fib *priv, 138 - const struct nft_pktinfo *pkt, int index) 138 + const struct net_device *dev) 139 139 { 140 - struct net_device *dev; 141 140 u32 *dreg = reg; 141 + int index; 142 142 143 143 switch (priv->result) { 144 144 case NFT_FIB_RESULT_OIF: 145 + index = dev ? dev->ifindex : 0; 145 146 *dreg = (priv->flags & NFTA_FIB_F_PRESENT) ? !!index : index; 146 147 break; 147 148 case NFT_FIB_RESULT_OIFNAME: 148 - dev = dev_get_by_index_rcu(nft_net(pkt), index); 149 149 if (priv->flags & NFTA_FIB_F_PRESENT) 150 150 *dreg = !!dev; 151 151 else
+21 -10
net/netfilter/nft_flow_offload.c
··· 13 13 #include <net/netfilter/nf_conntrack_core.h> 14 14 #include <linux/netfilter/nf_conntrack_common.h> 15 15 #include <net/netfilter/nf_flow_table.h> 16 - #include <net/netfilter/nf_conntrack_helper.h> 17 16 18 17 struct nft_flow_offload { 19 18 struct nft_flowtable *flowtable; ··· 49 50 return 0; 50 51 } 51 52 52 - static bool nft_flow_offload_skip(struct sk_buff *skb) 53 + static bool nft_flow_offload_skip(struct sk_buff *skb, int family) 53 54 { 54 - struct ip_options *opt = &(IPCB(skb)->opt); 55 - 56 - if (unlikely(opt->optlen)) 57 - return true; 58 55 if (skb_sec_path(skb)) 59 56 return true; 57 + 58 + if (family == NFPROTO_IPV4) { 59 + const struct ip_options *opt; 60 + 61 + opt = &(IPCB(skb)->opt); 62 + 63 + if (unlikely(opt->optlen)) 64 + return true; 65 + } 60 66 61 67 return false; 62 68 } ··· 72 68 { 73 69 struct nft_flow_offload *priv = nft_expr_priv(expr); 74 70 struct nf_flowtable *flowtable = &priv->flowtable->data; 75 - const struct nf_conn_help *help; 76 71 enum ip_conntrack_info ctinfo; 77 72 struct nf_flow_route route; 78 73 struct flow_offload *flow; 79 74 enum ip_conntrack_dir dir; 75 + bool is_tcp = false; 80 76 struct nf_conn *ct; 81 77 int ret; 82 78 83 - if (nft_flow_offload_skip(pkt->skb)) 79 + if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt))) 84 80 goto out; 85 81 86 82 ct = nf_ct_get(pkt->skb, &ctinfo); ··· 89 85 90 86 switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { 91 87 case IPPROTO_TCP: 88 + is_tcp = true; 89 + break; 92 90 case IPPROTO_UDP: 93 91 break; 94 92 default: 95 93 goto out; 96 94 } 97 95 98 - help = nfct_help(ct); 99 - if (help) 96 + if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || 97 + ct->status & IPS_SEQ_ADJUST) 100 98 goto out; 101 99 102 100 if (!nf_ct_is_confirmed(ct)) ··· 114 108 flow = flow_offload_alloc(ct, &route); 115 109 if (!flow) 116 110 goto err_flow_alloc; 111 + 112 + if (is_tcp) { 113 + ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; 114 + ct->proto.tcp.seen[1].flags |= 
IP_CT_TCP_FLAG_BE_LIBERAL; 115 + } 117 116 118 117 ret = flow_offload_add(flowtable, flow); 119 118 if (ret < 0)
+1 -1
tools/testing/selftests/netfilter/Makefile
··· 2 2 # Makefile for netfilter selftests 3 3 4 4 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ 5 - conntrack_icmp_related.sh 5 + conntrack_icmp_related.sh nft_flowtable.sh 6 6 7 7 include ../lib.mk
+324
tools/testing/selftests/netfilter/nft_flowtable.sh
··· 1 + #!/bin/bash 2 + # SPDX-License-Identifier: GPL-2.0 3 + # 4 + # This tests basic flowtable functionality. 5 + # Creates following topology: 6 + # 7 + # Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000) 8 + # Router1 is the one doing flow offloading, Router2 has no special 9 + # purpose other than having a link that is smaller than either Originator 10 + # and responder, i.e. TCPMSS announced values are too large and will still 11 + # result in fragmentation and/or PMTU discovery. 12 + 13 + # Kselftest framework requirement - SKIP code is 4. 14 + ksft_skip=4 15 + ret=0 16 + 17 + ns1in="" 18 + ns2in="" 19 + ns1out="" 20 + ns2out="" 21 + 22 + log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) 23 + 24 + nft --version > /dev/null 2>&1 25 + if [ $? -ne 0 ];then 26 + echo "SKIP: Could not run test without nft tool" 27 + exit $ksft_skip 28 + fi 29 + 30 + ip -Version > /dev/null 2>&1 31 + if [ $? -ne 0 ];then 32 + echo "SKIP: Could not run test without ip tool" 33 + exit $ksft_skip 34 + fi 35 + 36 + which nc > /dev/null 2>&1 37 + if [ $? -ne 0 ];then 38 + echo "SKIP: Could not run test without nc (netcat)" 39 + exit $ksft_skip 40 + fi 41 + 42 + ip netns add nsr1 43 + if [ $? 
-ne 0 ];then 44 + echo "SKIP: Could not create net namespace" 45 + exit $ksft_skip 46 + fi 47 + 48 + ip netns add ns1 49 + ip netns add ns2 50 + 51 + ip netns add nsr2 52 + 53 + cleanup() { 54 + for i in 1 2; do 55 + ip netns del ns$i 56 + ip netns del nsr$i 57 + done 58 + 59 + rm -f "$ns1in" "$ns1out" 60 + rm -f "$ns2in" "$ns2out" 61 + 62 + [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns 63 + } 64 + 65 + trap cleanup EXIT 66 + 67 + sysctl -q net.netfilter.nf_log_all_netns=1 68 + 69 + ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1 70 + ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2 71 + 72 + ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2 73 + 74 + for dev in lo veth0 veth1; do 75 + for i in 1 2; do 76 + ip -net nsr$i link set $dev up 77 + done 78 + done 79 + 80 + ip -net nsr1 addr add 10.0.1.1/24 dev veth0 81 + ip -net nsr1 addr add dead:1::1/64 dev veth0 82 + 83 + ip -net nsr2 addr add 10.0.2.1/24 dev veth1 84 + ip -net nsr2 addr add dead:2::1/64 dev veth1 85 + 86 + # set different MTUs so we need to push packets coming from ns1 (large MTU) 87 + # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1), 88 + # or to do PTMU discovery (send ICMP error back to originator). 89 + # ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers 90 + # is NOT the lowest link mtu. 91 + 92 + ip -net nsr1 link set veth0 mtu 9000 93 + ip -net ns1 link set eth0 mtu 9000 94 + 95 + ip -net nsr2 link set veth1 mtu 2000 96 + ip -net ns2 link set eth0 mtu 2000 97 + 98 + # transfer-net between nsr1 and nsr2. 99 + # these addresses are not used for connections. 
100 + ip -net nsr1 addr add 192.168.10.1/24 dev veth1 101 + ip -net nsr1 addr add fee1:2::1/64 dev veth1 102 + 103 + ip -net nsr2 addr add 192.168.10.2/24 dev veth0 104 + ip -net nsr2 addr add fee1:2::2/64 dev veth0 105 + 106 + for i in 1 2; do 107 + ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null 108 + ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null 109 + 110 + ip -net ns$i link set lo up 111 + ip -net ns$i link set eth0 up 112 + ip -net ns$i addr add 10.0.$i.99/24 dev eth0 113 + ip -net ns$i route add default via 10.0.$i.1 114 + ip -net ns$i addr add dead:$i::99/64 dev eth0 115 + ip -net ns$i route add default via dead:$i::1 116 + ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null 117 + 118 + # don't set ip DF bit for first two tests 119 + ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null 120 + done 121 + 122 + ip -net nsr1 route add default via 192.168.10.2 123 + ip -net nsr2 route add default via 192.168.10.1 124 + 125 + ip netns exec nsr1 nft -f - <<EOF 126 + table inet filter { 127 + flowtable f1 { 128 + hook ingress priority 0 129 + devices = { veth0, veth1 } 130 + } 131 + 132 + chain forward { 133 + type filter hook forward priority 0; policy drop; 134 + 135 + # flow offloaded? Tag ct with mark 1, so we can detect when it fails. 136 + meta oif "veth1" tcp dport 12345 flow offload @f1 counter 137 + 138 + # use packet size to trigger 'should be offloaded by now'. 139 + # otherwise, if 'flow offload' expression never offloads, the 140 + # test will pass. 141 + tcp dport 12345 meta length gt 200 ct mark set 1 counter 142 + 143 + # this turns off flow offloading internally, so expect packets again 144 + tcp flags fin,rst ct mark set 0 accept 145 + 146 + # this allows large packets from responder, we need this as long 147 + # as PMTUd is off. 148 + # This rule is deleted for the last test, when we expect PMTUd 149 + # to kick in and ensure all packets meet mtu requirements. 
150 + meta length gt 1500 accept comment something-to-grep-for 151 + 152 + # next line blocks connection w.o. working offload. 153 + # we only do this for reverse dir, because we expect packets to 154 + # enter slow path due to MTU mismatch of veth0 and veth1. 155 + tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop 156 + 157 + ct state established,related accept 158 + 159 + # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed) 160 + meta length lt 200 oif "veth1" tcp dport 12345 counter accept 161 + 162 + meta nfproto ipv4 meta l4proto icmp accept 163 + meta nfproto ipv6 meta l4proto icmpv6 accept 164 + } 165 + } 166 + EOF 167 + 168 + if [ $? -ne 0 ]; then 169 + echo "SKIP: Could not load nft ruleset" 170 + exit $ksft_skip 171 + fi 172 + 173 + # test basic connectivity 174 + ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null 175 + if [ $? -ne 0 ];then 176 + echo "ERROR: ns1 cannot reach ns2" 1>&2 177 + bash 178 + exit 1 179 + fi 180 + 181 + ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null 182 + if [ $? -ne 0 ];then 183 + echo "ERROR: ns2 cannot reach ns1" 1>&2 184 + exit 1 185 + fi 186 + 187 + if [ $ret -eq 0 ];then 188 + echo "PASS: netns routing/connectivity: ns1 can reach ns2" 189 + fi 190 + 191 + ns1in=$(mktemp) 192 + ns1out=$(mktemp) 193 + ns2in=$(mktemp) 194 + ns2out=$(mktemp) 195 + 196 + make_file() 197 + { 198 + name=$1 199 + who=$2 200 + 201 + SIZE=$((RANDOM % (1024 * 8))) 202 + TSIZE=$((SIZE * 1024)) 203 + 204 + dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null 205 + 206 + SIZE=$((RANDOM % 1024)) 207 + SIZE=$((SIZE + 128)) 208 + TSIZE=$((TSIZE + SIZE)) 209 + dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null 210 + } 211 + 212 + check_transfer() 213 + { 214 + in=$1 215 + out=$2 216 + what=$3 217 + 218 + cmp "$in" "$out" > /dev/null 2>&1 219 + if [ $? 
-ne 0 ] ;then 220 + echo "FAIL: file mismatch for $what" 1>&2 221 + ls -l "$in" 222 + ls -l "$out" 223 + return 1 224 + fi 225 + 226 + return 0 227 + } 228 + 229 + test_tcp_forwarding() 230 + { 231 + local nsa=$1 232 + local nsb=$2 233 + local lret=0 234 + 235 + ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" & 236 + lpid=$! 237 + 238 + sleep 1 239 + ip netns exec $nsa nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" & 240 + cpid=$! 241 + 242 + sleep 3 243 + 244 + kill $lpid 245 + kill $cpid 246 + wait 247 + 248 + check_transfer "$ns1in" "$ns2out" "ns1 -> ns2" 249 + if [ $? -ne 0 ];then 250 + lret=1 251 + fi 252 + 253 + check_transfer "$ns2in" "$ns1out" "ns1 <- ns2" 254 + if [ $? -ne 0 ];then 255 + lret=1 256 + fi 257 + 258 + return $lret 259 + } 260 + 261 + make_file "$ns1in" "ns1" 262 + make_file "$ns2in" "ns2" 263 + 264 + # First test: 265 + # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. 266 + test_tcp_forwarding ns1 ns2 267 + if [ $? -eq 0 ] ;then 268 + echo "PASS: flow offloaded for ns1/ns2" 269 + else 270 + echo "FAIL: flow offload for ns1/ns2:" 1>&2 271 + ip netns exec nsr1 nft list ruleset 272 + ret=1 273 + fi 274 + 275 + # delete default route, i.e. ns2 won't be able to reach ns1 and 276 + # will depend on ns1 being masqueraded in nsr1. 277 + # expect ns1 has nsr1 address. 278 + ip -net ns2 route del default via 10.0.2.1 279 + ip -net ns2 route del default via dead:2::1 280 + ip -net ns2 route add 192.168.10.1 via 10.0.2.1 281 + 282 + # Second test: 283 + # Same, but with NAT enabled. 284 + ip netns exec nsr1 nft -f - <<EOF 285 + table ip nat { 286 + chain postrouting { 287 + type nat hook postrouting priority 0; policy accept; 288 + meta oifname "veth1" masquerade 289 + } 290 + } 291 + EOF 292 + 293 + test_tcp_forwarding ns1 ns2 294 + 295 + if [ $? 
-eq 0 ] ;then 296 + echo "PASS: flow offloaded for ns1/ns2 with NAT" 297 + else 298 + echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 299 + ip netns exec nsr1 nft list ruleset 300 + ret=1 301 + fi 302 + 303 + # Third test: 304 + # Same as second test, but with PMTU discovery enabled. 305 + handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2) 306 + 307 + ip netns exec nsr1 nft delete rule inet filter forward $handle 308 + if [ $? -ne 0 ] ;then 309 + echo "FAIL: Could not delete large-packet accept rule" 310 + exit 1 311 + fi 312 + 313 + ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null 314 + ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null 315 + 316 + test_tcp_forwarding ns1 ns2 317 + if [ $? -eq 0 ] ;then 318 + echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery" 319 + else 320 + echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 321 + ip netns exec nsr1 nft list ruleset 322 + fi 323 + 324 + exit $ret
+5 -1
tools/testing/selftests/netfilter/nft_nat.sh
··· 36 36 ip netns add ns1 37 37 ip netns add ns2 38 38 39 - ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 39 + ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 > /dev/null 2>&1 40 + if [ $? -ne 0 ];then 41 + echo "SKIP: No virtual ethernet pair device support in kernel" 42 + exit $ksft_skip 43 + fi 40 44 ip link add veth1 netns ns0 type veth peer name eth0 netns ns2 41 45 42 46 ip -net ns0 link set lo up