Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Florian Westphal says:

====================
netfilter: conntrack and nf_tables bug fixes

The following patchset contains netfilter fixes for net.

Broken since 5.19:
A few ancient connection tracking helpers assume TCP packets cannot
exceed 64KB in size, but this is no longer the case since 5.19, when
BIG TCP was merged. From myself.

Regressions since 5.19:
1. 'conntrack -E expect' won't display anything because nfnetlink failed
to enable events for expectations, only for normal conntrack events.

2. partially revert a change that added resched calls to a function that
can run in atomic context. Both broken and fixed up by myself.

Broken for several releases (up to original merge of nf_tables):
Several fixes for nf_tables control plane, from Pablo.
This fixes up resource leaks in error paths and adds more sanity
checks for mutually exclusive attributes/flags.

Kconfig:
NF_CONNTRACK_PROCFS is very old and doesn't provide all info provided
via ctnetlink, so it should not default to y. From Geert Uytterhoeven.

Selftests:
rework nft_flowtable.sh: it frequently indicated failure; the way it
tried to detect an offload failure did not work reliably.

* git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
testing: selftests: nft_flowtable.sh: rework test to detect offload failure
testing: selftests: nft_flowtable.sh: use random netns names
netfilter: conntrack: NF_CONNTRACK_PROCFS should no longer default to y
netfilter: nf_tables: check NFT_SET_CONCAT flag if field_count is specified
netfilter: nf_tables: disallow NFT_SET_ELEM_CATCHALL and NFT_SET_ELEM_INTERVAL_END
netfilter: nf_tables: NFTA_SET_ELEM_KEY_END requires concat and interval flags
netfilter: nf_tables: validate NFTA_SET_ELEM_OBJREF based on NFT_SET_OBJECT flag
netfilter: nf_tables: really skip inactive sets when allocating name
netfilter: nfnetlink: re-enable conntrack expectation events
netfilter: nf_tables: fix scheduling-while-atomic splat
netfilter: nf_ct_irc: cap packet search space to 4k
netfilter: nf_ct_ftp: prefer skb_linearize
netfilter: nf_ct_h323: cap packet size at 64k
netfilter: nf_ct_sane: remove pseudo skb linearization
netfilter: nf_tables: possible module reference underflow in error path
netfilter: nf_tables: disallow NFTA_SET_ELEM_KEY_END with NFT_SET_ELEM_INTERVAL_END flag
netfilter: nf_tables: use READ_ONCE and WRITE_ONCE for shared generation id access
====================

Link: https://lore.kernel.org/r/20220817140015.25843-1-fw@strlen.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+384 -255
+1 -1
include/net/netns/conntrack.h
··· 95 95 96 96 struct netns_ct { 97 97 #ifdef CONFIG_NF_CONNTRACK_EVENTS 98 - bool ctnetlink_has_listener; 98 + u8 ctnetlink_has_listener; 99 99 bool ecache_dwork_pending; 100 100 #endif 101 101 u8 sysctl_log_invalid; /* Log invalid packets */
-1
net/netfilter/Kconfig
··· 144 144 145 145 config NF_CONNTRACK_PROCFS 146 146 bool "Supply CT list in procfs (OBSOLETE)" 147 - default y 148 147 depends on PROC_FS 149 148 help 150 149 This option enables for the list of known conntrack entries
+6 -18
net/netfilter/nf_conntrack_ftp.c
··· 34 34 MODULE_ALIAS("ip_conntrack_ftp"); 35 35 MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); 36 36 37 - /* This is slow, but it's simple. --RR */ 38 - static char *ftp_buffer; 39 - 40 - static DEFINE_SPINLOCK(nf_ftp_lock); 41 - 42 37 #define MAX_PORTS 8 43 38 static u_int16_t ports[MAX_PORTS]; 44 39 static unsigned int ports_c; ··· 393 398 return NF_ACCEPT; 394 399 } 395 400 401 + if (unlikely(skb_linearize(skb))) 402 + return NF_DROP; 403 + 396 404 th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); 397 405 if (th == NULL) 398 406 return NF_ACCEPT; ··· 409 411 } 410 412 datalen = skb->len - dataoff; 411 413 412 - spin_lock_bh(&nf_ftp_lock); 413 - fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer); 414 - if (!fb_ptr) { 415 - spin_unlock_bh(&nf_ftp_lock); 416 - return NF_ACCEPT; 417 - } 414 + spin_lock_bh(&ct->lock); 415 + fb_ptr = skb->data + dataoff; 418 416 419 417 ends_in_nl = (fb_ptr[datalen - 1] == '\n'); 420 418 seq = ntohl(th->seq) + datalen; ··· 538 544 if (ends_in_nl) 539 545 update_nl_seq(ct, seq, ct_ftp_info, dir, skb); 540 546 out: 541 - spin_unlock_bh(&nf_ftp_lock); 547 + spin_unlock_bh(&ct->lock); 542 548 return ret; 543 549 } 544 550 ··· 565 571 static void __exit nf_conntrack_ftp_fini(void) 566 572 { 567 573 nf_conntrack_helpers_unregister(ftp, ports_c * 2); 568 - kfree(ftp_buffer); 569 574 } 570 575 571 576 static int __init nf_conntrack_ftp_init(void) ··· 572 579 int i, ret = 0; 573 580 574 581 NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_ftp_master)); 575 - 576 - ftp_buffer = kmalloc(65536, GFP_KERNEL); 577 - if (!ftp_buffer) 578 - return -ENOMEM; 579 582 580 583 if (ports_c == 0) 581 584 ports[ports_c++] = FTP_PORT; ··· 592 603 ret = nf_conntrack_helpers_register(ftp, ports_c * 2); 593 604 if (ret < 0) { 594 605 pr_err("failed to register helpers\n"); 595 - kfree(ftp_buffer); 596 606 return ret; 597 607 } 598 608
+9 -1
net/netfilter/nf_conntrack_h323_main.c
··· 34 34 #include <net/netfilter/nf_conntrack_zones.h> 35 35 #include <linux/netfilter/nf_conntrack_h323.h> 36 36 37 + #define H323_MAX_SIZE 65535 38 + 37 39 /* Parameters */ 38 40 static unsigned int default_rrq_ttl __read_mostly = 300; 39 41 module_param(default_rrq_ttl, uint, 0600); ··· 87 85 tcpdatalen = skb->len - tcpdataoff; 88 86 if (tcpdatalen <= 0) /* No TCP data */ 89 87 goto clear_out; 88 + 89 + if (tcpdatalen > H323_MAX_SIZE) 90 + tcpdatalen = H323_MAX_SIZE; 90 91 91 92 if (*data == NULL) { /* first TPKT */ 92 93 /* Get first TPKT pointer */ ··· 1174 1169 if (dataoff >= skb->len) 1175 1170 return NULL; 1176 1171 *datalen = skb->len - dataoff; 1172 + if (*datalen > H323_MAX_SIZE) 1173 + *datalen = H323_MAX_SIZE; 1174 + 1177 1175 return skb_header_pointer(skb, dataoff, *datalen, h323_buffer); 1178 1176 } 1179 1177 ··· 1778 1770 1779 1771 NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_h323_master)); 1780 1772 1781 - h323_buffer = kmalloc(65536, GFP_KERNEL); 1773 + h323_buffer = kmalloc(H323_MAX_SIZE + 1, GFP_KERNEL); 1782 1774 if (!h323_buffer) 1783 1775 return -ENOMEM; 1784 1776 ret = h323_helper_init();
+9 -3
net/netfilter/nf_conntrack_irc.c
··· 39 39 EXPORT_SYMBOL_GPL(nf_nat_irc_hook); 40 40 41 41 #define HELPER_NAME "irc" 42 + #define MAX_SEARCH_SIZE 4095 42 43 43 44 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 44 45 MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); ··· 122 121 int i, ret = NF_ACCEPT; 123 122 char *addr_beg_p, *addr_end_p; 124 123 typeof(nf_nat_irc_hook) nf_nat_irc; 124 + unsigned int datalen; 125 125 126 126 /* If packet is coming from IRC server */ 127 127 if (dir == IP_CT_DIR_REPLY) ··· 142 140 if (dataoff >= skb->len) 143 141 return NF_ACCEPT; 144 142 143 + datalen = skb->len - dataoff; 144 + if (datalen > MAX_SEARCH_SIZE) 145 + datalen = MAX_SEARCH_SIZE; 146 + 145 147 spin_lock_bh(&irc_buffer_lock); 146 - ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff, 148 + ib_ptr = skb_header_pointer(skb, dataoff, datalen, 147 149 irc_buffer); 148 150 if (!ib_ptr) { 149 151 spin_unlock_bh(&irc_buffer_lock); ··· 155 149 } 156 150 157 151 data = ib_ptr; 158 - data_limit = ib_ptr + skb->len - dataoff; 152 + data_limit = ib_ptr + datalen; 159 153 160 154 /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 161 155 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ ··· 257 251 irc_exp_policy.max_expected = max_dcc_channels; 258 252 irc_exp_policy.timeout = dcc_timeout; 259 253 260 - irc_buffer = kmalloc(65536, GFP_KERNEL); 254 + irc_buffer = kmalloc(MAX_SEARCH_SIZE + 1, GFP_KERNEL); 261 255 if (!irc_buffer) 262 256 return -ENOMEM; 263 257
+31 -39
net/netfilter/nf_conntrack_sane.c
··· 34 34 MODULE_DESCRIPTION("SANE connection tracking helper"); 35 35 MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); 36 36 37 - static char *sane_buffer; 38 - 39 - static DEFINE_SPINLOCK(nf_sane_lock); 40 - 41 37 #define MAX_PORTS 8 42 38 static u_int16_t ports[MAX_PORTS]; 43 39 static unsigned int ports_c; ··· 63 67 unsigned int dataoff, datalen; 64 68 const struct tcphdr *th; 65 69 struct tcphdr _tcph; 66 - void *sb_ptr; 67 70 int ret = NF_ACCEPT; 68 71 int dir = CTINFO2DIR(ctinfo); 69 72 struct nf_ct_sane_master *ct_sane_info = nfct_help_data(ct); 70 73 struct nf_conntrack_expect *exp; 71 74 struct nf_conntrack_tuple *tuple; 72 - struct sane_request *req; 73 75 struct sane_reply_net_start *reply; 76 + union { 77 + struct sane_request req; 78 + struct sane_reply_net_start repl; 79 + } buf; 74 80 75 81 /* Until there's been traffic both ways, don't look in packets. */ 76 82 if (ctinfo != IP_CT_ESTABLISHED && ··· 90 92 return NF_ACCEPT; 91 93 92 94 datalen = skb->len - dataoff; 93 - 94 - spin_lock_bh(&nf_sane_lock); 95 - sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer); 96 - if (!sb_ptr) { 97 - spin_unlock_bh(&nf_sane_lock); 98 - return NF_ACCEPT; 99 - } 100 - 101 95 if (dir == IP_CT_DIR_ORIGINAL) { 102 - if (datalen != sizeof(struct sane_request)) 103 - goto out; 96 + const struct sane_request *req; 104 97 105 - req = sb_ptr; 98 + if (datalen != sizeof(struct sane_request)) 99 + return NF_ACCEPT; 100 + 101 + req = skb_header_pointer(skb, dataoff, datalen, &buf.req); 102 + if (!req) 103 + return NF_ACCEPT; 104 + 106 105 if (req->RPC_code != htonl(SANE_NET_START)) { 107 106 /* Not an interesting command */ 108 - ct_sane_info->state = SANE_STATE_NORMAL; 109 - goto out; 107 + WRITE_ONCE(ct_sane_info->state, SANE_STATE_NORMAL); 108 + return NF_ACCEPT; 110 109 } 111 110 112 111 /* We're interested in the next reply */ 113 - ct_sane_info->state = SANE_STATE_START_REQUESTED; 114 - goto out; 112 + WRITE_ONCE(ct_sane_info->state, SANE_STATE_START_REQUESTED); 113 + 
return NF_ACCEPT; 115 114 } 116 115 116 + /* IP_CT_DIR_REPLY */ 117 + 117 118 /* Is it a reply to an uninteresting command? */ 118 - if (ct_sane_info->state != SANE_STATE_START_REQUESTED) 119 - goto out; 119 + if (READ_ONCE(ct_sane_info->state) != SANE_STATE_START_REQUESTED) 120 + return NF_ACCEPT; 120 121 121 122 /* It's a reply to SANE_NET_START. */ 122 - ct_sane_info->state = SANE_STATE_NORMAL; 123 + WRITE_ONCE(ct_sane_info->state, SANE_STATE_NORMAL); 123 124 124 125 if (datalen < sizeof(struct sane_reply_net_start)) { 125 126 pr_debug("NET_START reply too short\n"); 126 - goto out; 127 + return NF_ACCEPT; 127 128 } 128 129 129 - reply = sb_ptr; 130 + datalen = sizeof(struct sane_reply_net_start); 131 + 132 + reply = skb_header_pointer(skb, dataoff, datalen, &buf.repl); 133 + if (!reply) 134 + return NF_ACCEPT; 135 + 130 136 if (reply->status != htonl(SANE_STATUS_SUCCESS)) { 131 137 /* saned refused the command */ 132 138 pr_debug("unsuccessful SANE_STATUS = %u\n", 133 139 ntohl(reply->status)); 134 - goto out; 140 + return NF_ACCEPT; 135 141 } 136 142 137 143 /* Invalid saned reply? Ignore it. 
*/ 138 144 if (reply->zero != 0) 139 - goto out; 145 + return NF_ACCEPT; 140 146 141 147 exp = nf_ct_expect_alloc(ct); 142 148 if (exp == NULL) { 143 149 nf_ct_helper_log(skb, ct, "cannot alloc expectation"); 144 - ret = NF_DROP; 145 - goto out; 150 + return NF_DROP; 146 151 } 147 152 148 153 tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; ··· 163 162 } 164 163 165 164 nf_ct_expect_put(exp); 166 - 167 - out: 168 - spin_unlock_bh(&nf_sane_lock); 169 165 return ret; 170 166 } 171 167 ··· 176 178 static void __exit nf_conntrack_sane_fini(void) 177 179 { 178 180 nf_conntrack_helpers_unregister(sane, ports_c * 2); 179 - kfree(sane_buffer); 180 181 } 181 182 182 183 static int __init nf_conntrack_sane_init(void) ··· 183 186 int i, ret = 0; 184 187 185 188 NF_CT_HELPER_BUILD_BUG_ON(sizeof(struct nf_ct_sane_master)); 186 - 187 - sane_buffer = kmalloc(65536, GFP_KERNEL); 188 - if (!sane_buffer) 189 - return -ENOMEM; 190 189 191 190 if (ports_c == 0) 192 191 ports[ports_c++] = SANE_PORT; ··· 203 210 ret = nf_conntrack_helpers_register(sane, ports_c * 2); 204 211 if (ret < 0) { 205 212 pr_err("failed to register helpers\n"); 206 - kfree(sane_buffer); 207 213 return ret; 208 214 } 209 215
+57 -17
net/netfilter/nf_tables_api.c
··· 889 889 890 890 rcu_read_lock(); 891 891 nft_net = nft_pernet(net); 892 - cb->seq = nft_net->base_seq; 892 + cb->seq = READ_ONCE(nft_net->base_seq); 893 893 894 894 list_for_each_entry_rcu(table, &nft_net->tables, list) { 895 895 if (family != NFPROTO_UNSPEC && family != table->family) ··· 1705 1705 1706 1706 rcu_read_lock(); 1707 1707 nft_net = nft_pernet(net); 1708 - cb->seq = nft_net->base_seq; 1708 + cb->seq = READ_ONCE(nft_net->base_seq); 1709 1709 1710 1710 list_for_each_entry_rcu(table, &nft_net->tables, list) { 1711 1711 if (family != NFPROTO_UNSPEC && family != table->family) ··· 3149 3149 3150 3150 rcu_read_lock(); 3151 3151 nft_net = nft_pernet(net); 3152 - cb->seq = nft_net->base_seq; 3152 + cb->seq = READ_ONCE(nft_net->base_seq); 3153 3153 3154 3154 list_for_each_entry_rcu(table, &nft_net->tables, list) { 3155 3155 if (family != NFPROTO_UNSPEC && family != table->family) ··· 3907 3907 list_for_each_entry(i, &ctx->table->sets, list) { 3908 3908 int tmp; 3909 3909 3910 - if (!nft_is_active_next(ctx->net, set)) 3910 + if (!nft_is_active_next(ctx->net, i)) 3911 3911 continue; 3912 3912 if (!sscanf(i->name, name, &tmp)) 3913 3913 continue; ··· 4133 4133 4134 4134 rcu_read_lock(); 4135 4135 nft_net = nft_pernet(net); 4136 - cb->seq = nft_net->base_seq; 4136 + cb->seq = READ_ONCE(nft_net->base_seq); 4137 4137 4138 4138 list_for_each_entry_rcu(table, &nft_net->tables, list) { 4139 4139 if (ctx->family != NFPROTO_UNSPEC && ··· 4451 4451 err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]); 4452 4452 if (err < 0) 4453 4453 return err; 4454 + 4455 + if (desc.field_count > 1 && !(flags & NFT_SET_CONCAT)) 4456 + return -EINVAL; 4457 + } else if (flags & NFT_SET_CONCAT) { 4458 + return -EINVAL; 4454 4459 } 4455 4460 4456 4461 if (nla[NFTA_SET_EXPR] || nla[NFTA_SET_EXPRESSIONS]) ··· 5066 5061 5067 5062 rcu_read_lock(); 5068 5063 nft_net = nft_pernet(net); 5064 + cb->seq = READ_ONCE(nft_net->base_seq); 5065 + 5069 5066 list_for_each_entry_rcu(table, 
&nft_net->tables, list) { 5070 5067 if (dump_ctx->ctx.family != NFPROTO_UNSPEC && 5071 5068 dump_ctx->ctx.family != table->family) ··· 5202 5195 return -EOPNOTSUPP; 5203 5196 if (!(set->flags & NFT_SET_INTERVAL) && 5204 5197 *flags & NFT_SET_ELEM_INTERVAL_END) 5198 + return -EINVAL; 5199 + if ((*flags & (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL)) == 5200 + (NFT_SET_ELEM_INTERVAL_END | NFT_SET_ELEM_CATCHALL)) 5205 5201 return -EINVAL; 5206 5202 5207 5203 return 0; ··· 5609 5599 5610 5600 err = nft_expr_clone(expr, set->exprs[i]); 5611 5601 if (err < 0) { 5612 - nft_expr_destroy(ctx, expr); 5602 + kfree(expr); 5613 5603 goto err_expr; 5614 5604 } 5615 5605 expr_array[i] = expr; ··· 5852 5842 set->ops->remove(net, set, elem); 5853 5843 } 5854 5844 5845 + static bool nft_setelem_valid_key_end(const struct nft_set *set, 5846 + struct nlattr **nla, u32 flags) 5847 + { 5848 + if ((set->flags & (NFT_SET_CONCAT | NFT_SET_INTERVAL)) == 5849 + (NFT_SET_CONCAT | NFT_SET_INTERVAL)) { 5850 + if (flags & NFT_SET_ELEM_INTERVAL_END) 5851 + return false; 5852 + if (!nla[NFTA_SET_ELEM_KEY_END] && 5853 + !(flags & NFT_SET_ELEM_CATCHALL)) 5854 + return false; 5855 + } else { 5856 + if (nla[NFTA_SET_ELEM_KEY_END]) 5857 + return false; 5858 + } 5859 + 5860 + return true; 5861 + } 5862 + 5855 5863 static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, 5856 5864 const struct nlattr *attr, u32 nlmsg_flags) 5857 5865 { ··· 5920 5892 return -EINVAL; 5921 5893 } 5922 5894 5895 + if (set->flags & NFT_SET_OBJECT) { 5896 + if (!nla[NFTA_SET_ELEM_OBJREF] && 5897 + !(flags & NFT_SET_ELEM_INTERVAL_END)) 5898 + return -EINVAL; 5899 + } else { 5900 + if (nla[NFTA_SET_ELEM_OBJREF]) 5901 + return -EINVAL; 5902 + } 5903 + 5904 + if (!nft_setelem_valid_key_end(set, nla, flags)) 5905 + return -EINVAL; 5906 + 5923 5907 if ((flags & NFT_SET_ELEM_INTERVAL_END) && 5924 5908 (nla[NFTA_SET_ELEM_DATA] || 5925 5909 nla[NFTA_SET_ELEM_OBJREF] || ··· 5939 5899 nla[NFTA_SET_ELEM_EXPIRATION] || 
5940 5900 nla[NFTA_SET_ELEM_USERDATA] || 5941 5901 nla[NFTA_SET_ELEM_EXPR] || 5902 + nla[NFTA_SET_ELEM_KEY_END] || 5942 5903 nla[NFTA_SET_ELEM_EXPRESSIONS])) 5943 5904 return -EINVAL; 5944 5905 ··· 6070 6029 } 6071 6030 6072 6031 if (nla[NFTA_SET_ELEM_OBJREF] != NULL) { 6073 - if (!(set->flags & NFT_SET_OBJECT)) { 6074 - err = -EINVAL; 6075 - goto err_parse_key_end; 6076 - } 6077 6032 obj = nft_obj_lookup(ctx->net, ctx->table, 6078 6033 nla[NFTA_SET_ELEM_OBJREF], 6079 6034 set->objtype, genmask); ··· 6360 6323 return err; 6361 6324 6362 6325 if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL)) 6326 + return -EINVAL; 6327 + 6328 + if (!nft_setelem_valid_key_end(set, nla, flags)) 6363 6329 return -EINVAL; 6364 6330 6365 6331 nft_set_ext_prepare(&tmpl); ··· 6981 6941 6982 6942 rcu_read_lock(); 6983 6943 nft_net = nft_pernet(net); 6984 - cb->seq = nft_net->base_seq; 6944 + cb->seq = READ_ONCE(nft_net->base_seq); 6985 6945 6986 6946 list_for_each_entry_rcu(table, &nft_net->tables, list) { 6987 6947 if (family != NFPROTO_UNSPEC && family != table->family) ··· 7913 7873 7914 7874 rcu_read_lock(); 7915 7875 nft_net = nft_pernet(net); 7916 - cb->seq = nft_net->base_seq; 7876 + cb->seq = READ_ONCE(nft_net->base_seq); 7917 7877 7918 7878 list_for_each_entry_rcu(table, &nft_net->tables, list) { 7919 7879 if (family != NFPROTO_UNSPEC && family != table->family) ··· 8846 8806 struct nft_trans_elem *te; 8847 8807 struct nft_chain *chain; 8848 8808 struct nft_table *table; 8809 + unsigned int base_seq; 8849 8810 LIST_HEAD(adl); 8850 8811 int err; 8851 8812 ··· 8896 8855 * Bump generation counter, invalidate any dump in progress. 8897 8856 * Cannot fail after this point. 8898 8857 */ 8899 - while (++nft_net->base_seq == 0) 8858 + base_seq = READ_ONCE(nft_net->base_seq); 8859 + while (++base_seq == 0) 8900 8860 ; 8861 + 8862 + WRITE_ONCE(nft_net->base_seq, base_seq); 8901 8863 8902 8864 /* step 3. Start new generation, rules_gen_X now in use. 
*/ 8903 8865 net->nft.gencursor = nft_gencursor_next(net); ··· 9463 9419 break; 9464 9420 } 9465 9421 } 9466 - 9467 - cond_resched(); 9468 9422 } 9469 9423 9470 9424 list_for_each_entry(set, &ctx->table->sets, list) { 9471 - cond_resched(); 9472 - 9473 9425 if (!nft_is_active_next(ctx->net, set)) 9474 9426 continue; 9475 9427 if (!(set->flags & NFT_SET_MAP) ||
+71 -12
net/netfilter/nfnetlink.c
··· 44 44 45 45 static unsigned int nfnetlink_pernet_id __read_mostly; 46 46 47 + #ifdef CONFIG_NF_CONNTRACK_EVENTS 48 + static DEFINE_SPINLOCK(nfnl_grp_active_lock); 49 + #endif 50 + 47 51 struct nfnl_net { 48 52 struct sock *nfnl; 49 53 }; ··· 658 654 netlink_rcv_skb(skb, nfnetlink_rcv_msg); 659 655 } 660 656 657 + static void nfnetlink_bind_event(struct net *net, unsigned int group) 658 + { 659 + #ifdef CONFIG_NF_CONNTRACK_EVENTS 660 + int type, group_bit; 661 + u8 v; 662 + 663 + /* All NFNLGRP_CONNTRACK_* group bits fit into u8. 664 + * The other groups are not relevant and can be ignored. 665 + */ 666 + if (group >= 8) 667 + return; 668 + 669 + type = nfnl_group2type[group]; 670 + 671 + switch (type) { 672 + case NFNL_SUBSYS_CTNETLINK: 673 + break; 674 + case NFNL_SUBSYS_CTNETLINK_EXP: 675 + break; 676 + default: 677 + return; 678 + } 679 + 680 + group_bit = (1 << group); 681 + 682 + spin_lock(&nfnl_grp_active_lock); 683 + v = READ_ONCE(net->ct.ctnetlink_has_listener); 684 + if ((v & group_bit) == 0) { 685 + v |= group_bit; 686 + 687 + /* read concurrently without nfnl_grp_active_lock held. 
*/ 688 + WRITE_ONCE(net->ct.ctnetlink_has_listener, v); 689 + } 690 + 691 + spin_unlock(&nfnl_grp_active_lock); 692 + #endif 693 + } 694 + 661 695 static int nfnetlink_bind(struct net *net, int group) 662 696 { 663 697 const struct nfnetlink_subsystem *ss; ··· 712 670 if (!ss) 713 671 request_module_nowait("nfnetlink-subsys-%d", type); 714 672 715 - #ifdef CONFIG_NF_CONNTRACK_EVENTS 716 - if (type == NFNL_SUBSYS_CTNETLINK) { 717 - nfnl_lock(NFNL_SUBSYS_CTNETLINK); 718 - WRITE_ONCE(net->ct.ctnetlink_has_listener, true); 719 - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); 720 - } 721 - #endif 673 + nfnetlink_bind_event(net, group); 722 674 return 0; 723 675 } 724 676 725 677 static void nfnetlink_unbind(struct net *net, int group) 726 678 { 727 679 #ifdef CONFIG_NF_CONNTRACK_EVENTS 680 + int type, group_bit; 681 + 728 682 if (group <= NFNLGRP_NONE || group > NFNLGRP_MAX) 729 683 return; 730 684 731 - if (nfnl_group2type[group] == NFNL_SUBSYS_CTNETLINK) { 732 - nfnl_lock(NFNL_SUBSYS_CTNETLINK); 733 - if (!nfnetlink_has_listeners(net, group)) 734 - WRITE_ONCE(net->ct.ctnetlink_has_listener, false); 735 - nfnl_unlock(NFNL_SUBSYS_CTNETLINK); 685 + type = nfnl_group2type[group]; 686 + 687 + switch (type) { 688 + case NFNL_SUBSYS_CTNETLINK: 689 + break; 690 + case NFNL_SUBSYS_CTNETLINK_EXP: 691 + break; 692 + default: 693 + return; 736 694 } 695 + 696 + /* ctnetlink_has_listener is u8 */ 697 + if (group >= 8) 698 + return; 699 + 700 + group_bit = (1 << group); 701 + 702 + spin_lock(&nfnl_grp_active_lock); 703 + if (!nfnetlink_has_listeners(net, group)) { 704 + u8 v = READ_ONCE(net->ct.ctnetlink_has_listener); 705 + 706 + v &= ~group_bit; 707 + 708 + /* read concurrently without nfnl_grp_active_lock held. */ 709 + WRITE_ONCE(net->ct.ctnetlink_has_listener, v); 710 + } 711 + spin_unlock(&nfnl_grp_active_lock); 737 712 #endif 738 713 } 739 714
+200 -163
tools/testing/selftests/netfilter/nft_flowtable.sh
··· 14 14 # nft_flowtable.sh -o8000 -l1500 -r2000 15 15 # 16 16 17 + sfx=$(mktemp -u "XXXXXXXX") 18 + ns1="ns1-$sfx" 19 + ns2="ns2-$sfx" 20 + nsr1="nsr1-$sfx" 21 + nsr2="nsr2-$sfx" 17 22 18 23 # Kselftest framework requirement - SKIP code is 4. 19 24 ksft_skip=4 20 25 ret=0 21 26 22 - ns1in="" 23 - ns2in="" 27 + nsin="" 24 28 ns1out="" 25 29 ns2out="" 26 30 ··· 40 36 checktool "nft --version" "run test without nft tool" 41 37 checktool "ip -Version" "run test without ip tool" 42 38 checktool "which nc" "run test without nc (netcat)" 43 - checktool "ip netns add nsr1" "create net namespace" 39 + checktool "ip netns add $nsr1" "create net namespace $nsr1" 44 40 45 - ip netns add ns1 46 - ip netns add ns2 47 - 48 - ip netns add nsr2 41 + ip netns add $ns1 42 + ip netns add $ns2 43 + ip netns add $nsr2 49 44 50 45 cleanup() { 51 - for i in 1 2; do 52 - ip netns del ns$i 53 - ip netns del nsr$i 54 - done 46 + ip netns del $ns1 47 + ip netns del $ns2 48 + ip netns del $nsr1 49 + ip netns del $nsr2 55 50 56 - rm -f "$ns1in" "$ns1out" 57 - rm -f "$ns2in" "$ns2out" 51 + rm -f "$nsin" "$ns1out" "$ns2out" 58 52 59 53 [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns 60 54 } ··· 61 59 62 60 sysctl -q net.netfilter.nf_log_all_netns=1 63 61 64 - ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1 65 - ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2 62 + ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1 63 + ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2 66 64 67 - ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2 65 + ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2 68 66 69 67 for dev in lo veth0 veth1; do 70 - for i in 1 2; do 71 - ip -net nsr$i link set $dev up 72 - done 68 + ip -net $nsr1 link set $dev up 69 + ip -net $nsr2 link set $dev up 73 70 done 74 71 75 - ip -net nsr1 addr add 10.0.1.1/24 dev veth0 76 - ip -net nsr1 addr add dead:1::1/64 dev 
veth0 72 + ip -net $nsr1 addr add 10.0.1.1/24 dev veth0 73 + ip -net $nsr1 addr add dead:1::1/64 dev veth0 77 74 78 - ip -net nsr2 addr add 10.0.2.1/24 dev veth1 79 - ip -net nsr2 addr add dead:2::1/64 dev veth1 75 + ip -net $nsr2 addr add 10.0.2.1/24 dev veth1 76 + ip -net $nsr2 addr add dead:2::1/64 dev veth1 80 77 81 78 # set different MTUs so we need to push packets coming from ns1 (large MTU) 82 79 # to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1), ··· 107 106 esac 108 107 done 109 108 110 - if ! ip -net nsr1 link set veth0 mtu $omtu; then 109 + if ! ip -net $nsr1 link set veth0 mtu $omtu; then 111 110 exit 1 112 111 fi 113 112 114 - ip -net ns1 link set eth0 mtu $omtu 113 + ip -net $ns1 link set eth0 mtu $omtu 115 114 116 - if ! ip -net nsr2 link set veth1 mtu $rmtu; then 115 + if ! ip -net $nsr2 link set veth1 mtu $rmtu; then 117 116 exit 1 118 117 fi 119 118 120 - ip -net ns2 link set eth0 mtu $rmtu 119 + ip -net $ns2 link set eth0 mtu $rmtu 121 120 122 121 # transfer-net between nsr1 and nsr2. 123 122 # these addresses are not used for connections. 
124 - ip -net nsr1 addr add 192.168.10.1/24 dev veth1 125 - ip -net nsr1 addr add fee1:2::1/64 dev veth1 123 + ip -net $nsr1 addr add 192.168.10.1/24 dev veth1 124 + ip -net $nsr1 addr add fee1:2::1/64 dev veth1 126 125 127 - ip -net nsr2 addr add 192.168.10.2/24 dev veth0 128 - ip -net nsr2 addr add fee1:2::2/64 dev veth0 126 + ip -net $nsr2 addr add 192.168.10.2/24 dev veth0 127 + ip -net $nsr2 addr add fee1:2::2/64 dev veth0 129 128 130 - for i in 1 2; do 131 - ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null 132 - ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null 129 + for i in 0 1; do 130 + ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null 131 + ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null 132 + done 133 133 134 - ip -net ns$i link set lo up 135 - ip -net ns$i link set eth0 up 136 - ip -net ns$i addr add 10.0.$i.99/24 dev eth0 137 - ip -net ns$i route add default via 10.0.$i.1 138 - ip -net ns$i addr add dead:$i::99/64 dev eth0 139 - ip -net ns$i route add default via dead:$i::1 140 - if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then 134 + for ns in $ns1 $ns2;do 135 + ip -net $ns link set lo up 136 + ip -net $ns link set eth0 up 137 + 138 + if ! 
ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then 141 139 echo "ERROR: Check Originator/Responder values (problem during address addition)" 142 140 exit 1 143 141 fi 144 - 145 142 # don't set ip DF bit for first two tests 146 - ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null 143 + ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null 147 144 done 148 145 149 - ip -net nsr1 route add default via 192.168.10.2 150 - ip -net nsr2 route add default via 192.168.10.1 146 + ip -net $ns1 addr add 10.0.1.99/24 dev eth0 147 + ip -net $ns2 addr add 10.0.2.99/24 dev eth0 148 + ip -net $ns1 route add default via 10.0.1.1 149 + ip -net $ns2 route add default via 10.0.2.1 150 + ip -net $ns1 addr add dead:1::99/64 dev eth0 151 + ip -net $ns2 addr add dead:2::99/64 dev eth0 152 + ip -net $ns1 route add default via dead:1::1 153 + ip -net $ns2 route add default via dead:2::1 151 154 152 - ip netns exec nsr1 nft -f - <<EOF 155 + ip -net $nsr1 route add default via 192.168.10.2 156 + ip -net $nsr2 route add default via 192.168.10.1 157 + 158 + ip netns exec $nsr1 nft -f - <<EOF 153 159 table inet filter { 154 160 flowtable f1 { 155 161 hook ingress priority 0 156 162 devices = { veth0, veth1 } 157 163 } 158 164 165 + counter routed_orig { } 166 + counter routed_repl { } 167 + 159 168 chain forward { 160 169 type filter hook forward priority 0; policy drop; 161 170 162 171 # flow offloaded? Tag ct with mark 1, so we can detect when it fails. 163 - meta oif "veth1" tcp dport 12345 flow offload @f1 counter 172 + meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept 164 173 165 - # use packet size to trigger 'should be offloaded by now'. 166 - # otherwise, if 'flow offload' expression never offloads, the 167 - # test will pass. 
168 - tcp dport 12345 meta length gt 200 ct mark set 1 counter 169 - 170 - # this turns off flow offloading internally, so expect packets again 171 - tcp flags fin,rst ct mark set 0 accept 172 - 173 - # this allows large packets from responder, we need this as long 174 - # as PMTUd is off. 175 - # This rule is deleted for the last test, when we expect PMTUd 176 - # to kick in and ensure all packets meet mtu requirements. 177 - meta length gt $lmtu accept comment something-to-grep-for 178 - 179 - # next line blocks connection w.o. working offload. 180 - # we only do this for reverse dir, because we expect packets to 181 - # enter slow path due to MTU mismatch of veth0 and veth1. 182 - tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop 174 + # count packets supposedly offloaded as per direction. 175 + ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept 183 176 184 177 ct state established,related accept 185 - 186 - # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed) 187 - meta length lt 200 oif "veth1" tcp dport 12345 counter accept 188 178 189 179 meta nfproto ipv4 meta l4proto icmp accept 190 180 meta nfproto ipv6 meta l4proto icmpv6 accept ··· 189 197 fi 190 198 191 199 # test basic connectivity 192 - if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then 193 - echo "ERROR: ns1 cannot reach ns2" 1>&2 200 + if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then 201 + echo "ERROR: $ns1 cannot reach ns2" 1>&2 194 202 exit 1 195 203 fi 196 204 197 - if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then 198 - echo "ERROR: ns2 cannot reach ns1" 1>&2 205 + if ! 
ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then 206 + echo "ERROR: $ns2 cannot reach $ns1" 1>&2 199 207 exit 1 200 208 fi 201 209 202 210 if [ $ret -eq 0 ];then 203 - echo "PASS: netns routing/connectivity: ns1 can reach ns2" 211 + echo "PASS: netns routing/connectivity: $ns1 can reach $ns2" 204 212 fi 205 213 206 - ns1in=$(mktemp) 214 + nsin=$(mktemp) 207 215 ns1out=$(mktemp) 208 - ns2in=$(mktemp) 209 216 ns2out=$(mktemp) 210 217 211 218 make_file() 212 219 { 213 220 name=$1 214 221 215 - SIZE=$((RANDOM % (1024 * 8))) 222 + SIZE=$((RANDOM % (1024 * 128))) 223 + SIZE=$((SIZE + (1024 * 8))) 216 224 TSIZE=$((SIZE * 1024)) 217 225 218 226 dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null ··· 221 229 SIZE=$((SIZE + 128)) 222 230 TSIZE=$((TSIZE + SIZE)) 223 231 dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null 232 + } 233 + 234 + check_counters() 235 + { 236 + local what=$1 237 + local ok=1 238 + 239 + local orig=$(ip netns exec $nsr1 nft reset counter inet filter routed_orig | grep packets) 240 + local repl=$(ip netns exec $nsr1 nft reset counter inet filter routed_repl | grep packets) 241 + 242 + local orig_cnt=${orig#*bytes} 243 + local repl_cnt=${repl#*bytes} 244 + 245 + local fs=$(du -sb $nsin) 246 + local max_orig=${fs%%/*} 247 + local max_repl=$((max_orig/4)) 248 + 249 + if [ $orig_cnt -gt $max_orig ];then 250 + echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2 251 + ret=1 252 + ok=0 253 + fi 254 + 255 + if [ $repl_cnt -gt $max_repl ];then 256 + echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2 257 + ret=1 258 + ok=0 259 + fi 260 + 261 + if [ $ok -eq 1 ]; then 262 + echo "PASS: $what" 263 + fi 224 264 } 225 265 226 266 check_transfer() ··· 279 255 local dstport=$4 280 256 local lret=0 281 257 282 - ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" & 258 + ip netns exec $nsb nc -w 5 -l -p 12345 < "$nsin" > "$ns2out" & 283 259 lpid=$! 
284 260 285 261 sleep 1 286 - ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" & 262 + ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" & 287 263 cpid=$! 288 264 289 265 sleep 3 ··· 298 274 299 275 wait 300 276 301 - if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then 277 + if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then 302 278 lret=1 303 279 fi 304 280 305 - if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then 281 + if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then 306 282 lret=1 307 283 fi 308 284 ··· 319 295 test_tcp_forwarding_nat() 320 296 { 321 297 local lret 298 + local pmtu 322 299 323 300 test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345 324 301 lret=$? 325 302 303 + pmtu=$3 304 + what=$4 305 + 326 306 if [ $lret -eq 0 ] ; then 307 + if [ $pmtu -eq 1 ] ;then 308 + check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what" 309 + else 310 + echo "PASS: flow offload for ns1/ns2 with masquerade $what" 311 + fi 312 + 327 313 test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666 328 314 lret=$? 315 + if [ $pmtu -eq 1 ] ;then 316 + check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what" 317 + elif [ $lret -eq 0 ] ; then 318 + echo "PASS: flow offload for ns1/ns2 with dnat $what" 319 + fi 329 320 fi 330 321 331 322 return $lret 332 323 } 333 324 334 - make_file "$ns1in" 335 - make_file "$ns2in" 325 + make_file "$nsin" 336 326 337 327 # First test: 338 328 # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. 339 - if test_tcp_forwarding ns1 ns2; then 329 + # Due to MTU mismatch in both directions, all packets (except small packets like pure 330 + # acks) have to be handled by normal forwarding path. Therefore, packet counters 331 + # are not checked. 
332 + if test_tcp_forwarding $ns1 $ns2; then 340 333 echo "PASS: flow offloaded for ns1/ns2" 341 334 else 342 335 echo "FAIL: flow offload for ns1/ns2:" 1>&2 343 - ip netns exec nsr1 nft list ruleset 336 + ip netns exec $nsr1 nft list ruleset 344 337 ret=1 345 338 fi 346 339 347 340 # delete default route, i.e. ns2 won't be able to reach ns1 and 348 341 # will depend on ns1 being masqueraded in nsr1. 349 342 # expect ns1 has nsr1 address. 350 - ip -net ns2 route del default via 10.0.2.1 351 - ip -net ns2 route del default via dead:2::1 352 - ip -net ns2 route add 192.168.10.1 via 10.0.2.1 343 + ip -net $ns2 route del default via 10.0.2.1 344 + ip -net $ns2 route del default via dead:2::1 345 + ip -net $ns2 route add 192.168.10.1 via 10.0.2.1 353 346 354 347 # Second test: 355 - # Same, but with NAT enabled. 356 - ip netns exec nsr1 nft -f - <<EOF 348 + # Same, but with NAT enabled. Same as in first test: we expect normal forward path 349 + # to handle most packets. 350 + ip netns exec $nsr1 nft -f - <<EOF 357 351 table ip nat { 358 352 chain prerouting { 359 353 type nat hook prerouting priority 0; policy accept; ··· 385 343 } 386 344 EOF 387 345 388 - if test_tcp_forwarding_nat ns1 ns2; then 389 - echo "PASS: flow offloaded for ns1/ns2 with NAT" 390 - else 346 + if ! test_tcp_forwarding_nat $ns1 $ns2 0 ""; then 391 347 echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 392 - ip netns exec nsr1 nft list ruleset 348 + ip netns exec $nsr1 nft list ruleset 393 349 ret=1 394 350 fi 395 351 396 352 # Third test: 397 - # Same as second test, but with PMTU discovery enabled. 398 - handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2) 353 + # Same as second test, but with PMTU discovery enabled. This 354 + # means that we expect the fastpath to handle packets as soon 355 + # as the endpoints adjust the packet size. 
356 + ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null 357 + ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null 399 358 400 - if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then 401 - echo "FAIL: Could not delete large-packet accept rule" 402 - exit 1 403 - fi 359 + # reset counters. 360 + # With pmtu in-place we'll also check that nft counters 361 + # are lower than file size and packets were forwarded via flowtable layer. 362 + # For earlier tests (large mtus), packets cannot be handled via flowtable 363 + # (except pure acks and other small packets). 364 + ip netns exec $nsr1 nft reset counters table inet filter >/dev/null 404 365 405 - ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null 406 - ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null 407 - 408 - if test_tcp_forwarding_nat ns1 ns2; then 409 - echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery" 410 - else 366 + if ! test_tcp_forwarding_nat $ns1 $ns2 1 ""; then 411 367 echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 412 - ip netns exec nsr1 nft list ruleset 368 + ip netns exec $nsr1 nft list ruleset 413 369 fi 414 370 415 371 # Another test: 416 372 # Add bridge interface br0 to Router1, with NAT enabled. 
417 - ip -net nsr1 link add name br0 type bridge 418 - ip -net nsr1 addr flush dev veth0 419 - ip -net nsr1 link set up dev veth0 420 - ip -net nsr1 link set veth0 master br0 421 - ip -net nsr1 addr add 10.0.1.1/24 dev br0 422 - ip -net nsr1 addr add dead:1::1/64 dev br0 423 - ip -net nsr1 link set up dev br0 373 + ip -net $nsr1 link add name br0 type bridge 374 + ip -net $nsr1 addr flush dev veth0 375 + ip -net $nsr1 link set up dev veth0 376 + ip -net $nsr1 link set veth0 master br0 377 + ip -net $nsr1 addr add 10.0.1.1/24 dev br0 378 + ip -net $nsr1 addr add dead:1::1/64 dev br0 379 + ip -net $nsr1 link set up dev br0 424 380 425 - ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null 381 + ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null 426 382 427 383 # br0 with NAT enabled. 428 - ip netns exec nsr1 nft -f - <<EOF 384 + ip netns exec $nsr1 nft -f - <<EOF 429 385 flush table ip nat 430 386 table ip nat { 431 387 chain prerouting { ··· 438 398 } 439 399 EOF 440 400 441 - if test_tcp_forwarding_nat ns1 ns2; then 442 - echo "PASS: flow offloaded for ns1/ns2 with bridge NAT" 443 - else 401 + if ! test_tcp_forwarding_nat $ns1 $ns2 1 "on bridge"; then 444 402 echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2 445 - ip netns exec nsr1 nft list ruleset 403 + ip netns exec $nsr1 nft list ruleset 446 404 ret=1 447 405 fi 448 406 407 + 449 408 # Another test: 450 409 # Add bridge interface br0 to Router1, with NAT and VLAN. 
451 - ip -net nsr1 link set veth0 nomaster 452 - ip -net nsr1 link set down dev veth0 453 - ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10 454 - ip -net nsr1 link set up dev veth0 455 - ip -net nsr1 link set up dev veth0.10 456 - ip -net nsr1 link set veth0.10 master br0 410 + ip -net $nsr1 link set veth0 nomaster 411 + ip -net $nsr1 link set down dev veth0 412 + ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10 413 + ip -net $nsr1 link set up dev veth0 414 + ip -net $nsr1 link set up dev veth0.10 415 + ip -net $nsr1 link set veth0.10 master br0 457 416 458 - ip -net ns1 addr flush dev eth0 459 - ip -net ns1 link add link eth0 name eth0.10 type vlan id 10 460 - ip -net ns1 link set eth0 up 461 - ip -net ns1 link set eth0.10 up 462 - ip -net ns1 addr add 10.0.1.99/24 dev eth0.10 463 - ip -net ns1 route add default via 10.0.1.1 464 - ip -net ns1 addr add dead:1::99/64 dev eth0.10 417 + ip -net $ns1 addr flush dev eth0 418 + ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10 419 + ip -net $ns1 link set eth0 up 420 + ip -net $ns1 link set eth0.10 up 421 + ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10 422 + ip -net $ns1 route add default via 10.0.1.1 423 + ip -net $ns1 addr add dead:1::99/64 dev eth0.10 465 424 466 - if test_tcp_forwarding_nat ns1 ns2; then 467 - echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN" 468 - else 425 + if ! 
test_tcp_forwarding_nat $ns1 $ns2 1 "bridge and VLAN"; then 469 426 echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2 470 - ip netns exec nsr1 nft list ruleset 427 + ip netns exec $nsr1 nft list ruleset 471 428 ret=1 472 429 fi 473 430 474 431 # restore test topology (remove bridge and VLAN) 475 - ip -net nsr1 link set veth0 nomaster 476 - ip -net nsr1 link set veth0 down 477 - ip -net nsr1 link set veth0.10 down 478 - ip -net nsr1 link delete veth0.10 type vlan 479 - ip -net nsr1 link delete br0 type bridge 480 - ip -net ns1 addr flush dev eth0.10 481 - ip -net ns1 link set eth0.10 down 482 - ip -net ns1 link set eth0 down 483 - ip -net ns1 link delete eth0.10 type vlan 432 + ip -net $nsr1 link set veth0 nomaster 433 + ip -net $nsr1 link set veth0 down 434 + ip -net $nsr1 link set veth0.10 down 435 + ip -net $nsr1 link delete veth0.10 type vlan 436 + ip -net $nsr1 link delete br0 type bridge 437 + ip -net $ns1 addr flush dev eth0.10 438 + ip -net $ns1 link set eth0.10 down 439 + ip -net $ns1 link set eth0 down 440 + ip -net $ns1 link delete eth0.10 type vlan 484 441 485 442 # restore address in ns1 and nsr1 486 - ip -net ns1 link set eth0 up 487 - ip -net ns1 addr add 10.0.1.99/24 dev eth0 488 - ip -net ns1 route add default via 10.0.1.1 489 - ip -net ns1 addr add dead:1::99/64 dev eth0 490 - ip -net ns1 route add default via dead:1::1 491 - ip -net nsr1 addr add 10.0.1.1/24 dev veth0 492 - ip -net nsr1 addr add dead:1::1/64 dev veth0 493 - ip -net nsr1 link set up dev veth0 443 + ip -net $ns1 link set eth0 up 444 + ip -net $ns1 addr add 10.0.1.99/24 dev eth0 445 + ip -net $ns1 route add default via 10.0.1.1 446 + ip -net $ns1 addr add dead:1::99/64 dev eth0 447 + ip -net $ns1 route add default via dead:1::1 448 + ip -net $nsr1 addr add 10.0.1.1/24 dev veth0 449 + ip -net $nsr1 addr add dead:1::1/64 dev veth0 450 + ip -net $nsr1 link set up dev veth0 494 451 495 452 KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1) 496 453 KEY_AES="0x"$(ps -xaf 
| md5sum | cut -d " " -f 1) ··· 517 480 518 481 } 519 482 520 - do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 483 + do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2 521 484 522 - do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 485 + do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1 523 486 524 - ip netns exec nsr1 nft delete table ip nat 487 + ip netns exec $nsr1 nft delete table ip nat 525 488 526 489 # restore default routes 527 - ip -net ns2 route del 192.168.10.1 via 10.0.2.1 528 - ip -net ns2 route add default via 10.0.2.1 529 - ip -net ns2 route add default via dead:2::1 490 + ip -net $ns2 route del 192.168.10.1 via 10.0.2.1 491 + ip -net $ns2 route add default via 10.0.2.1 492 + ip -net $ns2 route add default via dead:2::1 530 493 531 - if test_tcp_forwarding ns1 ns2; then 532 - echo "PASS: ipsec tunnel mode for ns1/ns2" 494 + if test_tcp_forwarding $ns1 $ns2; then 495 + check_counters "ipsec tunnel mode for ns1/ns2" 533 496 else 534 497 echo "FAIL: ipsec tunnel mode for ns1/ns2" 535 - ip netns exec nsr1 nft list ruleset 1>&2 536 - ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2 498 + ip netns exec $nsr1 nft list ruleset 1>&2 499 + ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2 537 500 fi 538 501 539 502 exit $ret