Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nf-25-12-16' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Florian Westphal says:

====================
netfilter: updates for net

The following patchset contains Netfilter fixes for *net*:

1) Jozsef Kadlecsik is retiring. Fortunately Jozsef will still keep an
eye on ipset patches.

2) Remove a bogus direction check from the NAT core; it caused spurious
flakes in the 'reverse clash' selftest. From myself.

3) nf_tables doesn't need to do chain validation on register store,
from Pablo Neira Ayuso.

4) nf_tables shouldn't revisit chains during ruleset (graph) validation
if possible. Both 3 and 4 were slated for -next initially but there
are now two independent reports of people hitting soft lockup errors
during ruleset validation, so it makes no sense anymore to route
this via -next given this is -stable material. From myself.

5) Call cond_resched() in a more frequently visited place during nf_tables
chain validation. This wasn't possible earlier due to the RCU read lock,
but nowadays it's no longer held during set walks.

6) Don't fail conntrack packetdrill test with HZ=100 kernels.

netfilter pull request nf-25-12-16

* tag 'nf-25-12-16' of https://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
selftests: netfilter: packetdrill: avoid failure on HZ=100 kernel
netfilter: nf_tables: avoid softlockup warnings in nft_chain_validate
netfilter: nf_tables: avoid chain re-validation if possible
netfilter: nf_tables: remove redundant chain validation on register store
netfilter: nf_nat: remove bogus direction check
MAINTAINERS: Remove Jozsef Kadlecsik from MAINTAINERS file
====================

Link: https://patch.msgid.link/20251216190904.14507-1-fw@strlen.de
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+107 -44
+1
CREDITS
··· 1983 1983 D: netfilter: raw table 1984 1984 D: netfilter: iprange match 1985 1985 D: netfilter: new logging interfaces 1986 + D: netfilter: ipset 1986 1987 D: netfilter: various other hacks 1987 1988 S: Tata 1988 1989 S: Hungary
-1
MAINTAINERS
··· 17808 17808 17809 17809 NETFILTER 17810 17810 M: Pablo Neira Ayuso <pablo@netfilter.org> 17811 - M: Jozsef Kadlecsik <kadlec@netfilter.org> 17812 17811 M: Florian Westphal <fw@strlen.de> 17813 17812 R: Phil Sutter <phil@nwl.cc> 17814 17813 L: netfilter-devel@vger.kernel.org
+26 -8
include/net/netfilter/nf_tables.h
··· 1091 1091 __attribute__((aligned(__alignof__(struct nft_rule_dp)))); 1092 1092 }; 1093 1093 1094 + enum nft_chain_types { 1095 + NFT_CHAIN_T_DEFAULT = 0, 1096 + NFT_CHAIN_T_ROUTE, 1097 + NFT_CHAIN_T_NAT, 1098 + NFT_CHAIN_T_MAX 1099 + }; 1100 + 1101 + /** 1102 + * struct nft_chain_validate_state - validation state 1103 + * 1104 + * If a chain is encountered again during table validation it is 1105 + * possible to avoid revalidation provided the calling context is 1106 + * compatible. This structure stores relevant calling context of 1107 + * previous validations. 1108 + * 1109 + * @hook_mask: the hook numbers and locations the chain is linked to 1110 + * @depth: the deepest call chain level the chain is linked to 1111 + */ 1112 + struct nft_chain_validate_state { 1113 + u8 hook_mask[NFT_CHAIN_T_MAX]; 1114 + u8 depth; 1115 + }; 1116 + 1094 1117 /** 1095 1118 * struct nft_chain - nf_tables chain 1096 1119 * ··· 1132 1109 * @udlen: user data length 1133 1110 * @udata: user data in the chain 1134 1111 * @blob_next: rule blob pointer to the next in the chain 1112 + * @vstate: validation state 1135 1113 */ 1136 1114 struct nft_chain { 1137 1115 struct nft_rule_blob __rcu *blob_gen_0; ··· 1152 1128 1153 1129 /* Only used during control plane commit phase: */ 1154 1130 struct nft_rule_blob *blob_next; 1131 + struct nft_chain_validate_state vstate; 1155 1132 }; 1156 1133 1157 - int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); 1134 + int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain); 1158 1135 int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, 1159 1136 const struct nft_set_iter *iter, 1160 1137 struct nft_elem_priv *elem_priv); 1161 1138 int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set); 1162 1139 int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); 1163 1140 void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain); 
1164 - 1165 - enum nft_chain_types { 1166 - NFT_CHAIN_T_DEFAULT = 0, 1167 - NFT_CHAIN_T_ROUTE, 1168 - NFT_CHAIN_T_NAT, 1169 - NFT_CHAIN_T_MAX 1170 - }; 1171 1141 1172 1142 /** 1173 1143 * struct nft_chain_type - nf_tables chain type info
+1 -13
net/netfilter/nf_nat_core.c
··· 294 294 295 295 ct = nf_ct_tuplehash_to_ctrack(thash); 296 296 297 - /* NB: IP_CT_DIR_ORIGINAL should be impossible because 298 - * nf_nat_used_tuple() handles origin collisions. 299 - * 300 - * Handle remote chance other CPU confirmed its ct right after. 301 - */ 302 - if (thash->tuple.dst.dir != IP_CT_DIR_REPLY) 303 - goto out; 304 - 305 297 /* clashing connection subject to NAT? Retry with new tuple. */ 306 298 if (READ_ONCE(ct->status) & uses_nat) 307 299 goto out; 308 300 309 301 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 310 - &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple) && 311 - nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, 312 - &ignored_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) { 302 + &ignored_ct->tuplehash[IP_CT_DIR_REPLY].tuple)) 313 303 taken = false; 314 - goto out; 315 - } 316 304 out: 317 305 nf_ct_put(ct); 318 306 return taken;
+67 -17
net/netfilter/nf_tables_api.c
··· 123 123 124 124 table->validate_state = new_validate_state; 125 125 } 126 + 127 + static bool nft_chain_vstate_valid(const struct nft_ctx *ctx, 128 + const struct nft_chain *chain) 129 + { 130 + const struct nft_base_chain *base_chain; 131 + enum nft_chain_types type; 132 + u8 hooknum; 133 + 134 + if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) 135 + return false; 136 + 137 + base_chain = nft_base_chain(ctx->chain); 138 + hooknum = base_chain->ops.hooknum; 139 + type = base_chain->type->type; 140 + 141 + /* chain is already validated for this call depth */ 142 + if (chain->vstate.depth >= ctx->level && 143 + chain->vstate.hook_mask[type] & BIT(hooknum)) 144 + return true; 145 + 146 + return false; 147 + } 148 + 126 149 static void nf_tables_trans_destroy_work(struct work_struct *w); 127 150 128 151 static void nft_trans_gc_work(struct work_struct *work); ··· 4102 4079 nf_tables_rule_destroy(ctx, rule); 4103 4080 } 4104 4081 4082 + static void nft_chain_vstate_update(const struct nft_ctx *ctx, struct nft_chain *chain) 4083 + { 4084 + const struct nft_base_chain *base_chain; 4085 + enum nft_chain_types type; 4086 + u8 hooknum; 4087 + 4088 + /* ctx->chain must hold the calling base chain. */ 4089 + if (WARN_ON_ONCE(!nft_is_base_chain(ctx->chain))) { 4090 + memset(&chain->vstate, 0, sizeof(chain->vstate)); 4091 + return; 4092 + } 4093 + 4094 + base_chain = nft_base_chain(ctx->chain); 4095 + hooknum = base_chain->ops.hooknum; 4096 + type = base_chain->type->type; 4097 + 4098 + BUILD_BUG_ON(BIT(NF_INET_NUMHOOKS) > U8_MAX); 4099 + 4100 + chain->vstate.hook_mask[type] |= BIT(hooknum); 4101 + if (chain->vstate.depth < ctx->level) 4102 + chain->vstate.depth = ctx->level; 4103 + } 4104 + 4105 4105 /** nft_chain_validate - loop detection and hook validation 4106 4106 * 4107 4107 * @ctx: context containing call depth and base chain ··· 4134 4088 * and set lookups until either the jump limit is hit or all reachable 4135 4089 * chains have been validated. 
4136 4090 */ 4137 - int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain) 4091 + int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain) 4138 4092 { 4139 4093 struct nft_expr *expr, *last; 4140 4094 struct nft_rule *rule; 4141 4095 int err; 4142 4096 4097 + BUILD_BUG_ON(NFT_JUMP_STACK_SIZE > 255); 4143 4098 if (ctx->level == NFT_JUMP_STACK_SIZE) 4144 4099 return -EMLINK; 4100 + 4101 + if (ctx->level > 0) { 4102 + /* jumps to base chains are not allowed. */ 4103 + if (nft_is_base_chain(chain)) 4104 + return -ELOOP; 4105 + 4106 + if (nft_chain_vstate_valid(ctx, chain)) 4107 + return 0; 4108 + } 4145 4109 4146 4110 list_for_each_entry(rule, &chain->rules, list) { 4147 4111 if (fatal_signal_pending(current)) ··· 4171 4115 if (err < 0) 4172 4116 return err; 4173 4117 } 4118 + 4119 + cond_resched(); 4174 4120 } 4175 4121 4122 + nft_chain_vstate_update(ctx, chain); 4176 4123 return 0; 4177 4124 } 4178 4125 EXPORT_SYMBOL_GPL(nft_chain_validate); ··· 4187 4128 .net = net, 4188 4129 .family = table->family, 4189 4130 }; 4190 - int err; 4131 + int err = 0; 4191 4132 4192 4133 list_for_each_entry(chain, &table->chains, list) { 4193 4134 if (!nft_is_base_chain(chain)) ··· 4196 4137 ctx.chain = chain; 4197 4138 err = nft_chain_validate(&ctx, chain); 4198 4139 if (err < 0) 4199 - return err; 4200 - 4201 - cond_resched(); 4140 + goto err; 4202 4141 } 4203 4142 4204 - return 0; 4143 + err: 4144 + list_for_each_entry(chain, &table->chains, list) 4145 + memset(&chain->vstate, 0, sizeof(chain->vstate)); 4146 + 4147 + return err; 4205 4148 } 4206 4149 4207 4150 int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set, ··· 11737 11676 enum nft_data_types type, 11738 11677 unsigned int len) 11739 11678 { 11740 - int err; 11741 - 11742 11679 switch (reg) { 11743 11680 case NFT_REG_VERDICT: 11744 11681 if (type != NFT_DATA_VERDICT) 11745 11682 return -EINVAL; 11746 - 11747 - if (data != NULL && 11748 - (data->verdict.code == 
NFT_GOTO || 11749 - data->verdict.code == NFT_JUMP)) { 11750 - err = nft_chain_validate(ctx, data->verdict.chain); 11751 - if (err < 0) 11752 - return err; 11753 - } 11754 - 11755 11683 break; 11756 11684 default: 11757 11685 if (type != NFT_DATA_VALUE)
+9 -4
tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c
··· 33 33 exit(111); 34 34 } 35 35 36 - static void die_port(uint16_t got, uint16_t want) 36 + static void die_port(const struct sockaddr_in *sin, uint16_t want) 37 37 { 38 - fprintf(stderr, "Port number changed, wanted %d got %d\n", want, ntohs(got)); 38 + uint16_t got = ntohs(sin->sin_port); 39 + char str[INET_ADDRSTRLEN]; 40 + 41 + inet_ntop(AF_INET, &sin->sin_addr, str, sizeof(str)); 42 + 43 + fprintf(stderr, "Port number changed, wanted %d got %d from %s\n", want, got, str); 39 44 exit(1); 40 45 } 41 46 ··· 105 100 die("child recvfrom"); 106 101 107 102 if (peer.sin_port != htons(PORT)) 108 - die_port(peer.sin_port, PORT); 103 + die_port(&peer, PORT); 109 104 } else { 110 105 if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN) 111 106 continue; ··· 114 109 die("parent recvfrom"); 115 110 116 111 if (peer.sin_port != htons((PORT + 1))) 117 - die_port(peer.sin_port, PORT + 1); 112 + die_port(&peer, PORT + 1); 118 113 } 119 114 } 120 115
+2
tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh
··· 45 45 echo "PASS: No SNAT performed for null bindings" 46 46 else 47 47 echo "ERROR: SNAT performed without any matching snat rule" 48 + ip netns exec "$ns0" conntrack -L 49 + ip netns exec "$ns0" conntrack -S 48 50 exit 1 49 51 fi 50 52
+1 -1
tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
··· 26 26 27 27 +0.01 > R 643160523:643160523(0) win 0 28 28 29 - +0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT` 29 + +0.1 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT` 30 30 31 31 // Must go through. 32 32 +0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>