Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Restore the set element counter when one of the CPUs loses the race
to add an element to a set.

2) After NF_STOLEN, the skb might no longer exist; update the nftables
trace infra to avoid accessing the skb in this case. From Florian Westphal.

3) nftables bridge might register a prerouting hook with zero priority;
br_netfilter incorrectly skips it. Also from Florian.

* git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf:
netfilter: br_netfilter: do not skip all hooks with 0 priority
netfilter: nf_tables: avoid skb access on nf_stolen
netfilter: nft_dynset: restore set element counter when failing to update
====================

Link: https://lore.kernel.org/r/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+75 -32
+10 -6
include/net/netfilter/nf_tables.h
··· 1338 1338 /** 1339 1339 * struct nft_traceinfo - nft tracing information and state 1340 1340 * 1341 + * @trace: other struct members are initialised 1342 + * @nf_trace: copy of skb->nf_trace before rule evaluation 1343 + * @type: event type (enum nft_trace_types) 1344 + * @skbid: hash of skb to be used as trace id 1345 + * @packet_dumped: packet headers sent in a previous traceinfo message 1341 1346 * @pkt: pktinfo currently processed 1342 1347 * @basechain: base chain currently processed 1343 1348 * @chain: chain currently processed 1344 1349 * @rule: rule that was evaluated 1345 1350 * @verdict: verdict given by rule 1346 - * @type: event type (enum nft_trace_types) 1347 - * @packet_dumped: packet headers sent in a previous traceinfo message 1348 - * @trace: other struct members are initialised 1349 1351 */ 1350 1352 struct nft_traceinfo { 1353 + bool trace; 1354 + bool nf_trace; 1355 + bool packet_dumped; 1356 + enum nft_trace_types type:8; 1357 + u32 skbid; 1351 1358 const struct nft_pktinfo *pkt; 1352 1359 const struct nft_base_chain *basechain; 1353 1360 const struct nft_chain *chain; 1354 1361 const struct nft_rule_dp *rule; 1355 1362 const struct nft_verdict *verdict; 1356 - enum nft_trace_types type; 1357 - bool packet_dumped; 1358 - bool trace; 1359 1363 }; 1360 1364 1361 1365 void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
+18 -3
net/bridge/br_netfilter_hooks.c
··· 1012 1012 return okfn(net, sk, skb); 1013 1013 1014 1014 ops = nf_hook_entries_get_hook_ops(e); 1015 - for (i = 0; i < e->num_hook_entries && 1016 - ops[i]->priority <= NF_BR_PRI_BRNF; i++) 1017 - ; 1015 + for (i = 0; i < e->num_hook_entries; i++) { 1016 + /* These hooks have already been called */ 1017 + if (ops[i]->priority < NF_BR_PRI_BRNF) 1018 + continue; 1019 + 1020 + /* These hooks have not been called yet, run them. */ 1021 + if (ops[i]->priority > NF_BR_PRI_BRNF) 1022 + break; 1023 + 1024 + /* take a closer look at NF_BR_PRI_BRNF. */ 1025 + if (ops[i]->hook == br_nf_pre_routing) { 1026 + /* This hook diverted the skb to this function, 1027 + * hooks after this have not been run yet. 1028 + */ 1029 + i++; 1030 + break; 1031 + } 1032 + } 1018 1033 1019 1034 nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, 1020 1035 sk, net, okfn);
+21 -3
net/netfilter/nf_tables_core.c
··· 25 25 const struct nft_chain *chain, 26 26 enum nft_trace_types type) 27 27 { 28 - const struct nft_pktinfo *pkt = info->pkt; 29 - 30 - if (!info->trace || !pkt->skb->nf_trace) 28 + if (!info->trace || !info->nf_trace) 31 29 return; 32 30 33 31 info->chain = chain; ··· 40 42 enum nft_trace_types type) 41 43 { 42 44 if (static_branch_unlikely(&nft_trace_enabled)) { 45 + const struct nft_pktinfo *pkt = info->pkt; 46 + 47 + info->nf_trace = pkt->skb->nf_trace; 43 48 info->rule = rule; 44 49 __nft_trace_packet(info, chain, type); 50 + } 51 + } 52 + 53 + static inline void nft_trace_copy_nftrace(struct nft_traceinfo *info) 54 + { 55 + if (static_branch_unlikely(&nft_trace_enabled)) { 56 + const struct nft_pktinfo *pkt = info->pkt; 57 + 58 + if (info->trace) 59 + info->nf_trace = pkt->skb->nf_trace; 45 60 } 46 61 } 47 62 ··· 96 85 const struct nft_chain *chain, 97 86 const struct nft_regs *regs) 98 87 { 88 + const struct nft_pktinfo *pkt = info->pkt; 99 89 enum nft_trace_types type; 100 90 101 91 switch (regs->verdict.code) { ··· 104 92 case NFT_RETURN: 105 93 type = NFT_TRACETYPE_RETURN; 106 94 break; 95 + case NF_STOLEN: 96 + type = NFT_TRACETYPE_RULE; 97 + /* can't access skb->nf_trace; use copy */ 98 + break; 107 99 default: 108 100 type = NFT_TRACETYPE_RULE; 101 + info->nf_trace = pkt->skb->nf_trace; 109 102 break; 110 103 } 111 104 ··· 271 254 switch (regs.verdict.code) { 272 255 case NFT_BREAK: 273 256 regs.verdict.code = NFT_CONTINUE; 257 + nft_trace_copy_nftrace(&info); 274 258 continue; 275 259 case NFT_CONTINUE: 276 260 nft_trace_packet(&info, chain, rule,
+24 -20
net/netfilter/nf_tables_trace.c
··· 7 7 #include <linux/module.h> 8 8 #include <linux/static_key.h> 9 9 #include <linux/hash.h> 10 - #include <linux/jhash.h> 10 + #include <linux/siphash.h> 11 11 #include <linux/if_vlan.h> 12 12 #include <linux/init.h> 13 13 #include <linux/skbuff.h> ··· 24 24 25 25 DEFINE_STATIC_KEY_FALSE(nft_trace_enabled); 26 26 EXPORT_SYMBOL_GPL(nft_trace_enabled); 27 - 28 - static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) 29 - { 30 - __be32 id; 31 - 32 - /* using skb address as ID results in a limited number of 33 - * values (and quick reuse). 34 - * 35 - * So we attempt to use as many skb members that will not 36 - * change while skb is with netfilter. 37 - */ 38 - id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb), 39 - skb->skb_iif); 40 - 41 - return nla_put_be32(nlskb, NFTA_TRACE_ID, id); 42 - } 43 27 44 28 static int trace_fill_header(struct sk_buff *nlskb, u16 type, 45 29 const struct sk_buff *skb, ··· 170 186 struct nlmsghdr *nlh; 171 187 struct sk_buff *skb; 172 188 unsigned int size; 189 + u32 mark = 0; 173 190 u16 event; 174 191 175 192 if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) ··· 214 229 if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) 215 230 goto nla_put_failure; 216 231 217 - if (trace_fill_id(skb, pkt->skb)) 232 + if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid)) 218 233 goto nla_put_failure; 219 234 220 235 if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name)) ··· 234 249 case NFT_TRACETYPE_RULE: 235 250 if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) 236 251 goto nla_put_failure; 252 + 253 + /* pkt->skb undefined iff NF_STOLEN, disable dump */ 254 + if (info->verdict->code == NF_STOLEN) 255 + info->packet_dumped = true; 256 + else 257 + mark = pkt->skb->mark; 258 + 237 259 break; 238 260 case NFT_TRACETYPE_POLICY: 261 + mark = pkt->skb->mark; 262 + 239 263 if (nla_put_be32(skb, NFTA_TRACE_POLICY, 240 264 htonl(info->basechain->policy))) 241 265 goto nla_put_failure; 242 266 
break; 243 267 } 244 268 245 - if (pkt->skb->mark && 246 - nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark))) 269 + if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark))) 247 270 goto nla_put_failure; 248 271 249 272 if (!info->packet_dumped) { ··· 276 283 const struct nft_verdict *verdict, 277 284 const struct nft_chain *chain) 278 285 { 286 + static siphash_key_t trace_key __read_mostly; 287 + struct sk_buff *skb = pkt->skb; 288 + 279 289 info->basechain = nft_base_chain(chain); 280 290 info->trace = true; 291 + info->nf_trace = pkt->skb->nf_trace; 281 292 info->packet_dumped = false; 282 293 info->pkt = pkt; 283 294 info->verdict = verdict; 295 + 296 + net_get_random_once(&trace_key, sizeof(trace_key)); 297 + 298 + info->skbid = (u32)siphash_3u32(hash32_ptr(skb), 299 + skb_get_hash(skb), 300 + skb->skb_iif, 301 + &trace_key); 284 302 }
+2
net/netfilter/nft_set_hash.c
··· 143 143 /* Another cpu may race to insert the element with the same key */ 144 144 if (prev) { 145 145 nft_set_elem_destroy(set, he, true); 146 + atomic_dec(&set->nelems); 146 147 he = prev; 147 148 } 148 149 ··· 153 152 154 153 err2: 155 154 nft_set_elem_destroy(set, he, true); 155 + atomic_dec(&set->nelems); 156 156 err1: 157 157 return false; 158 158 }