Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'nf-next-24-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next:

1) Extended netlink error reporting if nfnetlink attribute parser fails,
from Donald Hunter.

2) Fix incorrect request_module() usage: pass a string literal as the format argument, from Simon Horman.

3) A series of patches to reduce memory consumption for set element
transactions.
Florian Westphal says:

"When doing a flush on a set or mass adding/removing elements from a
set, each element needs to allocate 96 bytes to hold the transactional
state.

In such cases, virtually all the information in struct nft_trans_elem
is the same.

Change nft_trans_elem to a flex-array, i.e. a single nft_trans_elem
can hold multiple set element pointers.

The number of elements that can be stored in one nft_trans_elem is limited
by the slab allocator; this series limits the compaction to at most 62
elements, as it caps the reallocation to 2048 bytes of memory."

4) A series of patches to prepare the transition to dscp_t in .flowi_tos.
From Guillaume Nault.

5) Support for bitwise operations with two source registers,
from Jeremy Sowden.

* tag 'nf-next-24-11-15' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next:
netfilter: bitwise: add support for doing AND, OR and XOR directly
netfilter: bitwise: rename some boolean operation functions
netfilter: nf_dup4: Convert nf_dup_ipv4_route() to dscp_t.
netfilter: nft_fib: Convert nft_fib4_eval() to dscp_t.
netfilter: rpfilter: Convert rpfilter_mt() to dscp_t.
netfilter: flow_offload: Convert nft_flow_route() to dscp_t.
netfilter: ipv4: Convert ip_route_me_harder() to dscp_t.
netfilter: nf_tables: allocate element update information dynamically
netfilter: nf_tables: switch trans_elem to real flex array
netfilter: nf_tables: prepare nft audit for set element compaction
netfilter: nf_tables: prepare for multiple elements in nft_trans_elem structure
netfilter: nf_tables: add nft_trans_commit_list_add_elem helper
netfilter: bpf: Pass string literal as format argument of request_module()
netfilter: nfnetlink: Report extack policy errors for batched ops
====================

Link: https://patch.msgid.link/20241115133207.8907-1-pablo@netfilter.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+485 -128
+13 -12
include/net/netfilter/nf_tables.h
··· 1759 1759 NFT_TRANS_UPD_EXPIRATION = (1 << 1), 1760 1760 }; 1761 1761 1762 + struct nft_elem_update { 1763 + u64 timeout; 1764 + u64 expiration; 1765 + u8 flags; 1766 + }; 1767 + 1768 + struct nft_trans_one_elem { 1769 + struct nft_elem_priv *priv; 1770 + struct nft_elem_update *update; 1771 + }; 1772 + 1762 1773 struct nft_trans_elem { 1763 1774 struct nft_trans nft_trans; 1764 1775 struct nft_set *set; 1765 - struct nft_elem_priv *elem_priv; 1766 - u64 timeout; 1767 - u64 expiration; 1768 - u8 update_flags; 1769 1776 bool bound; 1777 + unsigned int nelems; 1778 + struct nft_trans_one_elem elems[] __counted_by(nelems); 1770 1779 }; 1771 1780 1772 1781 #define nft_trans_container_elem(t) \ 1773 1782 container_of(t, struct nft_trans_elem, nft_trans) 1774 1783 #define nft_trans_elem_set(trans) \ 1775 1784 nft_trans_container_elem(trans)->set 1776 - #define nft_trans_elem_priv(trans) \ 1777 - nft_trans_container_elem(trans)->elem_priv 1778 - #define nft_trans_elem_update_flags(trans) \ 1779 - nft_trans_container_elem(trans)->update_flags 1780 - #define nft_trans_elem_timeout(trans) \ 1781 - nft_trans_container_elem(trans)->timeout 1782 - #define nft_trans_elem_expiration(trans) \ 1783 - nft_trans_container_elem(trans)->expiration 1784 1785 #define nft_trans_elem_set_bound(trans) \ 1785 1786 nft_trans_container_elem(trans)->bound 1786 1787
+15 -3
include/uapi/linux/netfilter/nf_tables.h
··· 564 564 /** 565 565 * enum nft_bitwise_ops - nf_tables bitwise operations 566 566 * 567 - * @NFT_BITWISE_BOOL: mask-and-xor operation used to implement NOT, AND, OR and 568 - * XOR boolean operations 567 + * @NFT_BITWISE_MASK_XOR: mask-and-xor operation used to implement NOT, AND, OR 568 + * and XOR boolean operations 569 569 * @NFT_BITWISE_LSHIFT: left-shift operation 570 570 * @NFT_BITWISE_RSHIFT: right-shift operation 571 + * @NFT_BITWISE_AND: and operation 572 + * @NFT_BITWISE_OR: or operation 573 + * @NFT_BITWISE_XOR: xor operation 571 574 */ 572 575 enum nft_bitwise_ops { 573 - NFT_BITWISE_BOOL, 576 + NFT_BITWISE_MASK_XOR, 574 577 NFT_BITWISE_LSHIFT, 575 578 NFT_BITWISE_RSHIFT, 579 + NFT_BITWISE_AND, 580 + NFT_BITWISE_OR, 581 + NFT_BITWISE_XOR, 576 582 }; 583 + /* 584 + * Old name for NFT_BITWISE_MASK_XOR. Retained for backwards-compatibility. 585 + */ 586 + #define NFT_BITWISE_BOOL NFT_BITWISE_MASK_XOR 577 587 578 588 /** 579 589 * enum nft_bitwise_attributes - nf_tables bitwise expression netlink attributes ··· 596 586 * @NFTA_BITWISE_OP: type of operation (NLA_U32: nft_bitwise_ops) 597 587 * @NFTA_BITWISE_DATA: argument for non-boolean operations 598 588 * (NLA_NESTED: nft_data_attributes) 589 + * @NFTA_BITWISE_SREG2: second source register (NLA_U32: nft_registers) 599 590 * 600 591 * The bitwise expression supports boolean and shift operations. It implements 601 592 * the boolean operations by performing the following operation: ··· 620 609 NFTA_BITWISE_XOR, 621 610 NFTA_BITWISE_OP, 622 611 NFTA_BITWISE_DATA, 612 + NFTA_BITWISE_SREG2, 623 613 __NFTA_BITWISE_MAX 624 614 }; 625 615 #define NFTA_BITWISE_MAX (__NFTA_BITWISE_MAX - 1)
+1 -1
net/ipv4/netfilter.c
··· 44 44 */ 45 45 fl4.daddr = iph->daddr; 46 46 fl4.saddr = saddr; 47 - fl4.flowi4_tos = iph->tos & INET_DSCP_MASK; 47 + fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); 48 48 fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0; 49 49 fl4.flowi4_l3mdev = l3mdev_master_ifindex(dev); 50 50 fl4.flowi4_mark = skb->mark;
+1 -1
net/ipv4/netfilter/ipt_rpfilter.c
··· 76 76 flow.daddr = iph->saddr; 77 77 flow.saddr = rpfilter_get_saddr(iph->daddr); 78 78 flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; 79 - flow.flowi4_tos = iph->tos & INET_DSCP_MASK; 79 + flow.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); 80 80 flow.flowi4_scope = RT_SCOPE_UNIVERSE; 81 81 flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par)); 82 82 flow.flowi4_uid = sock_net_uid(xt_net(par), NULL);
+1 -1
net/ipv4/netfilter/nf_dup_ipv4.c
··· 33 33 fl4.flowi4_oif = oif; 34 34 35 35 fl4.daddr = gw->s_addr; 36 - fl4.flowi4_tos = iph->tos & INET_DSCP_MASK; 36 + fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); 37 37 fl4.flowi4_scope = RT_SCOPE_UNIVERSE; 38 38 fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH; 39 39 rt = ip_route_output_key(net, &fl4);
+2 -1
net/ipv4/netfilter/nft_fib_ipv4.c
··· 11 11 #include <net/netfilter/nft_fib.h> 12 12 13 13 #include <net/inet_dscp.h> 14 + #include <net/ip.h> 14 15 #include <net/ip_fib.h> 15 16 #include <net/route.h> 16 17 ··· 108 107 if (priv->flags & NFTA_FIB_F_MARK) 109 108 fl4.flowi4_mark = pkt->skb->mark; 110 109 111 - fl4.flowi4_tos = iph->tos & INET_DSCP_MASK; 110 + fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)); 112 111 113 112 if (priv->flags & NFTA_FIB_F_DADDR) { 114 113 fl4.daddr = iph->daddr;
+1 -1
net/netfilter/nf_bpf_link.c
··· 43 43 hook = rcu_dereference(*ptr_global_hook); 44 44 if (!hook) { 45 45 rcu_read_unlock(); 46 - err = request_module(mod); 46 + err = request_module("%s", mod); 47 47 if (err) 48 48 return ERR_PTR(err < 0 ? err : -EINVAL); 49 49
+309 -78
net/netfilter/nf_tables_api.c
··· 26 26 #define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-")) 27 27 #define NFT_SET_MAX_ANONLEN 16 28 28 29 + /* limit compaction to avoid huge kmalloc/krealloc sizes. */ 30 + #define NFT_MAX_SET_NELEMS ((2048 - sizeof(struct nft_trans_elem)) / sizeof(struct nft_trans_one_elem)) 31 + 29 32 unsigned int nf_tables_net_id __read_mostly; 30 33 31 34 static LIST_HEAD(nf_tables_expressions); ··· 394 391 return __nf_tables_unregister_hook(net, table, chain, false); 395 392 } 396 393 394 + static bool nft_trans_collapse_set_elem_allowed(const struct nft_trans_elem *a, const struct nft_trans_elem *b) 395 + { 396 + /* NB: the ->bound equality check is defensive, at this time we only merge 397 + * a new nft_trans_elem transaction request with the transaction tail 398 + * element, but a->bound != b->bound would imply a NEWRULE transaction 399 + * is queued in-between. 400 + * 401 + * The set check is mandatory, the NFT_MAX_SET_NELEMS check prevents 402 + * huge krealloc() requests. 403 + */ 404 + return a->set == b->set && a->bound == b->bound && a->nelems < NFT_MAX_SET_NELEMS; 405 + } 406 + 407 + static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net, 408 + struct nft_trans_elem *tail, 409 + struct nft_trans_elem *trans, 410 + gfp_t gfp) 411 + { 412 + unsigned int nelems, old_nelems = tail->nelems; 413 + struct nft_trans_elem *new_trans; 414 + 415 + if (!nft_trans_collapse_set_elem_allowed(tail, trans)) 416 + return false; 417 + 418 + /* "cannot happen", at this time userspace element add 419 + * requests always allocate a new transaction element. 420 + * 421 + * This serves as a reminder to adjust the list_add_tail 422 + * logic below in case this ever changes. 
423 + */ 424 + if (WARN_ON_ONCE(trans->nelems != 1)) 425 + return false; 426 + 427 + if (check_add_overflow(old_nelems, trans->nelems, &nelems)) 428 + return false; 429 + 430 + /* krealloc might free tail which invalidates list pointers */ 431 + list_del_init(&tail->nft_trans.list); 432 + 433 + new_trans = krealloc(tail, struct_size(tail, elems, nelems), gfp); 434 + if (!new_trans) { 435 + list_add_tail(&tail->nft_trans.list, &nft_net->commit_list); 436 + return false; 437 + } 438 + 439 + /* 440 + * new_trans->nft_trans.list contains garbage, but 441 + * list_add_tail() doesn't care. 442 + */ 443 + new_trans->nelems = nelems; 444 + new_trans->elems[old_nelems] = trans->elems[0]; 445 + list_add_tail(&new_trans->nft_trans.list, &nft_net->commit_list); 446 + 447 + return true; 448 + } 449 + 450 + static bool nft_trans_try_collapse(struct nftables_pernet *nft_net, 451 + struct nft_trans *trans, gfp_t gfp) 452 + { 453 + struct nft_trans *tail; 454 + 455 + if (list_empty(&nft_net->commit_list)) 456 + return false; 457 + 458 + tail = list_last_entry(&nft_net->commit_list, struct nft_trans, list); 459 + 460 + if (tail->msg_type != trans->msg_type) 461 + return false; 462 + 463 + switch (trans->msg_type) { 464 + case NFT_MSG_NEWSETELEM: 465 + case NFT_MSG_DELSETELEM: 466 + return nft_trans_collapse_set_elem(nft_net, 467 + nft_trans_container_elem(tail), 468 + nft_trans_container_elem(trans), gfp); 469 + } 470 + 471 + return false; 472 + } 473 + 397 474 static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) 398 475 { 399 476 struct nftables_pernet *nft_net = nft_pernet(net); ··· 502 419 list_add_tail(&binding->binding_list, &nft_net->binding_list); 503 420 break; 504 421 } 422 + } 423 + 424 + static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans, 425 + gfp_t gfp) 426 + { 427 + struct nftables_pernet *nft_net = nft_pernet(net); 428 + 429 + WARN_ON_ONCE(trans->msg_type != NFT_MSG_NEWSETELEM && 430 + trans->msg_type != 
NFT_MSG_DELSETELEM); 431 + 432 + might_alloc(gfp); 433 + 434 + if (nft_trans_try_collapse(nft_net, trans, gfp)) { 435 + kfree(trans); 436 + return; 437 + } 438 + 439 + nft_trans_commit_list_add_tail(net, trans); 505 440 } 506 441 507 442 static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) ··· 6536 6435 int msg_type, 6537 6436 struct nft_set *set) 6538 6437 { 6438 + struct nft_trans_elem *te; 6539 6439 struct nft_trans *trans; 6540 6440 6541 - trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem)); 6441 + trans = nft_trans_alloc(ctx, msg_type, struct_size(te, elems, 1)); 6542 6442 if (trans == NULL) 6543 6443 return NULL; 6544 6444 6545 - nft_trans_elem_set(trans) = set; 6445 + te = nft_trans_container_elem(trans); 6446 + te->nelems = 1; 6447 + te->set = set; 6448 + 6546 6449 return trans; 6547 6450 } 6548 6451 ··· 6668 6563 } 6669 6564 6670 6565 /* Drop references and destroy. Called from gc, dynset and abort path. */ 6671 - void nft_set_elem_destroy(const struct nft_set *set, 6672 - const struct nft_elem_priv *elem_priv, 6673 - bool destroy_expr) 6566 + static void __nft_set_elem_destroy(const struct nft_ctx *ctx, 6567 + const struct nft_set *set, 6568 + const struct nft_elem_priv *elem_priv, 6569 + bool destroy_expr) 6674 6570 { 6675 6571 struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); 6676 - struct nft_ctx ctx = { 6677 - .net = read_pnet(&set->net), 6678 - .family = set->table->family, 6679 - }; 6680 6572 6681 6573 nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE); 6682 6574 if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) 6683 6575 nft_data_release(nft_set_ext_data(ext), set->dtype); 6684 6576 if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS)) 6685 - nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext)); 6577 + nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext)); 6686 6578 if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) 6687 6579 nft_use_dec(&(*nft_set_ext_obj(ext))->use); 6688 6580 6689 6581 
kfree(elem_priv); 6690 6582 } 6583 + 6584 + /* Drop references and destroy. Called from gc and dynset. */ 6585 + void nft_set_elem_destroy(const struct nft_set *set, 6586 + const struct nft_elem_priv *elem_priv, 6587 + bool destroy_expr) 6588 + { 6589 + struct nft_ctx ctx = { 6590 + .net = read_pnet(&set->net), 6591 + .family = set->table->family, 6592 + }; 6593 + 6594 + __nft_set_elem_destroy(&ctx, set, elem_priv, destroy_expr); 6595 + } 6691 6596 EXPORT_SYMBOL_GPL(nft_set_elem_destroy); 6597 + 6598 + /* Drop references and destroy. Called from abort path. */ 6599 + static void nft_trans_set_elem_destroy(const struct nft_ctx *ctx, struct nft_trans_elem *te) 6600 + { 6601 + int i; 6602 + 6603 + for (i = 0; i < te->nelems; i++) { 6604 + /* skip update request, see nft_trans_elems_new_abort() */ 6605 + if (!te->elems[i].priv) 6606 + continue; 6607 + 6608 + __nft_set_elem_destroy(ctx, te->set, te->elems[i].priv, true); 6609 + } 6610 + } 6692 6611 6693 6612 /* Destroy element. References have been already dropped in the preparation 6694 6613 * path via nft_setelem_data_deactivate(). 
··· 6727 6598 nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext)); 6728 6599 6729 6600 kfree(elem_priv); 6601 + } 6602 + 6603 + static void nft_trans_elems_destroy(const struct nft_ctx *ctx, 6604 + const struct nft_trans_elem *te) 6605 + { 6606 + int i; 6607 + 6608 + for (i = 0; i < te->nelems; i++) 6609 + nf_tables_set_elem_destroy(ctx, te->set, te->elems[i].priv); 6730 6610 } 6731 6611 6732 6612 int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set, ··· 6894 6756 } 6895 6757 } 6896 6758 6759 + static void nft_trans_elem_update(const struct nft_set *set, 6760 + const struct nft_trans_one_elem *elem) 6761 + { 6762 + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); 6763 + const struct nft_elem_update *update = elem->update; 6764 + 6765 + if (update->flags & NFT_TRANS_UPD_TIMEOUT) 6766 + WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, update->timeout); 6767 + 6768 + if (update->flags & NFT_TRANS_UPD_EXPIRATION) 6769 + WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + update->expiration); 6770 + } 6771 + 6772 + static void nft_trans_elems_add(const struct nft_ctx *ctx, 6773 + struct nft_trans_elem *te) 6774 + { 6775 + int i; 6776 + 6777 + for (i = 0; i < te->nelems; i++) { 6778 + struct nft_trans_one_elem *elem = &te->elems[i]; 6779 + 6780 + if (elem->update) 6781 + nft_trans_elem_update(te->set, elem); 6782 + else 6783 + nft_setelem_activate(ctx->net, te->set, elem->priv); 6784 + 6785 + nf_tables_setelem_notify(ctx, te->set, elem->priv, 6786 + NFT_MSG_NEWSETELEM); 6787 + kfree(elem->update); 6788 + } 6789 + } 6790 + 6897 6791 static int nft_setelem_catchall_deactivate(const struct net *net, 6898 6792 struct nft_set *set, 6899 6793 struct nft_set_elem *elem) ··· 7008 6838 set->ops->remove(net, set, elem_priv); 7009 6839 } 7010 6840 6841 + static void nft_trans_elems_remove(const struct nft_ctx *ctx, 6842 + const struct nft_trans_elem *te) 6843 + { 6844 + int i; 6845 + 6846 + for (i = 0; i < te->nelems; i++) { 
6847 + WARN_ON_ONCE(te->elems[i].update); 6848 + 6849 + nf_tables_setelem_notify(ctx, te->set, 6850 + te->elems[i].priv, 6851 + te->nft_trans.msg_type); 6852 + 6853 + nft_setelem_remove(ctx->net, te->set, te->elems[i].priv); 6854 + if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) { 6855 + atomic_dec(&te->set->nelems); 6856 + te->set->ndeact--; 6857 + } 6858 + } 6859 + } 6860 + 7011 6861 static bool nft_setelem_valid_key_end(const struct nft_set *set, 7012 6862 struct nlattr **nla, u32 flags) 7013 6863 { ··· 7064 6874 struct nft_data_desc desc; 7065 6875 enum nft_registers dreg; 7066 6876 struct nft_trans *trans; 7067 - u8 update_flags; 7068 6877 u64 expiration; 7069 6878 u64 timeout; 7070 6879 int err, i; ··· 7378 7189 else if (!(nlmsg_flags & NLM_F_EXCL)) { 7379 7190 err = 0; 7380 7191 if (nft_set_ext_exists(ext2, NFT_SET_EXT_TIMEOUT)) { 7381 - update_flags = 0; 7192 + struct nft_elem_update update = { }; 7193 + 7382 7194 if (timeout != nft_set_ext_timeout(ext2)->timeout) { 7383 - nft_trans_elem_timeout(trans) = timeout; 7195 + update.timeout = timeout; 7384 7196 if (expiration == 0) 7385 7197 expiration = timeout; 7386 7198 7387 - update_flags |= NFT_TRANS_UPD_TIMEOUT; 7199 + update.flags |= NFT_TRANS_UPD_TIMEOUT; 7388 7200 } 7389 7201 if (expiration) { 7390 - nft_trans_elem_expiration(trans) = expiration; 7391 - update_flags |= NFT_TRANS_UPD_EXPIRATION; 7202 + update.expiration = expiration; 7203 + update.flags |= NFT_TRANS_UPD_EXPIRATION; 7392 7204 } 7393 7205 7394 - if (update_flags) { 7395 - nft_trans_elem_priv(trans) = elem_priv; 7396 - nft_trans_elem_update_flags(trans) = update_flags; 7397 - nft_trans_commit_list_add_tail(ctx->net, trans); 7206 + if (update.flags) { 7207 + struct nft_trans_one_elem *ue; 7208 + 7209 + ue = &nft_trans_container_elem(trans)->elems[0]; 7210 + 7211 + ue->update = kmemdup(&update, sizeof(update), GFP_KERNEL); 7212 + if (!ue->update) { 7213 + err = -ENOMEM; 7214 + goto err_element_clash; 7215 + } 7216 + 7217 + ue->priv = 
elem_priv; 7218 + nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); 7398 7219 goto err_elem_free; 7399 7220 } 7400 7221 } ··· 7427 7228 } 7428 7229 } 7429 7230 7430 - nft_trans_elem_priv(trans) = elem.priv; 7431 - nft_trans_commit_list_add_tail(ctx->net, trans); 7231 + nft_trans_container_elem(trans)->elems[0].priv = elem.priv; 7232 + nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); 7432 7233 return 0; 7433 7234 7434 7235 err_set_full: ··· 7565 7366 nft_use_dec(&(*nft_set_ext_obj(ext))->use); 7566 7367 } 7567 7368 7369 + /* similar to nft_trans_elems_remove, but called from abort path to undo newsetelem. 7370 + * No notifications and no ndeact changes. 7371 + * 7372 + * Returns true if set had been added to (i.e., elements need to be removed again). 7373 + */ 7374 + static bool nft_trans_elems_new_abort(const struct nft_ctx *ctx, 7375 + struct nft_trans_elem *te) 7376 + { 7377 + bool removed = false; 7378 + int i; 7379 + 7380 + for (i = 0; i < te->nelems; i++) { 7381 + if (te->elems[i].update) { 7382 + kfree(te->elems[i].update); 7383 + te->elems[i].update = NULL; 7384 + /* Update request, so do not release this element */ 7385 + te->elems[i].priv = NULL; 7386 + continue; 7387 + } 7388 + 7389 + if (!te->set->ops->abort || nft_setelem_is_catchall(te->set, te->elems[i].priv)) 7390 + nft_setelem_remove(ctx->net, te->set, te->elems[i].priv); 7391 + 7392 + if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) 7393 + atomic_dec(&te->set->nelems); 7394 + 7395 + removed = true; 7396 + } 7397 + 7398 + return removed; 7399 + } 7400 + 7401 + /* Called from abort path to undo DELSETELEM/DESTROYSETELEM. 
*/ 7402 + static void nft_trans_elems_destroy_abort(const struct nft_ctx *ctx, 7403 + const struct nft_trans_elem *te) 7404 + { 7405 + int i; 7406 + 7407 + for (i = 0; i < te->nelems; i++) { 7408 + if (!nft_setelem_active_next(ctx->net, te->set, te->elems[i].priv)) { 7409 + nft_setelem_data_activate(ctx->net, te->set, te->elems[i].priv); 7410 + nft_setelem_activate(ctx->net, te->set, te->elems[i].priv); 7411 + } 7412 + 7413 + if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) 7414 + te->set->ndeact--; 7415 + } 7416 + } 7417 + 7568 7418 static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, 7569 7419 const struct nlattr *attr) 7570 7420 { ··· 7693 7445 7694 7446 nft_setelem_data_deactivate(ctx->net, set, elem.priv); 7695 7447 7696 - nft_trans_elem_priv(trans) = elem.priv; 7697 - nft_trans_commit_list_add_tail(ctx->net, trans); 7448 + nft_trans_container_elem(trans)->elems[0].priv = elem.priv; 7449 + nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); 7698 7450 return 0; 7699 7451 7700 7452 fail_ops: ··· 7720 7472 return 0; 7721 7473 7722 7474 trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, 7723 - sizeof(struct nft_trans_elem), GFP_ATOMIC); 7475 + struct_size_t(struct nft_trans_elem, elems, 1), 7476 + GFP_ATOMIC); 7724 7477 if (!trans) 7725 7478 return -ENOMEM; 7726 7479 ··· 7730 7481 7731 7482 nft_setelem_data_deactivate(ctx->net, set, elem_priv); 7732 7483 nft_trans_elem_set(trans) = set; 7733 - nft_trans_elem_priv(trans) = elem_priv; 7734 - nft_trans_commit_list_add_tail(ctx->net, trans); 7484 + nft_trans_container_elem(trans)->nelems = 1; 7485 + nft_trans_container_elem(trans)->elems[0].priv = elem_priv; 7486 + nft_trans_commit_list_add_elem(ctx->net, trans, GFP_ATOMIC); 7735 7487 7736 7488 return 0; 7737 7489 } ··· 7748 7498 return -ENOMEM; 7749 7499 7750 7500 nft_setelem_data_deactivate(ctx->net, set, elem_priv); 7751 - nft_trans_elem_priv(trans) = elem_priv; 7752 - nft_trans_commit_list_add_tail(ctx->net, trans); 7501 + 
nft_trans_container_elem(trans)->elems[0].priv = elem_priv; 7502 + nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL); 7753 7503 7754 7504 return 0; 7755 7505 } ··· 9930 9680 break; 9931 9681 case NFT_MSG_DELSETELEM: 9932 9682 case NFT_MSG_DESTROYSETELEM: 9933 - nf_tables_set_elem_destroy(&ctx, 9934 - nft_trans_elem_set(trans), 9935 - nft_trans_elem_priv(trans)); 9683 + nft_trans_elems_destroy(&ctx, nft_trans_container_elem(trans)); 9936 9684 break; 9937 9685 case NFT_MSG_DELOBJ: 9938 9686 case NFT_MSG_DESTROYOBJ: ··· 10503 10255 } 10504 10256 } 10505 10257 10506 - static void nf_tables_commit_audit_collect(struct list_head *adl, 10507 - struct nft_table *table, u32 op) 10258 + /* nft audit emits the number of elements that get added/removed/updated, 10259 + * so NEW/DELSETELEM needs to increment based on the total elem count. 10260 + */ 10261 + static unsigned int nf_tables_commit_audit_entrycount(const struct nft_trans *trans) 10508 10262 { 10263 + switch (trans->msg_type) { 10264 + case NFT_MSG_NEWSETELEM: 10265 + case NFT_MSG_DELSETELEM: 10266 + return nft_trans_container_elem(trans)->nelems; 10267 + } 10268 + 10269 + return 1; 10270 + } 10271 + 10272 + static void nf_tables_commit_audit_collect(struct list_head *adl, 10273 + const struct nft_trans *trans, u32 op) 10274 + { 10275 + const struct nft_table *table = trans->table; 10509 10276 struct nft_audit_data *adp; 10510 10277 10511 10278 list_for_each_entry(adp, adl, list) { ··· 10530 10267 WARN_ONCE(1, "table=%s not expected in commit list", table->name); 10531 10268 return; 10532 10269 found: 10533 - adp->entries++; 10270 + adp->entries += nf_tables_commit_audit_entrycount(trans); 10534 10271 if (!adp->op || adp->op > op) 10535 10272 adp->op = op; 10536 10273 } ··· 10689 10426 10690 10427 nft_ctx_update(&ctx, trans); 10691 10428 10692 - nf_tables_commit_audit_collect(&adl, table, trans->msg_type); 10429 + nf_tables_commit_audit_collect(&adl, trans, trans->msg_type); 10693 10430 switch 
(trans->msg_type) { 10694 10431 case NFT_MSG_NEWTABLE: 10695 10432 if (nft_trans_table_update(trans)) { ··· 10798 10535 case NFT_MSG_NEWSETELEM: 10799 10536 te = nft_trans_container_elem(trans); 10800 10537 10801 - if (te->update_flags) { 10802 - const struct nft_set_ext *ext = 10803 - nft_set_elem_ext(te->set, te->elem_priv); 10538 + nft_trans_elems_add(&ctx, te); 10804 10539 10805 - if (te->update_flags & NFT_TRANS_UPD_TIMEOUT) { 10806 - WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, 10807 - te->timeout); 10808 - } 10809 - if (te->update_flags & NFT_TRANS_UPD_EXPIRATION) { 10810 - WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, 10811 - get_jiffies_64() + te->expiration); 10812 - } 10813 - } else { 10814 - nft_setelem_activate(net, te->set, te->elem_priv); 10815 - } 10816 - 10817 - nf_tables_setelem_notify(&ctx, te->set, 10818 - te->elem_priv, 10819 - NFT_MSG_NEWSETELEM); 10820 10540 if (te->set->ops->commit && 10821 10541 list_empty(&te->set->pending_update)) { 10822 10542 list_add_tail(&te->set->pending_update, ··· 10811 10565 case NFT_MSG_DESTROYSETELEM: 10812 10566 te = nft_trans_container_elem(trans); 10813 10567 10814 - nf_tables_setelem_notify(&ctx, te->set, 10815 - te->elem_priv, 10816 - trans->msg_type); 10817 - nft_setelem_remove(net, te->set, te->elem_priv); 10818 - if (!nft_setelem_is_catchall(te->set, te->elem_priv)) { 10819 - atomic_dec(&te->set->nelems); 10820 - te->set->ndeact--; 10821 - } 10568 + nft_trans_elems_remove(&ctx, te); 10569 + 10822 10570 if (te->set->ops->commit && 10823 10571 list_empty(&te->set->pending_update)) { 10824 10572 list_add_tail(&te->set->pending_update, ··· 10932 10692 nft_set_destroy(&ctx, nft_trans_set(trans)); 10933 10693 break; 10934 10694 case NFT_MSG_NEWSETELEM: 10935 - nft_set_elem_destroy(nft_trans_elem_set(trans), 10936 - nft_trans_elem_priv(trans), true); 10695 + nft_trans_set_elem_destroy(&ctx, nft_trans_container_elem(trans)); 10937 10696 break; 10938 10697 case NFT_MSG_NEWOBJ: 10939 10698 
nft_obj_destroy(&ctx, nft_trans_obj(trans)); ··· 11089 10850 nft_trans_destroy(trans); 11090 10851 break; 11091 10852 case NFT_MSG_NEWSETELEM: 11092 - if (nft_trans_elem_update_flags(trans) || 11093 - nft_trans_elem_set_bound(trans)) { 10853 + if (nft_trans_elem_set_bound(trans)) { 11094 10854 nft_trans_destroy(trans); 11095 10855 break; 11096 10856 } 11097 10857 te = nft_trans_container_elem(trans); 11098 - if (!te->set->ops->abort || 11099 - nft_setelem_is_catchall(te->set, te->elem_priv)) 11100 - nft_setelem_remove(net, te->set, te->elem_priv); 11101 - 11102 - if (!nft_setelem_is_catchall(te->set, te->elem_priv)) 11103 - atomic_dec(&te->set->nelems); 10858 + if (!nft_trans_elems_new_abort(&ctx, te)) { 10859 + nft_trans_destroy(trans); 10860 + break; 10861 + } 11104 10862 11105 10863 if (te->set->ops->abort && 11106 10864 list_empty(&te->set->pending_update)) { ··· 11109 10873 case NFT_MSG_DESTROYSETELEM: 11110 10874 te = nft_trans_container_elem(trans); 11111 10875 11112 - if (!nft_setelem_active_next(net, te->set, te->elem_priv)) { 11113 - nft_setelem_data_activate(net, te->set, te->elem_priv); 11114 - nft_setelem_activate(net, te->set, te->elem_priv); 11115 - } 11116 - if (!nft_setelem_is_catchall(te->set, te->elem_priv)) 11117 - te->set->ndeact--; 10876 + nft_trans_elems_destroy_abort(&ctx, te); 11118 10877 11119 10878 if (te->set->ops->abort && 11120 10879 list_empty(&te->set->pending_update)) {
+1 -1
net/netfilter/nfnetlink.c
··· 517 517 err = nla_parse_deprecated(cda, 518 518 ss->cb[cb_id].attr_count, 519 519 attr, attrlen, 520 - ss->cb[cb_id].policy, NULL); 520 + ss->cb[cb_id].policy, &extack); 521 521 if (err < 0) 522 522 goto ack; 523 523
+139 -27
net/netfilter/nft_bitwise.c
··· 17 17 18 18 struct nft_bitwise { 19 19 u8 sreg; 20 + u8 sreg2; 20 21 u8 dreg; 21 22 enum nft_bitwise_ops op:8; 22 23 u8 len; ··· 26 25 struct nft_data data; 27 26 }; 28 27 29 - static void nft_bitwise_eval_bool(u32 *dst, const u32 *src, 30 - const struct nft_bitwise *priv) 28 + static void nft_bitwise_eval_mask_xor(u32 *dst, const u32 *src, 29 + const struct nft_bitwise *priv) 31 30 { 32 31 unsigned int i; 33 32 ··· 61 60 } 62 61 } 63 62 63 + static void nft_bitwise_eval_and(u32 *dst, const u32 *src, const u32 *src2, 64 + const struct nft_bitwise *priv) 65 + { 66 + unsigned int i, n; 67 + 68 + for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++) 69 + dst[i] = src[i] & src2[i]; 70 + } 71 + 72 + static void nft_bitwise_eval_or(u32 *dst, const u32 *src, const u32 *src2, 73 + const struct nft_bitwise *priv) 74 + { 75 + unsigned int i, n; 76 + 77 + for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++) 78 + dst[i] = src[i] | src2[i]; 79 + } 80 + 81 + static void nft_bitwise_eval_xor(u32 *dst, const u32 *src, const u32 *src2, 82 + const struct nft_bitwise *priv) 83 + { 84 + unsigned int i, n; 85 + 86 + for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++) 87 + dst[i] = src[i] ^ src2[i]; 88 + } 89 + 64 90 void nft_bitwise_eval(const struct nft_expr *expr, 65 91 struct nft_regs *regs, const struct nft_pktinfo *pkt) 66 92 { 67 93 const struct nft_bitwise *priv = nft_expr_priv(expr); 68 - const u32 *src = &regs->data[priv->sreg]; 94 + const u32 *src = &regs->data[priv->sreg], *src2; 69 95 u32 *dst = &regs->data[priv->dreg]; 70 96 71 - switch (priv->op) { 72 - case NFT_BITWISE_BOOL: 73 - nft_bitwise_eval_bool(dst, src, priv); 74 - break; 75 - case NFT_BITWISE_LSHIFT: 97 + if (priv->op == NFT_BITWISE_MASK_XOR) { 98 + nft_bitwise_eval_mask_xor(dst, src, priv); 99 + return; 100 + } 101 + if (priv->op == NFT_BITWISE_LSHIFT) { 76 102 nft_bitwise_eval_lshift(dst, src, priv); 77 - break; 78 - case NFT_BITWISE_RSHIFT: 103 + return; 104 + } 105 
+ if (priv->op == NFT_BITWISE_RSHIFT) { 79 106 nft_bitwise_eval_rshift(dst, src, priv); 80 - break; 107 + return; 108 + } 109 + 110 + src2 = priv->sreg2 ? &regs->data[priv->sreg2] : priv->data.data; 111 + 112 + if (priv->op == NFT_BITWISE_AND) { 113 + nft_bitwise_eval_and(dst, src, src2, priv); 114 + return; 115 + } 116 + if (priv->op == NFT_BITWISE_OR) { 117 + nft_bitwise_eval_or(dst, src, src2, priv); 118 + return; 119 + } 120 + if (priv->op == NFT_BITWISE_XOR) { 121 + nft_bitwise_eval_xor(dst, src, src2, priv); 122 + return; 81 123 } 82 124 } 83 125 84 126 static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { 85 127 [NFTA_BITWISE_SREG] = { .type = NLA_U32 }, 128 + [NFTA_BITWISE_SREG2] = { .type = NLA_U32 }, 86 129 [NFTA_BITWISE_DREG] = { .type = NLA_U32 }, 87 130 [NFTA_BITWISE_LEN] = { .type = NLA_U32 }, 88 131 [NFTA_BITWISE_MASK] = { .type = NLA_NESTED }, ··· 135 90 [NFTA_BITWISE_DATA] = { .type = NLA_NESTED }, 136 91 }; 137 92 138 - static int nft_bitwise_init_bool(struct nft_bitwise *priv, 139 - const struct nlattr *const tb[]) 93 + static int nft_bitwise_init_mask_xor(struct nft_bitwise *priv, 94 + const struct nlattr *const tb[]) 140 95 { 141 96 struct nft_data_desc mask = { 142 97 .type = NFT_DATA_VALUE, ··· 150 105 }; 151 106 int err; 152 107 153 - if (tb[NFTA_BITWISE_DATA]) 108 + if (tb[NFTA_BITWISE_DATA] || 109 + tb[NFTA_BITWISE_SREG2]) 154 110 return -EINVAL; 155 111 156 112 if (!tb[NFTA_BITWISE_MASK] || ··· 185 139 int err; 186 140 187 141 if (tb[NFTA_BITWISE_MASK] || 188 - tb[NFTA_BITWISE_XOR]) 142 + tb[NFTA_BITWISE_XOR] || 143 + tb[NFTA_BITWISE_SREG2]) 189 144 return -EINVAL; 190 145 191 146 if (!tb[NFTA_BITWISE_DATA]) ··· 199 152 if (priv->data.data[0] >= BITS_PER_TYPE(u32)) { 200 153 nft_data_release(&priv->data, desc.type); 201 154 return -EINVAL; 155 + } 156 + 157 + return 0; 158 + } 159 + 160 + static int nft_bitwise_init_bool(const struct nft_ctx *ctx, 161 + struct nft_bitwise *priv, 162 + const struct nlattr *const tb[]) 
163 + { 164 + int err; 165 + 166 + if (tb[NFTA_BITWISE_MASK] || 167 + tb[NFTA_BITWISE_XOR]) 168 + return -EINVAL; 169 + 170 + if ((!tb[NFTA_BITWISE_DATA] && !tb[NFTA_BITWISE_SREG2]) || 171 + (tb[NFTA_BITWISE_DATA] && tb[NFTA_BITWISE_SREG2])) 172 + return -EINVAL; 173 + 174 + if (tb[NFTA_BITWISE_DATA]) { 175 + struct nft_data_desc desc = { 176 + .type = NFT_DATA_VALUE, 177 + .size = sizeof(priv->data), 178 + .len = priv->len, 179 + }; 180 + 181 + err = nft_data_init(NULL, &priv->data, &desc, 182 + tb[NFTA_BITWISE_DATA]); 183 + if (err < 0) 184 + return err; 185 + } else { 186 + err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG2], 187 + &priv->sreg2, priv->len); 188 + if (err < 0) 189 + return err; 202 190 } 203 191 204 192 return 0; ··· 267 185 if (tb[NFTA_BITWISE_OP]) { 268 186 priv->op = ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])); 269 187 switch (priv->op) { 270 - case NFT_BITWISE_BOOL: 188 + case NFT_BITWISE_MASK_XOR: 271 189 case NFT_BITWISE_LSHIFT: 272 190 case NFT_BITWISE_RSHIFT: 191 + case NFT_BITWISE_AND: 192 + case NFT_BITWISE_OR: 193 + case NFT_BITWISE_XOR: 273 194 break; 274 195 default: 275 196 return -EOPNOTSUPP; 276 197 } 277 198 } else { 278 - priv->op = NFT_BITWISE_BOOL; 199 + priv->op = NFT_BITWISE_MASK_XOR; 279 200 } 280 201 281 202 switch(priv->op) { 282 - case NFT_BITWISE_BOOL: 283 - err = nft_bitwise_init_bool(priv, tb); 203 + case NFT_BITWISE_MASK_XOR: 204 + err = nft_bitwise_init_mask_xor(priv, tb); 284 205 break; 285 206 case NFT_BITWISE_LSHIFT: 286 207 case NFT_BITWISE_RSHIFT: 287 208 err = nft_bitwise_init_shift(priv, tb); 209 + break; 210 + case NFT_BITWISE_AND: 211 + case NFT_BITWISE_OR: 212 + case NFT_BITWISE_XOR: 213 + err = nft_bitwise_init_bool(ctx, priv, tb); 288 214 break; 289 215 } 290 216 291 217 return err; 292 218 } 293 219 294 - static int nft_bitwise_dump_bool(struct sk_buff *skb, 295 - const struct nft_bitwise *priv) 220 + static int nft_bitwise_dump_mask_xor(struct sk_buff *skb, 221 + const struct nft_bitwise *priv) 296 
222 { 297 223 if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask, 298 224 NFT_DATA_VALUE, priv->len) < 0) ··· 322 232 return 0; 323 233 } 324 234 235 + static int nft_bitwise_dump_bool(struct sk_buff *skb, 236 + const struct nft_bitwise *priv) 237 + { 238 + if (priv->sreg2) { 239 + if (nft_dump_register(skb, NFTA_BITWISE_SREG2, priv->sreg2)) 240 + return -1; 241 + } else { 242 + if (nft_data_dump(skb, NFTA_BITWISE_DATA, &priv->data, 243 + NFT_DATA_VALUE, sizeof(u32)) < 0) 244 + return -1; 245 + } 246 + 247 + return 0; 248 + } 249 + 325 250 static int nft_bitwise_dump(struct sk_buff *skb, 326 251 const struct nft_expr *expr, bool reset) 327 252 { ··· 353 248 return -1; 354 249 355 250 switch (priv->op) { 356 - case NFT_BITWISE_BOOL: 357 - err = nft_bitwise_dump_bool(skb, priv); 251 + case NFT_BITWISE_MASK_XOR: 252 + err = nft_bitwise_dump_mask_xor(skb, priv); 358 253 break; 359 254 case NFT_BITWISE_LSHIFT: 360 255 case NFT_BITWISE_RSHIFT: 361 256 err = nft_bitwise_dump_shift(skb, priv); 257 + break; 258 + case NFT_BITWISE_AND: 259 + case NFT_BITWISE_OR: 260 + case NFT_BITWISE_XOR: 261 + err = nft_bitwise_dump_bool(skb, priv); 362 262 break; 363 263 } 364 264 ··· 379 269 const struct nft_bitwise *priv = nft_expr_priv(expr); 380 270 struct nft_offload_reg *reg = &ctx->regs[priv->dreg]; 381 271 382 - if (priv->op != NFT_BITWISE_BOOL) 272 + if (priv->op != NFT_BITWISE_MASK_XOR) 383 273 return -EOPNOTSUPP; 384 274 385 275 if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) || ··· 409 299 track->regs[priv->dreg].bitwise && 410 300 track->regs[priv->dreg].bitwise->ops == expr->ops && 411 301 priv->sreg == bitwise->sreg && 302 + priv->sreg2 == bitwise->sreg2 && 412 303 priv->dreg == bitwise->dreg && 413 304 priv->op == bitwise->op && 414 305 priv->len == bitwise->len && ··· 486 375 if (err < 0) 487 376 return err; 488 377 489 - if (tb[NFTA_BITWISE_DATA]) 378 + if (tb[NFTA_BITWISE_DATA] || 379 + tb[NFTA_BITWISE_SREG2]) 490 380 return -EINVAL; 491 381 492 382 if 
(!tb[NFTA_BITWISE_MASK] || ··· 518 406 return -1; 519 407 if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(sizeof(u32)))) 520 408 return -1; 521 - if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_BOOL))) 409 + if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_MASK_XOR))) 522 410 return -1; 523 411 524 412 data.data[0] = priv->mask; ··· 613 501 return &nft_bitwise_ops; 614 502 615 503 if (tb[NFTA_BITWISE_OP] && 616 - ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_BOOL) 504 + ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_MASK_XOR) 617 505 return &nft_bitwise_ops; 618 506 619 507 return &nft_bitwise_fast_ops;
+2 -2
net/netfilter/nft_flow_offload.c
··· 8 8 #include <linux/spinlock.h> 9 9 #include <linux/netfilter/nf_conntrack_common.h> 10 10 #include <linux/netfilter/nf_tables.h> 11 - #include <net/ip.h> /* for ipv4 options. */ 11 + #include <net/ip.h> 12 12 #include <net/inet_dscp.h> 13 13 #include <net/netfilter/nf_tables.h> 14 14 #include <net/netfilter/nf_tables_core.h> ··· 236 236 fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip; 237 237 fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; 238 238 fl.u.ip4.flowi4_iif = this_dst->dev->ifindex; 239 - fl.u.ip4.flowi4_tos = ip_hdr(pkt->skb)->tos & INET_DSCP_MASK; 239 + fl.u.ip4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(pkt->skb))); 240 240 fl.u.ip4.flowi4_mark = pkt->skb->mark; 241 241 fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC; 242 242 break;