Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nf_tables: revisit chain/object refcounting from elements

Andreas reports that the following incremental update using our commit
protocol doesn't work.

# nft -f incremental-update.nft
delete element ip filter client_to_any { 10.180.86.22 : goto CIn_1 }
delete chain ip filter CIn_1
... Error: Could not process rule: Device or resource busy

The existing code is not well-integrated into the commit phase protocol,
since element deletions do not decrement the reference counters during
the preparation phase. This results in bogus EBUSY errors like the one
above.

Two new functions come with this patch:

* nft_set_elem_activate() is called from the abort path to restore the
references that set elements hold on objects, which were dropped
during the preparation phase.

* nft_set_elem_deactivate() is called from nft_del_setelem() to drop
the references that set elements hold on objects during the
preparation phase of the commit protocol.

nft_data_uninit() has been renamed to nft_data_release(), since this
function does not uninitialize any data stored in the data register;
it just releases the references to objects. Moreover, a new function
nft_data_hold() has been introduced, to be used from
nft_set_elem_activate().

Reported-by: Andreas Schultz <aschultz@tpip.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

+81 -18
+1 -1
include/net/netfilter/nf_tables.h
··· 176 176 int nft_data_init(const struct nft_ctx *ctx, 177 177 struct nft_data *data, unsigned int size, 178 178 struct nft_data_desc *desc, const struct nlattr *nla); 179 - void nft_data_uninit(const struct nft_data *data, enum nft_data_types type); 179 + void nft_data_release(const struct nft_data *data, enum nft_data_types type); 180 180 int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, 181 181 enum nft_data_types type, unsigned int len); 182 182
+72 -10
net/netfilter/nf_tables_api.c
··· 3627 3627 { 3628 3628 struct nft_set_ext *ext = nft_set_elem_ext(set, elem); 3629 3629 3630 - nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); 3630 + nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE); 3631 3631 if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) 3632 - nft_data_uninit(nft_set_ext_data(ext), set->dtype); 3632 + nft_data_release(nft_set_ext_data(ext), set->dtype); 3633 3633 if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) 3634 3634 nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); 3635 3635 if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) ··· 3637 3637 kfree(elem); 3638 3638 } 3639 3639 EXPORT_SYMBOL_GPL(nft_set_elem_destroy); 3640 + 3641 + /* Only called from commit path, nft_set_elem_deactivate() already deals with 3642 + * the refcounting from the preparation phase. 3643 + */ 3644 + static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem) 3645 + { 3646 + struct nft_set_ext *ext = nft_set_elem_ext(set, elem); 3647 + 3648 + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) 3649 + nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); 3650 + kfree(elem); 3651 + } 3640 3652 3641 3653 static int nft_setelem_parse_flags(const struct nft_set *set, 3642 3654 const struct nlattr *attr, u32 *flags) ··· 3861 3849 kfree(elem.priv); 3862 3850 err3: 3863 3851 if (nla[NFTA_SET_ELEM_DATA] != NULL) 3864 - nft_data_uninit(&data, d2.type); 3852 + nft_data_release(&data, d2.type); 3865 3853 err2: 3866 - nft_data_uninit(&elem.key.val, d1.type); 3854 + nft_data_release(&elem.key.val, d1.type); 3867 3855 err1: 3868 3856 return err; 3869 3857 } ··· 3906 3894 break; 3907 3895 } 3908 3896 return err; 3897 + } 3898 + 3899 + /** 3900 + * nft_data_hold - hold a nft_data item 3901 + * 3902 + * @data: struct nft_data to release 3903 + * @type: type of data 3904 + * 3905 + * Hold a nft_data item. 
NFT_DATA_VALUE types can be silently discarded, 3906 + * NFT_DATA_VERDICT bumps the reference to chains in case of NFT_JUMP and 3907 + * NFT_GOTO verdicts. This function must be called on active data objects 3908 + * from the second phase of the commit protocol. 3909 + */ 3910 + static void nft_data_hold(const struct nft_data *data, enum nft_data_types type) 3911 + { 3912 + if (type == NFT_DATA_VERDICT) { 3913 + switch (data->verdict.code) { 3914 + case NFT_JUMP: 3915 + case NFT_GOTO: 3916 + data->verdict.chain->use++; 3917 + break; 3918 + } 3919 + } 3920 + } 3921 + 3922 + static void nft_set_elem_activate(const struct net *net, 3923 + const struct nft_set *set, 3924 + struct nft_set_elem *elem) 3925 + { 3926 + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); 3927 + 3928 + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) 3929 + nft_data_hold(nft_set_ext_data(ext), set->dtype); 3930 + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) 3931 + (*nft_set_ext_obj(ext))->use++; 3932 + } 3933 + 3934 + static void nft_set_elem_deactivate(const struct net *net, 3935 + const struct nft_set *set, 3936 + struct nft_set_elem *elem) 3937 + { 3938 + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv); 3939 + 3940 + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) 3941 + nft_data_release(nft_set_ext_data(ext), set->dtype); 3942 + if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF)) 3943 + (*nft_set_ext_obj(ext))->use--; 3909 3944 } 3910 3945 3911 3946 static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, ··· 4020 3961 kfree(elem.priv); 4021 3962 elem.priv = priv; 4022 3963 3964 + nft_set_elem_deactivate(ctx->net, set, &elem); 3965 + 4023 3966 nft_trans_elem(trans) = elem; 4024 3967 list_add_tail(&trans->list, &ctx->net->nft.commit_list); 4025 3968 return 0; ··· 4031 3970 err3: 4032 3971 kfree(elem.priv); 4033 3972 err2: 4034 - nft_data_uninit(&elem.key.val, desc.type); 3973 + nft_data_release(&elem.key.val, desc.type); 4035 3974 err1: 4036 3975 
return err; 4037 3976 } ··· 4838 4777 nft_set_destroy(nft_trans_set(trans)); 4839 4778 break; 4840 4779 case NFT_MSG_DELSETELEM: 4841 - nft_set_elem_destroy(nft_trans_elem_set(trans), 4842 - nft_trans_elem(trans).priv, true); 4780 + nf_tables_set_elem_destroy(nft_trans_elem_set(trans), 4781 + nft_trans_elem(trans).priv); 4843 4782 break; 4844 4783 case NFT_MSG_DELOBJ: 4845 4784 nft_obj_destroy(nft_trans_obj(trans)); ··· 5074 5013 case NFT_MSG_DELSETELEM: 5075 5014 te = (struct nft_trans_elem *)trans->data; 5076 5015 5016 + nft_set_elem_activate(net, te->set, &te->elem); 5077 5017 te->set->ops->activate(net, te->set, &te->elem); 5078 5018 te->set->ndeact--; 5079 5019 ··· 5560 5498 EXPORT_SYMBOL_GPL(nft_data_init); 5561 5499 5562 5500 /** 5563 - * nft_data_uninit - release a nft_data item 5501 + * nft_data_release - release a nft_data item 5564 5502 * 5565 5503 * @data: struct nft_data to release 5566 5504 * @type: type of data ··· 5568 5506 * Release a nft_data item. NFT_DATA_VALUE types can be silently discarded, 5569 5507 * all others need to be released by calling this function. 5570 5508 */ 5571 - void nft_data_uninit(const struct nft_data *data, enum nft_data_types type) 5509 + void nft_data_release(const struct nft_data *data, enum nft_data_types type) 5572 5510 { 5573 5511 if (type < NFT_DATA_VERDICT) 5574 5512 return; ··· 5579 5517 WARN_ON(1); 5580 5518 } 5581 5519 } 5582 - EXPORT_SYMBOL_GPL(nft_data_uninit); 5520 + EXPORT_SYMBOL_GPL(nft_data_release); 5583 5521 5584 5522 int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, 5585 5523 enum nft_data_types type, unsigned int len)
+2 -2
net/netfilter/nft_bitwise.c
··· 99 99 100 100 return 0; 101 101 err2: 102 - nft_data_uninit(&priv->xor, d2.type); 102 + nft_data_release(&priv->xor, d2.type); 103 103 err1: 104 - nft_data_uninit(&priv->mask, d1.type); 104 + nft_data_release(&priv->mask, d1.type); 105 105 return err; 106 106 } 107 107
+1 -1
net/netfilter/nft_cmp.c
··· 211 211 212 212 return &nft_cmp_ops; 213 213 err1: 214 - nft_data_uninit(&data, desc.type); 214 + nft_data_release(&data, desc.type); 215 215 return ERR_PTR(-EINVAL); 216 216 } 217 217
+3 -2
net/netfilter/nft_immediate.c
··· 65 65 return 0; 66 66 67 67 err1: 68 - nft_data_uninit(&priv->data, desc.type); 68 + nft_data_release(&priv->data, desc.type); 69 69 return err; 70 70 } 71 71 ··· 73 73 const struct nft_expr *expr) 74 74 { 75 75 const struct nft_immediate_expr *priv = nft_expr_priv(expr); 76 - return nft_data_uninit(&priv->data, nft_dreg_to_type(priv->dreg)); 76 + 77 + return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg)); 77 78 } 78 79 79 80 static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr)
+2 -2
net/netfilter/nft_range.c
··· 102 102 priv->len = desc_from.len; 103 103 return 0; 104 104 err2: 105 - nft_data_uninit(&priv->data_to, desc_to.type); 105 + nft_data_release(&priv->data_to, desc_to.type); 106 106 err1: 107 - nft_data_uninit(&priv->data_from, desc_from.type); 107 + nft_data_release(&priv->data_from, desc_from.type); 108 108 return err; 109 109 } 110 110