Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nf_tables: prepare set element accounting for async updates

Use atomic operations for the element count to avoid races with async
updates.

To properly handle the transactional semantics during netlink updates,
deleted but not yet committed elements are accounted for separately and
are treated as being already removed. This means for the duration of
a netlink transaction, the limit might be exceeded by the number of
elements deleted. Set implementations must be prepared to handle this.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Patrick McHardy and committed by
Pablo Neira Ayuso
3dd0673a 4a8678ef

+18 -12
+3 -1
include/net/netfilter/nf_tables.h
··· 258 258 * @dtype: data type (verdict or numeric type defined by userspace) 259 259 * @size: maximum set size 260 260 * @nelems: number of elements 261 + * @ndeact: number of deactivated elements queued for removal 261 262 * @timeout: default timeout value in msecs 262 263 * @gc_int: garbage collection interval in msecs 263 264 * @policy: set parameterization (see enum nft_set_policies) ··· 276 275 u32 ktype; 277 276 u32 dtype; 278 277 u32 size; 279 - u32 nelems; 278 + atomic_t nelems; 279 + u32 ndeact; 280 280 u64 timeout; 281 281 u32 gc_int; 282 282 u16 policy;
+13 -10
net/netfilter/nf_tables_api.c
··· 3238 3238 u32 flags; 3239 3239 int err; 3240 3240 3241 - if (set->size && set->nelems == set->size) 3242 - return -ENFILE; 3243 - 3244 3241 err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, 3245 3242 nft_set_elem_policy); 3246 3243 if (err < 0) ··· 3388 3391 return -EBUSY; 3389 3392 3390 3393 nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { 3391 - err = nft_add_set_elem(&ctx, set, attr); 3392 - if (err < 0) 3393 - break; 3394 + if (set->size && 3395 + !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) 3396 + return -ENFILE; 3394 3397 3395 - set->nelems++; 3398 + err = nft_add_set_elem(&ctx, set, attr); 3399 + if (err < 0) { 3400 + atomic_dec(&set->nelems); 3401 + break; 3402 + } 3396 3403 } 3397 3404 return err; 3398 3405 } ··· 3478 3477 if (err < 0) 3479 3478 break; 3480 3479 3481 - set->nelems--; 3480 + set->ndeact++; 3482 3481 } 3483 3482 return err; 3484 3483 } ··· 3811 3810 &te->elem, 3812 3811 NFT_MSG_DELSETELEM, 0); 3813 3812 te->set->ops->remove(te->set, &te->elem); 3813 + atomic_dec(&te->set->nelems); 3814 + te->set->ndeact--; 3814 3815 break; 3815 3816 } 3816 3817 } ··· 3916 3913 nft_trans_destroy(trans); 3917 3914 break; 3918 3915 case NFT_MSG_NEWSETELEM: 3919 - nft_trans_elem_set(trans)->nelems--; 3920 3916 te = (struct nft_trans_elem *)trans->data; 3921 3917 3922 3918 te->set->ops->remove(te->set, &te->elem); 3919 + atomic_dec(&te->set->nelems); 3923 3920 break; 3924 3921 case NFT_MSG_DELSETELEM: 3925 3922 te = (struct nft_trans_elem *)trans->data; 3926 3923 3927 - nft_trans_elem_set(trans)->nelems++; 3928 3924 te->set->ops->activate(te->set, &te->elem); 3925 + te->set->ndeact--; 3929 3926 3930 3927 nft_trans_destroy(trans); 3931 3928 break;
+2 -1
net/netfilter/nft_hash.c
··· 203 203 204 204 static void nft_hash_gc(struct work_struct *work) 205 205 { 206 - const struct nft_set *set; 206 + struct nft_set *set; 207 207 struct nft_hash_elem *he; 208 208 struct nft_hash *priv; 209 209 struct nft_set_gc_batch *gcb = NULL; ··· 237 237 if (gcb == NULL) 238 238 goto out; 239 239 rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); 240 + atomic_dec(&set->nelems); 240 241 nft_set_gc_batch_add(gcb, he); 241 242 } 242 243 out: