Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nf_tables: restore set elements when delete set fails

From abort path, nft_mapelem_activate() needs to restore refcounters to
the original state. Currently, it uses the set->ops->walk() to iterate
over these set elements. The existing set iterator skips inactive
elements in the next generation, this does not work from the abort path
to restore the original state since it has to skip active elements
instead (not inactive ones).

This patch moves the check for inactive elements to the set iterator
callback, then it reverses the logic for the .activate case which
needs to skip active elements.

Toggle next generation bit for elements when delete set command is
invoked and call nft_clear() from .activate (abort) path to restore the
next generation bit.

The splat below shows an object in mappings memleak:

[43929.457523] ------------[ cut here ]------------
[43929.457532] WARNING: CPU: 0 PID: 1139 at include/net/netfilter/nf_tables.h:1237 nft_setelem_data_deactivate+0xe4/0xf0 [nf_tables]
[...]
[43929.458014] RIP: 0010:nft_setelem_data_deactivate+0xe4/0xf0 [nf_tables]
[43929.458076] Code: 83 f8 01 77 ab 49 8d 7c 24 08 e8 37 5e d0 de 49 8b 6c 24 08 48 8d 7d 50 e8 e9 5c d0 de 8b 45 50 8d 50 ff 89 55 50 85 c0 75 86 <0f> 0b eb 82 0f 0b eb b3 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90
[43929.458081] RSP: 0018:ffff888140f9f4b0 EFLAGS: 00010246
[43929.458086] RAX: 0000000000000000 RBX: ffff8881434f5288 RCX: dffffc0000000000
[43929.458090] RDX: 00000000ffffffff RSI: ffffffffa26d28a7 RDI: ffff88810ecc9550
[43929.458093] RBP: ffff88810ecc9500 R08: 0000000000000001 R09: ffffed10281f3e8f
[43929.458096] R10: 0000000000000003 R11: ffff0000ffff0000 R12: ffff8881434f52a0
[43929.458100] R13: ffff888140f9f5f4 R14: ffff888151c7a800 R15: 0000000000000002
[43929.458103] FS: 00007f0c687c4740(0000) GS:ffff888390800000(0000) knlGS:0000000000000000
[43929.458107] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[43929.458111] CR2: 00007f58dbe5b008 CR3: 0000000123602005 CR4: 00000000001706f0
[43929.458114] Call Trace:
[43929.458118] <TASK>
[43929.458121] ? __warn+0x9f/0x1a0
[43929.458127] ? nft_setelem_data_deactivate+0xe4/0xf0 [nf_tables]
[43929.458188] ? report_bug+0x1b1/0x1e0
[43929.458196] ? handle_bug+0x3c/0x70
[43929.458200] ? exc_invalid_op+0x17/0x40
[43929.458211] ? nft_setelem_data_deactivate+0xd7/0xf0 [nf_tables]
[43929.458271] ? nft_setelem_data_deactivate+0xe4/0xf0 [nf_tables]
[43929.458332] nft_mapelem_deactivate+0x24/0x30 [nf_tables]
[43929.458392] nft_rhash_walk+0xdd/0x180 [nf_tables]
[43929.458453] ? __pfx_nft_rhash_walk+0x10/0x10 [nf_tables]
[43929.458512] ? rb_insert_color+0x2e/0x280
[43929.458520] nft_map_deactivate+0xdc/0x1e0 [nf_tables]
[43929.458582] ? __pfx_nft_map_deactivate+0x10/0x10 [nf_tables]
[43929.458642] ? __pfx_nft_mapelem_deactivate+0x10/0x10 [nf_tables]
[43929.458701] ? __rcu_read_unlock+0x46/0x70
[43929.458709] nft_delset+0xff/0x110 [nf_tables]
[43929.458769] nft_flush_table+0x16f/0x460 [nf_tables]
[43929.458830] nf_tables_deltable+0x501/0x580 [nf_tables]

Fixes: 628bd3e49cba ("netfilter: nf_tables: drop map element references from preparation phase")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

+45 -20
+40 -4
net/netfilter/nf_tables_api.c
··· 594 594 const struct nft_set_iter *iter, 595 595 struct nft_elem_priv *elem_priv) 596 596 { 597 + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); 598 + 599 + if (!nft_set_elem_active(ext, iter->genmask)) 600 + return 0; 601 + 602 + nft_set_elem_change_active(ctx->net, set, ext); 597 603 nft_setelem_data_deactivate(ctx->net, set, elem_priv); 598 604 599 605 return 0; ··· 623 617 if (!nft_set_elem_active(ext, genmask)) 624 618 continue; 625 619 620 + nft_set_elem_change_active(ctx->net, set, ext); 626 621 nft_setelem_data_deactivate(ctx->net, set, catchall->elem); 627 622 break; 628 623 } ··· 3887 3880 const struct nft_data *data; 3888 3881 int err; 3889 3882 3883 + if (!nft_set_elem_active(ext, iter->genmask)) 3884 + return 0; 3885 + 3890 3886 if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && 3891 3887 *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END) 3892 3888 return 0; ··· 3913 3903 3914 3904 int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set) 3915 3905 { 3916 - u8 genmask = nft_genmask_next(ctx->net); 3906 + struct nft_set_iter dummy_iter = { 3907 + .genmask = nft_genmask_next(ctx->net), 3908 + }; 3917 3909 struct nft_set_elem_catchall *catchall; 3910 + 3918 3911 struct nft_set_ext *ext; 3919 3912 int ret = 0; 3920 3913 3921 3914 list_for_each_entry_rcu(catchall, &set->catchall_list, list) { 3922 3915 ext = nft_set_elem_ext(set, catchall->elem); 3923 - if (!nft_set_elem_active(ext, genmask)) 3916 + if (!nft_set_elem_active(ext, dummy_iter.genmask)) 3924 3917 continue; 3925 3918 3926 - ret = nft_setelem_validate(ctx, set, NULL, catchall->elem); 3919 + ret = nft_setelem_validate(ctx, set, &dummy_iter, catchall->elem); 3927 3920 if (ret < 0) 3928 3921 return ret; 3929 3922 } ··· 5415 5402 const struct nft_set_iter *iter, 5416 5403 struct nft_elem_priv *elem_priv) 5417 5404 { 5405 + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); 5406 + 5407 + if (!nft_set_elem_active(ext, iter->genmask)) 5408 + return 0; 5409 + 5418 5410 return nft_setelem_data_validate(ctx, set, elem_priv); 5419 5411 } 5420 5412 ··· 5512 5494 const struct nft_set_iter *iter, 5513 5495 struct nft_elem_priv *elem_priv) 5514 5496 { 5497 + struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); 5498 + 5499 + /* called from abort path, reverse check to undo changes. */ 5500 + if (nft_set_elem_active(ext, iter->genmask)) 5501 + return 0; 5502 + 5503 + nft_clear(ctx->net, ext); 5515 5504 nft_setelem_data_activate(ctx->net, set, elem_priv); 5516 5505 5517 5506 return 0; ··· 5536 5511 if (!nft_set_elem_active(ext, genmask)) 5537 5512 continue; 5538 5513 5514 + nft_clear(ctx->net, ext); 5539 5515 nft_setelem_data_activate(ctx->net, set, catchall->elem); 5540 5516 break; 5541 5517 } ··· 5810 5784 { 5811 5785 const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); 5812 5786 struct nft_set_dump_args *args; 5787 + 5788 + if (!nft_set_elem_active(ext, iter->genmask)) 5789 + return 0; 5813 5790 5814 5791 if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext)) 5815 5792 return 0; ··· 6664 6635 struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); 6665 6636 6666 6637 if (nft_setelem_is_catchall(set, elem_priv)) { 6667 - nft_set_elem_change_active(net, set, ext); 6638 + nft_clear(net, ext); 6668 6639 } else { 6669 6640 set->ops->activate(net, set, elem_priv); 6670 6641 } ··· 7346 7317 const struct nft_set_iter *iter, 7347 7318 struct nft_elem_priv *elem_priv) 7348 7319 { 7320 + const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); 7349 7321 struct nft_trans *trans; 7322 + 7323 + if (!nft_set_elem_active(ext, iter->genmask)) 7324 + return 0; 7350 7325 7351 7326 trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM, 7352 7327 sizeof(struct nft_trans_elem), GFP_ATOMIC); ··· 10832 10799 struct nft_elem_priv *elem_priv) 10833 10800 { 10834 10801 const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv); 10802 + 10803 + if (!nft_set_elem_active(ext, iter->genmask)) 10804 + return 0; 10835 10805 10836 10806 if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && 10837 10807 *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
+1 -3
net/netfilter/nft_set_bitmap.c
··· 172 172 nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off); 173 173 /* Enter 11 state. */ 174 174 priv->bitmap[idx] |= (genmask << off); 175 - nft_set_elem_change_active(net, set, &be->ext); 175 + nft_clear(net, &be->ext); 176 176 } 177 177 178 178 static void nft_bitmap_flush(const struct net *net, ··· 221 221 222 222 list_for_each_entry_rcu(be, &priv->list, head) { 223 223 if (iter->count < iter->skip) 224 - goto cont; 225 - if (!nft_set_elem_active(&be->ext, iter->genmask)) 226 224 goto cont; 227 225 228 226 iter->err = iter->fn(ctx, set, iter, &be->priv);
+2 -6
net/netfilter/nft_set_hash.c
··· 199 199 { 200 200 struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv); 201 201 202 - nft_set_elem_change_active(net, set, &he->ext); 202 + nft_clear(net, &he->ext); 203 203 } 204 204 205 205 static void nft_rhash_flush(const struct net *net, ··· 285 285 } 286 286 287 287 if (iter->count < iter->skip) 288 - goto cont; 289 - if (!nft_set_elem_active(&he->ext, iter->genmask)) 290 288 goto cont; 291 289 292 290 iter->err = iter->fn(ctx, set, iter, &he->priv); ··· 597 599 { 598 600 struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv); 599 601 600 - nft_set_elem_change_active(net, set, &he->ext); 602 + nft_clear(net, &he->ext); 601 603 } 602 604 603 605 static void nft_hash_flush(const struct net *net, ··· 649 651 for (i = 0; i < priv->buckets; i++) { 650 652 hlist_for_each_entry_rcu(he, &priv->table[i], node) { 651 653 if (iter->count < iter->skip) 652 - goto cont; 653 - if (!nft_set_elem_active(&he->ext, iter->genmask)) 654 654 goto cont; 655 655 656 656 iter->err = iter->fn(ctx, set, iter, &he->priv);
+1 -4
net/netfilter/nft_set_pipapo.c
··· 1847 1847 { 1848 1848 struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv); 1849 1849 1850 - nft_set_elem_change_active(net, set, &e->ext); 1850 + nft_clear(net, &e->ext); 1851 1851 } 1852 1852 1853 1853 /** ··· 2148 2148 goto cont; 2149 2149 2150 2150 e = f->mt[r].e; 2151 - 2152 - if (!nft_set_elem_active(&e->ext, iter->genmask)) 2153 - goto cont; 2154 2151 2155 2152 iter->err = iter->fn(ctx, set, iter, &e->priv); 2156 2153 if (iter->err < 0)
+1 -3
net/netfilter/nft_set_rbtree.c
··· 532 532 { 533 533 struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); 534 534 535 - nft_set_elem_change_active(net, set, &rbe->ext); 535 + nft_clear(net, &rbe->ext); 536 536 } 537 537 538 538 static void nft_rbtree_flush(const struct net *net, ··· 599 599 rbe = rb_entry(node, struct nft_rbtree_elem, node); 600 600 601 601 if (iter->count < iter->skip) 602 - goto cont; 603 - if (!nft_set_elem_active(&rbe->ext, iter->genmask)) 604 602 goto cont; 605 603 606 604 iter->err = iter->fn(ctx, set, iter, &rbe->priv);