Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nf_tables: make destruction work queue pernet

The call to flush_work before tearing down a table from the netlink
notifier was supposed to make sure that all earlier updates (e.g. rule
add) that might reference that table have been processed.

Unfortunately, flush_work() waits for the last queued instance.
This could be an instance that is different from the one that we must
wait for.

This is because transactions are protected with a pernet mutex, but the
work item is global, so holding the transaction mutex doesn't prevent
another netns from queueing more work.

Make the work item pernet so that flush_work() will wait for all
transactions queued from this netns.

A welcome side effect is that we no longer need to wait for transaction
objects from foreign netns.

The gc work queue is still global. This seems to be ok because nft_set
structures are reference counted and each container structure owns a
reference on the net namespace.

The destroy_list is still protected by a global spinlock rather than
a per-net one, but the hold time is very short anyway.

v2: call cancel_work_sync before reaping the remaining tables (Pablo).

Fixes: 9f6958ba2e90 ("netfilter: nf_tables: unconditionally flush pending work before notifier")
Reported-by: syzbot+5d8c5789c8cb076b2c25@syzkaller.appspotmail.com
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Florian Westphal and committed by
Pablo Neira Ayuso
fb828656 df08c94b

+21 -15
+3 -1
include/net/netfilter/nf_tables.h
··· 1891 1891 void __init nft_chain_route_init(void); 1892 1892 void nft_chain_route_fini(void); 1893 1893 1894 - void nf_tables_trans_destroy_flush_work(void); 1894 + void nf_tables_trans_destroy_flush_work(struct net *net); 1895 1895 1896 1896 int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); 1897 1897 __be64 nf_jiffies64_to_msecs(u64 input); ··· 1905 1905 struct nftables_pernet { 1906 1906 struct list_head tables; 1907 1907 struct list_head commit_list; 1908 + struct list_head destroy_list; 1908 1909 struct list_head commit_set_list; 1909 1910 struct list_head binding_list; 1910 1911 struct list_head module_list; ··· 1916 1915 unsigned int base_seq; 1917 1916 unsigned int gc_seq; 1918 1917 u8 validate_state; 1918 + struct work_struct destroy_work; 1919 1919 }; 1920 1920 1921 1921 extern unsigned int nf_tables_net_id;
+14 -10
net/netfilter/nf_tables_api.c
··· 34 34 static LIST_HEAD(nf_tables_expressions); 35 35 static LIST_HEAD(nf_tables_objects); 36 36 static LIST_HEAD(nf_tables_flowtables); 37 - static LIST_HEAD(nf_tables_destroy_list); 38 37 static LIST_HEAD(nf_tables_gc_list); 39 38 static DEFINE_SPINLOCK(nf_tables_destroy_list_lock); 40 39 static DEFINE_SPINLOCK(nf_tables_gc_list_lock); ··· 124 125 table->validate_state = new_validate_state; 125 126 } 126 127 static void nf_tables_trans_destroy_work(struct work_struct *w); 127 - static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work); 128 128 129 129 static void nft_trans_gc_work(struct work_struct *work); 130 130 static DECLARE_WORK(trans_gc_work, nft_trans_gc_work); ··· 10004 10006 10005 10007 static void nf_tables_trans_destroy_work(struct work_struct *w) 10006 10008 { 10009 + struct nftables_pernet *nft_net = container_of(w, struct nftables_pernet, destroy_work); 10007 10010 struct nft_trans *trans, *next; 10008 10011 LIST_HEAD(head); 10009 10012 10010 10013 spin_lock(&nf_tables_destroy_list_lock); 10011 - list_splice_init(&nf_tables_destroy_list, &head); 10014 + list_splice_init(&nft_net->destroy_list, &head); 10012 10015 spin_unlock(&nf_tables_destroy_list_lock); 10013 10016 10014 10017 if (list_empty(&head)) ··· 10023 10024 } 10024 10025 } 10025 10026 10026 - void nf_tables_trans_destroy_flush_work(void) 10027 + void nf_tables_trans_destroy_flush_work(struct net *net) 10027 10028 { 10028 - flush_work(&trans_destroy_work); 10029 + struct nftables_pernet *nft_net = nft_pernet(net); 10030 + 10031 + flush_work(&nft_net->destroy_work); 10029 10032 } 10030 10033 EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); 10031 10034 ··· 10485 10484 10486 10485 trans->put_net = true; 10487 10486 spin_lock(&nf_tables_destroy_list_lock); 10488 - list_splice_tail_init(&nft_net->commit_list, &nf_tables_destroy_list); 10487 + list_splice_tail_init(&nft_net->commit_list, &nft_net->destroy_list); 10489 10488 spin_unlock(&nf_tables_destroy_list_lock); 10490 
10489 10491 10490 nf_tables_module_autoload_cleanup(net); 10492 - schedule_work(&trans_destroy_work); 10491 + schedule_work(&nft_net->destroy_work); 10493 10492 10494 10493 mutex_unlock(&nft_net->commit_mutex); 10495 10494 } ··· 11854 11853 11855 11854 gc_seq = nft_gc_seq_begin(nft_net); 11856 11855 11857 - nf_tables_trans_destroy_flush_work(); 11856 + nf_tables_trans_destroy_flush_work(net); 11858 11857 again: 11859 11858 list_for_each_entry(table, &nft_net->tables, list) { 11860 11859 if (nft_table_has_owner(table) && ··· 11896 11895 11897 11896 INIT_LIST_HEAD(&nft_net->tables); 11898 11897 INIT_LIST_HEAD(&nft_net->commit_list); 11898 + INIT_LIST_HEAD(&nft_net->destroy_list); 11899 11899 INIT_LIST_HEAD(&nft_net->commit_set_list); 11900 11900 INIT_LIST_HEAD(&nft_net->binding_list); 11901 11901 INIT_LIST_HEAD(&nft_net->module_list); ··· 11905 11903 nft_net->base_seq = 1; 11906 11904 nft_net->gc_seq = 0; 11907 11905 nft_net->validate_state = NFT_VALIDATE_SKIP; 11906 + INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work); 11908 11907 11909 11908 return 0; 11910 11909 } ··· 11934 11931 if (!list_empty(&nft_net->module_list)) 11935 11932 nf_tables_module_autoload_cleanup(net); 11936 11933 11934 + cancel_work_sync(&nft_net->destroy_work); 11937 11935 __nft_release_tables(net); 11938 11936 11939 11937 nft_gc_seq_end(nft_net, gc_seq); 11940 11938 11941 11939 mutex_unlock(&nft_net->commit_mutex); 11940 + 11942 11941 WARN_ON_ONCE(!list_empty(&nft_net->tables)); 11943 11942 WARN_ON_ONCE(!list_empty(&nft_net->module_list)); 11944 11943 WARN_ON_ONCE(!list_empty(&nft_net->notify_list)); 11944 + WARN_ON_ONCE(!list_empty(&nft_net->destroy_list)); 11945 11945 } 11946 11946 11947 11947 static void nf_tables_exit_batch(struct list_head *net_exit_list) ··· 12035 12029 unregister_netdevice_notifier(&nf_tables_flowtable_notifier); 12036 12030 nft_chain_filter_fini(); 12037 12031 nft_chain_route_fini(); 12038 - nf_tables_trans_destroy_flush_work(); 12039 12032 
unregister_pernet_subsys(&nf_tables_net_ops); 12040 12033 cancel_work_sync(&trans_gc_work); 12041 - cancel_work_sync(&trans_destroy_work); 12042 12034 rcu_barrier(); 12043 12035 rhltable_destroy(&nft_objname_ht); 12044 12036 nf_tables_core_module_exit();
+4 -4
net/netfilter/nft_compat.c
··· 228 228 return 0; 229 229 } 230 230 231 - static void nft_compat_wait_for_destructors(void) 231 + static void nft_compat_wait_for_destructors(struct net *net) 232 232 { 233 233 /* xtables matches or targets can have side effects, e.g. 234 234 * creation/destruction of /proc files. ··· 236 236 * work queue. If we have pending invocations we thus 237 237 * need to wait for those to finish. 238 238 */ 239 - nf_tables_trans_destroy_flush_work(); 239 + nf_tables_trans_destroy_flush_work(net); 240 240 } 241 241 242 242 static int ··· 262 262 263 263 nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); 264 264 265 - nft_compat_wait_for_destructors(); 265 + nft_compat_wait_for_destructors(ctx->net); 266 266 267 267 ret = xt_check_target(&par, size, proto, inv); 268 268 if (ret < 0) { ··· 515 515 516 516 nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); 517 517 518 - nft_compat_wait_for_destructors(); 518 + nft_compat_wait_for_destructors(ctx->net); 519 519 520 520 return xt_check_match(&par, size, proto, inv); 521 521 }