Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: nf_tables: fix suspicious RCU usage in nft_chain_stats_replace()

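basechain->stats is RCU-protected data which is updated from
nft_chain_stats_replace(). This function is executed from the commit
phase, which holds the per-netns nf_tables commit mutex - not the global
nfnetlink subsystem mutex. nfnl_dereference() checks for the latter via
lockdep_nfnl_is_held(), so lockdep flags the access as suspicious.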

Test commands to reproduce the problem are:
%iptables-nft -I INPUT
%iptables-nft -Z
%iptables-nft -Z

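This patch accesses basechain->stats through the RCU accessors
(rcu_access_pointer(), rcu_assign_pointer(), and
rcu_dereference_protected() checked against the commit mutex instead of
the nfnetlink subsystem mutex) and removes nfnl_dereference(), to fix a
splat that looks like: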

[89279.358755] =============================
[89279.363656] WARNING: suspicious RCU usage
[89279.368458] 4.20.0-rc2+ #44 Tainted: G W L
[89279.374661] -----------------------------
[89279.379542] net/netfilter/nf_tables_api.c:1404 suspicious rcu_dereference_protected() usage!
[...]
[89279.406556] 1 lock held by iptables-nft/5225:
[89279.411728] #0: 00000000bf45a000 (&net->nft.commit_mutex){+.+.}, at: nf_tables_valid_genid+0x1f/0x70 [nf_tables]
[89279.424022] stack backtrace:
[89279.429236] CPU: 0 PID: 5225 Comm: iptables-nft Tainted: G W L 4.20.0-rc2+ #44
[89279.430135] Call Trace:
[89279.430135] dump_stack+0xc9/0x16b
[89279.430135] ? show_regs_print_info+0x5/0x5
[89279.430135] ? lockdep_rcu_suspicious+0x117/0x160
[89279.430135] nft_chain_commit_update+0x4ea/0x640 [nf_tables]
[89279.430135] ? sched_clock_local+0xd4/0x140
[89279.430135] ? check_flags.part.35+0x440/0x440
[89279.430135] ? __rhashtable_remove_fast.constprop.67+0xec0/0xec0 [nf_tables]
[89279.430135] ? sched_clock_cpu+0x126/0x170
[89279.430135] ? find_held_lock+0x39/0x1c0
[89279.430135] ? hlock_class+0x140/0x140
[89279.430135] ? is_bpf_text_address+0x5/0xf0
[89279.430135] ? check_flags.part.35+0x440/0x440
[89279.430135] ? __lock_is_held+0xb4/0x140
[89279.430135] nf_tables_commit+0x2555/0x39c0 [nf_tables]

Fixes: f102d66b335a4 ("netfilter: nf_tables: use dedicated mutex to guard transactions")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

Authored by Taehee Yoo and committed by Pablo Neira Ayuso
4c05ec47 986103e7

+14 -21
-12
include/linux/netfilter/nfnetlink.h
··· 62 62 } 63 63 #endif /* CONFIG_PROVE_LOCKING */ 64 64 65 - /* 66 - * nfnl_dereference - fetch RCU pointer when updates are prevented by subsys mutex 67 - * 68 - * @p: The pointer to read, prior to dereferencing 69 - * @ss: The nfnetlink subsystem ID 70 - * 71 - * Return the value of the specified RCU-protected pointer, but omit 72 - * the READ_ONCE(), because caller holds the NFNL subsystem mutex. 73 - */ 74 - #define nfnl_dereference(p, ss) \ 75 - rcu_dereference_protected(p, lockdep_nfnl_is_held(ss)) 76 - 77 65 #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ 78 66 MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) 79 67
+13 -8
net/netfilter/nf_tables_api.c
··· 1216 1216 if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) 1217 1217 goto nla_put_failure; 1218 1218 1219 - if (basechain->stats && nft_dump_stats(skb, basechain->stats)) 1219 + if (rcu_access_pointer(basechain->stats) && 1220 + nft_dump_stats(skb, rcu_dereference(basechain->stats))) 1220 1221 goto nla_put_failure; 1221 1222 } 1222 1223 ··· 1393 1392 return newstats; 1394 1393 } 1395 1394 1396 - static void nft_chain_stats_replace(struct nft_base_chain *chain, 1395 + static void nft_chain_stats_replace(struct net *net, 1396 + struct nft_base_chain *chain, 1397 1397 struct nft_stats __percpu *newstats) 1398 1398 { 1399 1399 struct nft_stats __percpu *oldstats; ··· 1402 1400 if (newstats == NULL) 1403 1401 return; 1404 1402 1405 - if (chain->stats) { 1406 - oldstats = nfnl_dereference(chain->stats, NFNL_SUBSYS_NFTABLES); 1403 + if (rcu_access_pointer(chain->stats)) { 1404 + oldstats = rcu_dereference_protected(chain->stats, 1405 + lockdep_commit_lock_is_held(net)); 1407 1406 rcu_assign_pointer(chain->stats, newstats); 1408 1407 synchronize_rcu(); 1409 1408 free_percpu(oldstats); ··· 1442 1439 struct nft_base_chain *basechain = nft_base_chain(chain); 1443 1440 1444 1441 module_put(basechain->type->owner); 1445 - free_percpu(basechain->stats); 1446 - if (basechain->stats) 1442 + if (rcu_access_pointer(basechain->stats)) { 1447 1443 static_branch_dec(&nft_counters_enabled); 1444 + free_percpu(rcu_dereference_raw(basechain->stats)); 1445 + } 1448 1446 kfree(chain->name); 1449 1447 kfree(basechain); 1450 1448 } else { ··· 1594 1590 kfree(basechain); 1595 1591 return PTR_ERR(stats); 1596 1592 } 1597 - basechain->stats = stats; 1593 + rcu_assign_pointer(basechain->stats, stats); 1598 1594 static_branch_inc(&nft_counters_enabled); 1599 1595 } 1600 1596 ··· 6184 6180 return; 6185 6181 6186 6182 basechain = nft_base_chain(trans->ctx.chain); 6187 - nft_chain_stats_replace(basechain, nft_trans_chain_stats(trans)); 6183 + 
nft_chain_stats_replace(trans->ctx.net, basechain, 6184 + nft_trans_chain_stats(trans)); 6188 6185 6189 6186 switch (nft_trans_chain_policy(trans)) { 6190 6187 case NF_DROP:
+1 -1
net/netfilter/nf_tables_core.c
··· 101 101 struct nft_stats *stats; 102 102 103 103 base_chain = nft_base_chain(chain); 104 - if (!base_chain->stats) 104 + if (!rcu_access_pointer(base_chain->stats)) 105 105 return; 106 106 107 107 local_bh_disable();