Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains Netfilter fixes for net:

1) Switch to RCU in x_tables to fix possible NULL pointer dereference,
from Subash Abhinov Kasiviswanathan.

2) Fix netlink dump of dynset timeouts longer than 23 days.

3) Add comment for the indirect serialization of the nft commit mutex
with rtnl_mutex.

4) Remove bogus check for confirmed conntrack when matching on the
conntrack ID, from Brett Mastbergen.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+55 -63
+4 -1
include/linux/netfilter/x_tables.h
··· 227 227 unsigned int valid_hooks; 228 228 229 229 /* Man behind the curtain... */ 230 - struct xt_table_info *private; 230 + struct xt_table_info __rcu *private; 231 231 232 232 /* Set this to THIS_MODULE if you are a module, otherwise NULL */ 233 233 struct module *me; ··· 447 447 } 448 448 449 449 struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); 450 + 451 + struct xt_table_info 452 + *xt_table_get_private_protected(const struct xt_table *table); 450 453 451 454 #ifdef CONFIG_COMPAT 452 455 #include <net/compat.h>
+4
include/net/netfilter/nf_tables.h
··· 1524 1524 void nft_chain_route_fini(void); 1525 1525 1526 1526 void nf_tables_trans_destroy_flush_work(void); 1527 + 1528 + int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); 1529 + __be64 nf_jiffies64_to_msecs(u64 input); 1530 + 1527 1531 #endif /* _NET_NF_TABLES_H */
+7 -7
net/ipv4/netfilter/arp_tables.c
··· 203 203 204 204 local_bh_disable(); 205 205 addend = xt_write_recseq_begin(); 206 - private = READ_ONCE(table->private); /* Address dependency. */ 206 + private = rcu_access_pointer(table->private); 207 207 cpu = smp_processor_id(); 208 208 table_base = private->entries; 209 209 jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; ··· 649 649 { 650 650 unsigned int countersize; 651 651 struct xt_counters *counters; 652 - const struct xt_table_info *private = table->private; 652 + const struct xt_table_info *private = xt_table_get_private_protected(table); 653 653 654 654 /* We need atomic snapshot of counters: rest doesn't change 655 655 * (other than comefrom, which userspace doesn't care ··· 673 673 unsigned int off, num; 674 674 const struct arpt_entry *e; 675 675 struct xt_counters *counters; 676 - struct xt_table_info *private = table->private; 676 + struct xt_table_info *private = xt_table_get_private_protected(table); 677 677 int ret = 0; 678 678 void *loc_cpu_entry; 679 679 ··· 807 807 t = xt_request_find_table_lock(net, NFPROTO_ARP, name); 808 808 if (!IS_ERR(t)) { 809 809 struct arpt_getinfo info; 810 - const struct xt_table_info *private = t->private; 810 + const struct xt_table_info *private = xt_table_get_private_protected(t); 811 811 #ifdef CONFIG_COMPAT 812 812 struct xt_table_info tmp; 813 813 ··· 860 860 861 861 t = xt_find_table_lock(net, NFPROTO_ARP, get.name); 862 862 if (!IS_ERR(t)) { 863 - const struct xt_table_info *private = t->private; 863 + const struct xt_table_info *private = xt_table_get_private_protected(t); 864 864 865 865 if (get.size == private->size) 866 866 ret = copy_entries_to_user(private->size, ··· 1017 1017 } 1018 1018 1019 1019 local_bh_disable(); 1020 - private = t->private; 1020 + private = xt_table_get_private_protected(t); 1021 1021 if (private->number != tmp.num_counters) { 1022 1022 ret = -EINVAL; 1023 1023 goto unlock_up_free; ··· 1330 1330 void __user *userptr) 1331 1331 { 1332 1332 struct xt_counters 
*counters; 1333 - const struct xt_table_info *private = table->private; 1333 + const struct xt_table_info *private = xt_table_get_private_protected(table); 1334 1334 void __user *pos; 1335 1335 unsigned int size; 1336 1336 int ret = 0;
+7 -7
net/ipv4/netfilter/ip_tables.c
··· 258 258 WARN_ON(!(table->valid_hooks & (1 << hook))); 259 259 local_bh_disable(); 260 260 addend = xt_write_recseq_begin(); 261 - private = READ_ONCE(table->private); /* Address dependency. */ 261 + private = rcu_access_pointer(table->private); 262 262 cpu = smp_processor_id(); 263 263 table_base = private->entries; 264 264 jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; ··· 791 791 { 792 792 unsigned int countersize; 793 793 struct xt_counters *counters; 794 - const struct xt_table_info *private = table->private; 794 + const struct xt_table_info *private = xt_table_get_private_protected(table); 795 795 796 796 /* We need atomic snapshot of counters: rest doesn't change 797 797 (other than comefrom, which userspace doesn't care ··· 815 815 unsigned int off, num; 816 816 const struct ipt_entry *e; 817 817 struct xt_counters *counters; 818 - const struct xt_table_info *private = table->private; 818 + const struct xt_table_info *private = xt_table_get_private_protected(table); 819 819 int ret = 0; 820 820 const void *loc_cpu_entry; 821 821 ··· 964 964 t = xt_request_find_table_lock(net, AF_INET, name); 965 965 if (!IS_ERR(t)) { 966 966 struct ipt_getinfo info; 967 - const struct xt_table_info *private = t->private; 967 + const struct xt_table_info *private = xt_table_get_private_protected(t); 968 968 #ifdef CONFIG_COMPAT 969 969 struct xt_table_info tmp; 970 970 ··· 1018 1018 1019 1019 t = xt_find_table_lock(net, AF_INET, get.name); 1020 1020 if (!IS_ERR(t)) { 1021 - const struct xt_table_info *private = t->private; 1021 + const struct xt_table_info *private = xt_table_get_private_protected(t); 1022 1022 if (get.size == private->size) 1023 1023 ret = copy_entries_to_user(private->size, 1024 1024 t, uptr->entrytable); ··· 1173 1173 } 1174 1174 1175 1175 local_bh_disable(); 1176 - private = t->private; 1176 + private = xt_table_get_private_protected(t); 1177 1177 if (private->number != tmp.num_counters) { 1178 1178 ret = -EINVAL; 1179 1179 goto 
unlock_up_free; ··· 1543 1543 void __user *userptr) 1544 1544 { 1545 1545 struct xt_counters *counters; 1546 - const struct xt_table_info *private = table->private; 1546 + const struct xt_table_info *private = xt_table_get_private_protected(table); 1547 1547 void __user *pos; 1548 1548 unsigned int size; 1549 1549 int ret = 0;
+7 -7
net/ipv6/netfilter/ip6_tables.c
··· 280 280 281 281 local_bh_disable(); 282 282 addend = xt_write_recseq_begin(); 283 - private = READ_ONCE(table->private); /* Address dependency. */ 283 + private = rcu_access_pointer(table->private); 284 284 cpu = smp_processor_id(); 285 285 table_base = private->entries; 286 286 jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; ··· 807 807 { 808 808 unsigned int countersize; 809 809 struct xt_counters *counters; 810 - const struct xt_table_info *private = table->private; 810 + const struct xt_table_info *private = xt_table_get_private_protected(table); 811 811 812 812 /* We need atomic snapshot of counters: rest doesn't change 813 813 (other than comefrom, which userspace doesn't care ··· 831 831 unsigned int off, num; 832 832 const struct ip6t_entry *e; 833 833 struct xt_counters *counters; 834 - const struct xt_table_info *private = table->private; 834 + const struct xt_table_info *private = xt_table_get_private_protected(table); 835 835 int ret = 0; 836 836 const void *loc_cpu_entry; 837 837 ··· 980 980 t = xt_request_find_table_lock(net, AF_INET6, name); 981 981 if (!IS_ERR(t)) { 982 982 struct ip6t_getinfo info; 983 - const struct xt_table_info *private = t->private; 983 + const struct xt_table_info *private = xt_table_get_private_protected(t); 984 984 #ifdef CONFIG_COMPAT 985 985 struct xt_table_info tmp; 986 986 ··· 1035 1035 1036 1036 t = xt_find_table_lock(net, AF_INET6, get.name); 1037 1037 if (!IS_ERR(t)) { 1038 - struct xt_table_info *private = t->private; 1038 + struct xt_table_info *private = xt_table_get_private_protected(t); 1039 1039 if (get.size == private->size) 1040 1040 ret = copy_entries_to_user(private->size, 1041 1041 t, uptr->entrytable); ··· 1189 1189 } 1190 1190 1191 1191 local_bh_disable(); 1192 - private = t->private; 1192 + private = xt_table_get_private_protected(t); 1193 1193 if (private->number != tmp.num_counters) { 1194 1194 ret = -EINVAL; 1195 1195 goto unlock_up_free; ··· 1552 1552 void __user *userptr) 1553 1553 { 
1554 1554 struct xt_counters *counters; 1555 - const struct xt_table_info *private = table->private; 1555 + const struct xt_table_info *private = xt_table_get_private_protected(table); 1556 1556 void __user *pos; 1557 1557 unsigned int size; 1558 1558 int ret = 0;
+6 -2
net/netfilter/nf_tables_api.c
··· 1723 1723 } 1724 1724 1725 1725 nla_strlcpy(ifname, attr, IFNAMSIZ); 1726 + /* nf_tables_netdev_event() is called under rtnl_mutex, this is 1727 + * indirectly serializing all the other holders of the commit_mutex with 1728 + * the rtnl_mutex. 1729 + */ 1726 1730 dev = __dev_get_by_name(net, ifname); 1727 1731 if (!dev) { 1728 1732 err = -ENOENT; ··· 3723 3719 return 0; 3724 3720 } 3725 3721 3726 - static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) 3722 + int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) 3727 3723 { 3728 3724 u64 ms = be64_to_cpu(nla_get_be64(nla)); 3729 3725 u64 max = (u64)(~((u64)0)); ··· 3737 3733 return 0; 3738 3734 } 3739 3735 3740 - static __be64 nf_jiffies64_to_msecs(u64 input) 3736 + __be64 nf_jiffies64_to_msecs(u64 input) 3741 3737 { 3742 3738 return cpu_to_be64(jiffies64_to_msecs(input)); 3743 3739 }
-2
net/netfilter/nft_ct.c
··· 177 177 } 178 178 #endif 179 179 case NFT_CT_ID: 180 - if (!nf_ct_is_confirmed(ct)) 181 - goto err; 182 180 *dest = nf_ct_get_id(ct); 183 181 return; 184 182 default:
+5 -3
net/netfilter/nft_dynset.c
··· 157 157 if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { 158 158 if (!(set->flags & NFT_SET_TIMEOUT)) 159 159 return -EINVAL; 160 - timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( 161 - tb[NFTA_DYNSET_TIMEOUT]))); 160 + 161 + err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout); 162 + if (err) 163 + return err; 162 164 } 163 165 164 166 priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); ··· 269 267 if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) 270 268 goto nla_put_failure; 271 269 if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, 272 - cpu_to_be64(jiffies_to_msecs(priv->timeout)), 270 + nf_jiffies64_to_msecs(priv->timeout), 273 271 NFTA_DYNSET_PAD)) 274 272 goto nla_put_failure; 275 273 if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr))
+15 -34
net/netfilter/x_tables.c
··· 1349 1349 } 1350 1350 EXPORT_SYMBOL(xt_counters_alloc); 1351 1351 1352 + struct xt_table_info 1353 + *xt_table_get_private_protected(const struct xt_table *table) 1354 + { 1355 + return rcu_dereference_protected(table->private, 1356 + mutex_is_locked(&xt[table->af].mutex)); 1357 + } 1358 + EXPORT_SYMBOL(xt_table_get_private_protected); 1359 + 1352 1360 struct xt_table_info * 1353 1361 xt_replace_table(struct xt_table *table, 1354 1362 unsigned int num_counters, ··· 1364 1356 int *error) 1365 1357 { 1366 1358 struct xt_table_info *private; 1367 - unsigned int cpu; 1368 1359 int ret; 1369 1360 1370 1361 ret = xt_jumpstack_alloc(newinfo); ··· 1373 1366 } 1374 1367 1375 1368 /* Do the substitution. */ 1376 - local_bh_disable(); 1377 - private = table->private; 1369 + private = xt_table_get_private_protected(table); 1378 1370 1379 1371 /* Check inside lock: is the old number correct? */ 1380 1372 if (num_counters != private->number) { 1381 1373 pr_debug("num_counters != table->private->number (%u/%u)\n", 1382 1374 num_counters, private->number); 1383 - local_bh_enable(); 1384 1375 *error = -EAGAIN; 1385 1376 return NULL; 1386 1377 } 1387 1378 1388 1379 newinfo->initial_entries = private->initial_entries; 1389 - /* 1390 - * Ensure contents of newinfo are visible before assigning to 1391 - * private. 1392 - */ 1393 - smp_wmb(); 1394 - table->private = newinfo; 1395 1380 1396 - /* make sure all cpus see new ->private value */ 1397 - smp_wmb(); 1398 - 1399 - /* 1400 - * Even though table entries have now been swapped, other CPU's 1401 - * may still be using the old entries... 1402 - */ 1403 - local_bh_enable(); 1404 - 1405 - /* ... 
so wait for even xt_recseq on all cpus */ 1406 - for_each_possible_cpu(cpu) { 1407 - seqcount_t *s = &per_cpu(xt_recseq, cpu); 1408 - u32 seq = raw_read_seqcount(s); 1409 - 1410 - if (seq & 1) { 1411 - do { 1412 - cond_resched(); 1413 - cpu_relax(); 1414 - } while (seq == raw_read_seqcount(s)); 1415 - } 1416 - } 1381 + rcu_assign_pointer(table->private, newinfo); 1382 + synchronize_rcu(); 1417 1383 1418 1384 audit_log_nfcfg(table->name, table->af, private->number, 1419 1385 !private->number ? AUDIT_XT_OP_REGISTER : ··· 1422 1442 } 1423 1443 1424 1444 /* Simplifies replace_table code. */ 1425 - table->private = bootstrap; 1445 + rcu_assign_pointer(table->private, bootstrap); 1426 1446 1427 1447 if (!xt_replace_table(table, 0, newinfo, &ret)) 1428 1448 goto unlock; 1429 1449 1430 - private = table->private; 1450 + private = xt_table_get_private_protected(table); 1431 1451 pr_debug("table->private->number = %u\n", private->number); 1432 1452 1433 1453 /* save number of initial entries */ ··· 1450 1470 struct xt_table_info *private; 1451 1471 1452 1472 mutex_lock(&xt[table->af].mutex); 1453 - private = table->private; 1473 + private = xt_table_get_private_protected(table); 1474 + RCU_INIT_POINTER(table->private, NULL); 1454 1475 list_del(&table->list); 1455 1476 mutex_unlock(&xt[table->af].mutex); 1456 1477 audit_log_nfcfg(table->name, table->af, private->number,