Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

1) Several patches to restore use of memory barriers instead of RCU to
ensure consistent access to ruleset, from Mark Tomlinson.

2) Fix dump of expectation via ctnetlink, from Florian Westphal.

3) GRE helper works for IPv6, from Ludovic Senecaux.

4) Set error on unsupported flowtable flags.

5) Use delayed instead of deferrable workqueue in the flowtable,
from Yinjun Zhang.

6) Fix spurious EEXIST in case of add-after-delete flowtable in
the same batch.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+86 -49
+2 -5
include/linux/netfilter/x_tables.h
··· 227 227 unsigned int valid_hooks; 228 228 229 229 /* Man behind the curtain... */ 230 - struct xt_table_info __rcu *private; 230 + struct xt_table_info *private; 231 231 232 232 /* Set this to THIS_MODULE if you are a module, otherwise NULL */ 233 233 struct module *me; ··· 376 376 * since addend is most likely 1 377 377 */ 378 378 __this_cpu_add(xt_recseq.sequence, addend); 379 - smp_wmb(); 379 + smp_mb(); 380 380 381 381 return addend; 382 382 } ··· 447 447 } 448 448 449 449 struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); 450 - 451 - struct xt_table_info 452 - *xt_table_get_private_protected(const struct xt_table *table); 453 450 454 451 #ifdef CONFIG_COMPAT 455 452 #include <net/compat.h>
+3
include/net/netfilter/nf_tables.h
··· 1536 1536 struct nft_flowtable *flowtable; 1537 1537 bool update; 1538 1538 struct list_head hook_list; 1539 + u32 flags; 1539 1540 }; 1540 1541 1541 1542 #define nft_trans_flowtable(trans) \ ··· 1545 1544 (((struct nft_trans_flowtable *)trans->data)->update) 1546 1545 #define nft_trans_flowtable_hooks(trans) \ 1547 1546 (((struct nft_trans_flowtable *)trans->data)->hook_list) 1547 + #define nft_trans_flowtable_flags(trans) \ 1548 + (((struct nft_trans_flowtable *)trans->data)->flags) 1548 1549 1549 1550 int __init nft_chain_filter_init(void); 1550 1551 void nft_chain_filter_fini(void);
+8 -8
net/ipv4/netfilter/arp_tables.c
··· 203 203 204 204 local_bh_disable(); 205 205 addend = xt_write_recseq_begin(); 206 - private = rcu_access_pointer(table->private); 206 + private = READ_ONCE(table->private); /* Address dependency. */ 207 207 cpu = smp_processor_id(); 208 208 table_base = private->entries; 209 209 jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; ··· 649 649 { 650 650 unsigned int countersize; 651 651 struct xt_counters *counters; 652 - const struct xt_table_info *private = xt_table_get_private_protected(table); 652 + const struct xt_table_info *private = table->private; 653 653 654 654 /* We need atomic snapshot of counters: rest doesn't change 655 655 * (other than comefrom, which userspace doesn't care ··· 673 673 unsigned int off, num; 674 674 const struct arpt_entry *e; 675 675 struct xt_counters *counters; 676 - struct xt_table_info *private = xt_table_get_private_protected(table); 676 + struct xt_table_info *private = table->private; 677 677 int ret = 0; 678 678 void *loc_cpu_entry; 679 679 ··· 807 807 t = xt_request_find_table_lock(net, NFPROTO_ARP, name); 808 808 if (!IS_ERR(t)) { 809 809 struct arpt_getinfo info; 810 - const struct xt_table_info *private = xt_table_get_private_protected(t); 810 + const struct xt_table_info *private = t->private; 811 811 #ifdef CONFIG_COMPAT 812 812 struct xt_table_info tmp; 813 813 ··· 860 860 861 861 t = xt_find_table_lock(net, NFPROTO_ARP, get.name); 862 862 if (!IS_ERR(t)) { 863 - const struct xt_table_info *private = xt_table_get_private_protected(t); 863 + const struct xt_table_info *private = t->private; 864 864 865 865 if (get.size == private->size) 866 866 ret = copy_entries_to_user(private->size, ··· 1017 1017 } 1018 1018 1019 1019 local_bh_disable(); 1020 - private = xt_table_get_private_protected(t); 1020 + private = t->private; 1021 1021 if (private->number != tmp.num_counters) { 1022 1022 ret = -EINVAL; 1023 1023 goto unlock_up_free; ··· 1330 1330 void __user *userptr) 1331 1331 { 1332 1332 struct xt_counters 
*counters; 1333 - const struct xt_table_info *private = xt_table_get_private_protected(table); 1333 + const struct xt_table_info *private = table->private; 1334 1334 void __user *pos; 1335 1335 unsigned int size; 1336 1336 int ret = 0; ··· 1379 1379 xt_compat_lock(NFPROTO_ARP); 1380 1380 t = xt_find_table_lock(net, NFPROTO_ARP, get.name); 1381 1381 if (!IS_ERR(t)) { 1382 - const struct xt_table_info *private = xt_table_get_private_protected(t); 1382 + const struct xt_table_info *private = t->private; 1383 1383 struct xt_table_info info; 1384 1384 1385 1385 ret = compat_table_info(private, &info);
+8 -8
net/ipv4/netfilter/ip_tables.c
··· 258 258 WARN_ON(!(table->valid_hooks & (1 << hook))); 259 259 local_bh_disable(); 260 260 addend = xt_write_recseq_begin(); 261 - private = rcu_access_pointer(table->private); 261 + private = READ_ONCE(table->private); /* Address dependency. */ 262 262 cpu = smp_processor_id(); 263 263 table_base = private->entries; 264 264 jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; ··· 791 791 { 792 792 unsigned int countersize; 793 793 struct xt_counters *counters; 794 - const struct xt_table_info *private = xt_table_get_private_protected(table); 794 + const struct xt_table_info *private = table->private; 795 795 796 796 /* We need atomic snapshot of counters: rest doesn't change 797 797 (other than comefrom, which userspace doesn't care ··· 815 815 unsigned int off, num; 816 816 const struct ipt_entry *e; 817 817 struct xt_counters *counters; 818 - const struct xt_table_info *private = xt_table_get_private_protected(table); 818 + const struct xt_table_info *private = table->private; 819 819 int ret = 0; 820 820 const void *loc_cpu_entry; 821 821 ··· 964 964 t = xt_request_find_table_lock(net, AF_INET, name); 965 965 if (!IS_ERR(t)) { 966 966 struct ipt_getinfo info; 967 - const struct xt_table_info *private = xt_table_get_private_protected(t); 967 + const struct xt_table_info *private = t->private; 968 968 #ifdef CONFIG_COMPAT 969 969 struct xt_table_info tmp; 970 970 ··· 1018 1018 1019 1019 t = xt_find_table_lock(net, AF_INET, get.name); 1020 1020 if (!IS_ERR(t)) { 1021 - const struct xt_table_info *private = xt_table_get_private_protected(t); 1021 + const struct xt_table_info *private = t->private; 1022 1022 if (get.size == private->size) 1023 1023 ret = copy_entries_to_user(private->size, 1024 1024 t, uptr->entrytable); ··· 1173 1173 } 1174 1174 1175 1175 local_bh_disable(); 1176 - private = xt_table_get_private_protected(t); 1176 + private = t->private; 1177 1177 if (private->number != tmp.num_counters) { 1178 1178 ret = -EINVAL; 1179 1179 goto 
unlock_up_free; ··· 1543 1543 void __user *userptr) 1544 1544 { 1545 1545 struct xt_counters *counters; 1546 - const struct xt_table_info *private = xt_table_get_private_protected(table); 1546 + const struct xt_table_info *private = table->private; 1547 1547 void __user *pos; 1548 1548 unsigned int size; 1549 1549 int ret = 0; ··· 1589 1589 xt_compat_lock(AF_INET); 1590 1590 t = xt_find_table_lock(net, AF_INET, get.name); 1591 1591 if (!IS_ERR(t)) { 1592 - const struct xt_table_info *private = xt_table_get_private_protected(t); 1592 + const struct xt_table_info *private = t->private; 1593 1593 struct xt_table_info info; 1594 1594 ret = compat_table_info(private, &info); 1595 1595 if (!ret && get.size == info.size)
+8 -8
net/ipv6/netfilter/ip6_tables.c
··· 280 280 281 281 local_bh_disable(); 282 282 addend = xt_write_recseq_begin(); 283 - private = rcu_access_pointer(table->private); 283 + private = READ_ONCE(table->private); /* Address dependency. */ 284 284 cpu = smp_processor_id(); 285 285 table_base = private->entries; 286 286 jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; ··· 807 807 { 808 808 unsigned int countersize; 809 809 struct xt_counters *counters; 810 - const struct xt_table_info *private = xt_table_get_private_protected(table); 810 + const struct xt_table_info *private = table->private; 811 811 812 812 /* We need atomic snapshot of counters: rest doesn't change 813 813 (other than comefrom, which userspace doesn't care ··· 831 831 unsigned int off, num; 832 832 const struct ip6t_entry *e; 833 833 struct xt_counters *counters; 834 - const struct xt_table_info *private = xt_table_get_private_protected(table); 834 + const struct xt_table_info *private = table->private; 835 835 int ret = 0; 836 836 const void *loc_cpu_entry; 837 837 ··· 980 980 t = xt_request_find_table_lock(net, AF_INET6, name); 981 981 if (!IS_ERR(t)) { 982 982 struct ip6t_getinfo info; 983 - const struct xt_table_info *private = xt_table_get_private_protected(t); 983 + const struct xt_table_info *private = t->private; 984 984 #ifdef CONFIG_COMPAT 985 985 struct xt_table_info tmp; 986 986 ··· 1035 1035 1036 1036 t = xt_find_table_lock(net, AF_INET6, get.name); 1037 1037 if (!IS_ERR(t)) { 1038 - struct xt_table_info *private = xt_table_get_private_protected(t); 1038 + struct xt_table_info *private = t->private; 1039 1039 if (get.size == private->size) 1040 1040 ret = copy_entries_to_user(private->size, 1041 1041 t, uptr->entrytable); ··· 1189 1189 } 1190 1190 1191 1191 local_bh_disable(); 1192 - private = xt_table_get_private_protected(t); 1192 + private = t->private; 1193 1193 if (private->number != tmp.num_counters) { 1194 1194 ret = -EINVAL; 1195 1195 goto unlock_up_free; ··· 1552 1552 void __user *userptr) 1553 1553 { 
1554 1554 struct xt_counters *counters; 1555 - const struct xt_table_info *private = xt_table_get_private_protected(table); 1555 + const struct xt_table_info *private = table->private; 1556 1556 void __user *pos; 1557 1557 unsigned int size; 1558 1558 int ret = 0; ··· 1598 1598 xt_compat_lock(AF_INET6); 1599 1599 t = xt_find_table_lock(net, AF_INET6, get.name); 1600 1600 if (!IS_ERR(t)) { 1601 - const struct xt_table_info *private = xt_table_get_private_protected(t); 1601 + const struct xt_table_info *private = t->private; 1602 1602 struct xt_table_info info; 1603 1603 ret = compat_table_info(private, &info); 1604 1604 if (!ret && get.size == info.size)
+1
net/netfilter/nf_conntrack_netlink.c
··· 2962 2962 memset(&m, 0xFF, sizeof(m)); 2963 2963 memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); 2964 2964 m.src.u.all = mask->src.u.all; 2965 + m.src.l3num = tuple->src.l3num; 2965 2966 m.dst.protonum = tuple->dst.protonum; 2966 2967 2967 2968 nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK);
-3
net/netfilter/nf_conntrack_proto_gre.c
··· 218 218 enum ip_conntrack_info ctinfo, 219 219 const struct nf_hook_state *state) 220 220 { 221 - if (state->pf != NFPROTO_IPV4) 222 - return -NF_ACCEPT; 223 - 224 221 if (!nf_ct_is_confirmed(ct)) { 225 222 unsigned int *timeouts = nf_ct_timeout_lookup(ct); 226 223
+1 -1
net/netfilter/nf_flow_table_core.c
··· 506 506 { 507 507 int err; 508 508 509 - INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); 509 + INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); 510 510 flow_block_init(&flowtable->flow_block); 511 511 init_rwsem(&flowtable->flow_block_lock); 512 512
+21 -1
net/netfilter/nf_tables_api.c
··· 6783 6783 6784 6784 list_for_each_entry(hook, hook_list, list) { 6785 6785 list_for_each_entry(ft, &table->flowtables, list) { 6786 + if (!nft_is_active_next(net, ft)) 6787 + continue; 6788 + 6786 6789 list_for_each_entry(hook2, &ft->hook_list, list) { 6787 6790 if (hook->ops.dev == hook2->ops.dev && 6788 6791 hook->ops.pf == hook2->ops.pf) { ··· 6845 6842 struct nft_hook *hook, *next; 6846 6843 struct nft_trans *trans; 6847 6844 bool unregister = false; 6845 + u32 flags; 6848 6846 int err; 6849 6847 6850 6848 err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK], ··· 6858 6854 list_del(&hook->list); 6859 6855 kfree(hook); 6860 6856 } 6857 + } 6858 + 6859 + if (nla[NFTA_FLOWTABLE_FLAGS]) { 6860 + flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); 6861 + if (flags & ~NFT_FLOWTABLE_MASK) 6862 + return -EOPNOTSUPP; 6863 + if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^ 6864 + (flags & NFT_FLOWTABLE_HW_OFFLOAD)) 6865 + return -EOPNOTSUPP; 6866 + } else { 6867 + flags = flowtable->data.flags; 6861 6868 } 6862 6869 6863 6870 err = nft_register_flowtable_net_hooks(ctx->net, ctx->table, ··· 6884 6869 goto err_flowtable_update_hook; 6885 6870 } 6886 6871 6872 + nft_trans_flowtable_flags(trans) = flags; 6887 6873 nft_trans_flowtable(trans) = flowtable; 6888 6874 nft_trans_flowtable_update(trans) = true; 6889 6875 INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); ··· 6979 6963 if (nla[NFTA_FLOWTABLE_FLAGS]) { 6980 6964 flowtable->data.flags = 6981 6965 ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); 6982 - if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) 6966 + if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) { 6967 + err = -EOPNOTSUPP; 6983 6968 goto err3; 6969 + } 6984 6970 } 6985 6971 6986 6972 write_pnet(&flowtable->data.net, net); ··· 8194 8176 break; 8195 8177 case NFT_MSG_NEWFLOWTABLE: 8196 8178 if (nft_trans_flowtable_update(trans)) { 8179 + nft_trans_flowtable(trans)->data.flags = 8180 + nft_trans_flowtable_flags(trans); 8197 8181 
nf_tables_flowtable_notify(&trans->ctx, 8198 8182 nft_trans_flowtable(trans), 8199 8183 &nft_trans_flowtable_hooks(trans),
+34 -15
net/netfilter/x_tables.c
··· 1351 1351 } 1352 1352 EXPORT_SYMBOL(xt_counters_alloc); 1353 1353 1354 - struct xt_table_info 1355 - *xt_table_get_private_protected(const struct xt_table *table) 1356 - { 1357 - return rcu_dereference_protected(table->private, 1358 - mutex_is_locked(&xt[table->af].mutex)); 1359 - } 1360 - EXPORT_SYMBOL(xt_table_get_private_protected); 1361 - 1362 1354 struct xt_table_info * 1363 1355 xt_replace_table(struct xt_table *table, 1364 1356 unsigned int num_counters, ··· 1358 1366 int *error) 1359 1367 { 1360 1368 struct xt_table_info *private; 1369 + unsigned int cpu; 1361 1370 int ret; 1362 1371 1363 1372 ret = xt_jumpstack_alloc(newinfo); ··· 1368 1375 } 1369 1376 1370 1377 /* Do the substitution. */ 1371 - private = xt_table_get_private_protected(table); 1378 + local_bh_disable(); 1379 + private = table->private; 1372 1380 1373 1381 /* Check inside lock: is the old number correct? */ 1374 1382 if (num_counters != private->number) { 1375 1383 pr_debug("num_counters != table->private->number (%u/%u)\n", 1376 1384 num_counters, private->number); 1385 + local_bh_enable(); 1377 1386 *error = -EAGAIN; 1378 1387 return NULL; 1379 1388 } 1380 1389 1381 1390 newinfo->initial_entries = private->initial_entries; 1391 + /* 1392 + * Ensure contents of newinfo are visible before assigning to 1393 + * private. 1394 + */ 1395 + smp_wmb(); 1396 + table->private = newinfo; 1382 1397 1383 - rcu_assign_pointer(table->private, newinfo); 1384 - synchronize_rcu(); 1398 + /* make sure all cpus see new ->private value */ 1399 + smp_mb(); 1400 + 1401 + /* 1402 + * Even though table entries have now been swapped, other CPU's 1403 + * may still be using the old entries... 1404 + */ 1405 + local_bh_enable(); 1406 + 1407 + /* ... 
so wait for even xt_recseq on all cpus */ 1408 + for_each_possible_cpu(cpu) { 1409 + seqcount_t *s = &per_cpu(xt_recseq, cpu); 1410 + u32 seq = raw_read_seqcount(s); 1411 + 1412 + if (seq & 1) { 1413 + do { 1414 + cond_resched(); 1415 + cpu_relax(); 1416 + } while (seq == raw_read_seqcount(s)); 1417 + } 1418 + } 1385 1419 1386 1420 audit_log_nfcfg(table->name, table->af, private->number, 1387 1421 !private->number ? AUDIT_XT_OP_REGISTER : ··· 1444 1424 } 1445 1425 1446 1426 /* Simplifies replace_table code. */ 1447 - rcu_assign_pointer(table->private, bootstrap); 1427 + table->private = bootstrap; 1448 1428 1449 1429 if (!xt_replace_table(table, 0, newinfo, &ret)) 1450 1430 goto unlock; 1451 1431 1452 - private = xt_table_get_private_protected(table); 1432 + private = table->private; 1453 1433 pr_debug("table->private->number = %u\n", private->number); 1454 1434 1455 1435 /* save number of initial entries */ ··· 1472 1452 struct xt_table_info *private; 1473 1453 1474 1454 mutex_lock(&xt[table->af].mutex); 1475 - private = xt_table_get_private_protected(table); 1476 - RCU_INIT_POINTER(table->private, NULL); 1455 + private = table->private; 1477 1456 list_del(&table->list); 1478 1457 mutex_unlock(&xt[table->af].mutex); 1479 1458 audit_log_nfcfg(table->name, table->af, private->number,