Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: conntrack: use a single hashtable for all namespaces

We already include the netns address in the hash and compare the netns pointers
during lookup, so even if namespaces have overlapping addresses, entries
will be spread across the table.

Assuming a 64k bucket size, this change saves 0.5 MB per namespace on a
64-bit system.

The NAT bysrc and expectation hashes are still per namespace; those will
be changed too, soon.

Future patch will also make conntrack object slab cache global again.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Florian Westphal and committed by
Pablo Neira Ayuso
56d52d48 1b8c8a9f

+62 -68
+1
include/net/netfilter/nf_conntrack_core.h
··· 81 81 82 82 #define CONNTRACK_LOCKS 1024 83 83 84 + extern struct hlist_nulls_head *nf_conntrack_hash; 84 85 extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; 85 86 void nf_conntrack_lock(spinlock_t *lock); 86 87
-2
include/net/netns/conntrack.h
··· 93 93 int sysctl_tstamp; 94 94 int sysctl_checksum; 95 95 96 - unsigned int htable_size; 97 96 struct kmem_cache *nf_conntrack_cachep; 98 - struct hlist_nulls_head *hash; 99 97 struct hlist_head *expect_hash; 100 98 struct ct_pcpu __percpu *pcpu_lists; 101 99 struct ip_conntrack_stat __percpu *stat;
+1 -1
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
··· 360 360 361 361 in->ctl_table[0].data = &nf_conntrack_max; 362 362 in->ctl_table[1].data = &net->ct.count; 363 - in->ctl_table[2].data = &net->ct.htable_size; 363 + in->ctl_table[2].data = &nf_conntrack_htable_size; 364 364 in->ctl_table[3].data = &net->ct.sysctl_checksum; 365 365 in->ctl_table[4].data = &net->ct.sysctl_log_invalid; 366 366 #endif
+4 -6
net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
··· 31 31 32 32 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) 33 33 { 34 - struct net *net = seq_file_net(seq); 35 34 struct ct_iter_state *st = seq->private; 36 35 struct hlist_nulls_node *n; 37 36 38 37 for (st->bucket = 0; 39 - st->bucket < net->ct.htable_size; 38 + st->bucket < nf_conntrack_htable_size; 40 39 st->bucket++) { 41 40 n = rcu_dereference( 42 - hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); 41 + hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); 43 42 if (!is_a_nulls(n)) 44 43 return n; 45 44 } ··· 48 49 static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, 49 50 struct hlist_nulls_node *head) 50 51 { 51 - struct net *net = seq_file_net(seq); 52 52 struct ct_iter_state *st = seq->private; 53 53 54 54 head = rcu_dereference(hlist_nulls_next_rcu(head)); 55 55 while (is_a_nulls(head)) { 56 56 if (likely(get_nulls_value(head) == st->bucket)) { 57 - if (++st->bucket >= net->ct.htable_size) 57 + if (++st->bucket >= nf_conntrack_htable_size) 58 58 return NULL; 59 59 } 60 60 head = rcu_dereference( 61 - hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); 61 + hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); 62 62 } 63 63 return head; 64 64 }
+40 -40
net/netfilter/nf_conntrack_core.c
··· 69 69 __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); 70 70 EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); 71 71 72 + struct hlist_nulls_head *nf_conntrack_hash __read_mostly; 73 + EXPORT_SYMBOL_GPL(nf_conntrack_hash); 74 + 72 75 static __read_mostly spinlock_t nf_conntrack_locks_all_lock; 73 76 static __read_mostly seqcount_t nf_conntrack_generation; 74 77 static __read_mostly bool nf_conntrack_locks_all; ··· 167 164 tuple->dst.protonum)); 168 165 } 169 166 170 - static u32 hash_bucket(u32 hash, const struct net *net) 167 + static u32 scale_hash(u32 hash) 171 168 { 172 - return reciprocal_scale(hash, net->ct.htable_size); 169 + return reciprocal_scale(hash, nf_conntrack_htable_size); 173 170 } 174 171 175 172 static u32 __hash_conntrack(const struct net *net, ··· 182 179 static u32 hash_conntrack(const struct net *net, 183 180 const struct nf_conntrack_tuple *tuple) 184 181 { 185 - return __hash_conntrack(net, tuple, net->ct.htable_size); 182 + return scale_hash(hash_conntrack_raw(tuple, net)); 186 183 } 187 184 188 185 bool ··· 481 478 begin: 482 479 do { 483 480 sequence = read_seqcount_begin(&nf_conntrack_generation); 484 - bucket = hash_bucket(hash, net); 485 - ct_hash = net->ct.hash; 481 + bucket = scale_hash(hash); 482 + ct_hash = nf_conntrack_hash; 486 483 } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); 487 484 488 485 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) { ··· 546 543 unsigned int hash, 547 544 unsigned int reply_hash) 548 545 { 549 - struct net *net = nf_ct_net(ct); 550 - 551 546 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, 552 - &net->ct.hash[hash]); 547 + &nf_conntrack_hash[hash]); 553 548 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, 554 - &net->ct.hash[reply_hash]); 549 + &nf_conntrack_hash[reply_hash]); 555 550 } 556 551 557 552 int ··· 574 573 } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); 575 574 576 575 /* See if 
there's one in the list already, including reverse */ 577 - hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) 576 + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) 578 577 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 579 578 zone, net)) 580 579 goto out; 581 580 582 - hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) 581 + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) 583 582 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, 584 583 zone, net)) 585 584 goto out; ··· 634 633 sequence = read_seqcount_begin(&nf_conntrack_generation); 635 634 /* reuse the hash saved before */ 636 635 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev; 637 - hash = hash_bucket(hash, net); 636 + hash = scale_hash(hash); 638 637 reply_hash = hash_conntrack(net, 639 638 &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 640 639 ··· 664 663 /* See if there's one in the list already, including reverse: 665 664 NAT could have grabbed it without realizing, since we're 666 665 not in the hash. If there is, we lost race. 
*/ 667 - hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) 666 + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) 668 667 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 669 668 zone, net)) 670 669 goto out; 671 670 672 - hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode) 671 + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) 673 672 if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, 674 673 zone, net)) 675 674 goto out; ··· 737 736 do { 738 737 sequence = read_seqcount_begin(&nf_conntrack_generation); 739 738 hash = hash_conntrack(net, tuple); 740 - ct_hash = net->ct.hash; 739 + ct_hash = nf_conntrack_hash; 741 740 } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); 742 741 743 742 hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) { ··· 774 773 local_bh_disable(); 775 774 restart: 776 775 sequence = read_seqcount_begin(&nf_conntrack_generation); 777 - hash = hash_bucket(_hash, net); 778 - for (; i < net->ct.htable_size; i++) { 776 + hash = scale_hash(_hash); 777 + for (; i < nf_conntrack_htable_size; i++) { 779 778 lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS]; 780 779 nf_conntrack_lock(lockp); 781 780 if (read_seqcount_retry(&nf_conntrack_generation, sequence)) { 782 781 spin_unlock(lockp); 783 782 goto restart; 784 783 } 785 - hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], 786 - hnnode) { 784 + hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], 785 + hnnode) { 787 786 tmp = nf_ct_tuplehash_to_ctrack(h); 788 787 if (!test_bit(IPS_ASSURED_BIT, &tmp->status) && 789 788 !nf_ct_is_dying(tmp) && ··· 794 793 cnt++; 795 794 } 796 795 797 - hash = (hash + 1) % net->ct.htable_size; 796 + hash = (hash + 1) % nf_conntrack_htable_size; 798 797 spin_unlock(lockp); 799 798 800 799 if (ct || cnt >= NF_CT_EVICTION_RANGE) ··· 1377 1376 int cpu; 1378 1377 spinlock_t *lockp; 1379 1378 1380 - for (; *bucket < net->ct.htable_size; 
(*bucket)++) { 1379 + for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { 1381 1380 lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; 1382 1381 local_bh_disable(); 1383 1382 nf_conntrack_lock(lockp); 1384 - if (*bucket < net->ct.htable_size) { 1385 - hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { 1383 + if (*bucket < nf_conntrack_htable_size) { 1384 + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) { 1386 1385 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) 1387 1386 continue; 1388 1387 ct = nf_ct_tuplehash_to_ctrack(h); ··· 1479 1478 while (untrack_refs() > 0) 1480 1479 schedule(); 1481 1480 1481 + nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); 1482 + 1482 1483 #ifdef CONFIG_NF_CONNTRACK_ZONES 1483 1484 nf_ct_extend_unregister(&nf_ct_zone_extend); 1484 1485 #endif ··· 1531 1528 } 1532 1529 1533 1530 list_for_each_entry(net, net_exit_list, exit_list) { 1534 - nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); 1535 1531 nf_conntrack_proto_pernet_fini(net); 1536 1532 nf_conntrack_helper_pernet_fini(net); 1537 1533 nf_conntrack_ecache_pernet_fini(net); ··· 1601 1599 * though since that required taking the locks. 
1602 1600 */ 1603 1601 1604 - for (i = 0; i < init_net.ct.htable_size; i++) { 1605 - while (!hlist_nulls_empty(&init_net.ct.hash[i])) { 1606 - h = hlist_nulls_entry(init_net.ct.hash[i].first, 1607 - struct nf_conntrack_tuple_hash, hnnode); 1602 + for (i = 0; i < nf_conntrack_htable_size; i++) { 1603 + while (!hlist_nulls_empty(&nf_conntrack_hash[i])) { 1604 + h = hlist_nulls_entry(nf_conntrack_hash[i].first, 1605 + struct nf_conntrack_tuple_hash, hnnode); 1608 1606 ct = nf_ct_tuplehash_to_ctrack(h); 1609 1607 hlist_nulls_del_rcu(&h->hnnode); 1610 1608 bucket = __hash_conntrack(nf_ct_net(ct), ··· 1612 1610 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); 1613 1611 } 1614 1612 } 1615 - old_size = init_net.ct.htable_size; 1616 - old_hash = init_net.ct.hash; 1613 + old_size = nf_conntrack_htable_size; 1614 + old_hash = nf_conntrack_hash; 1617 1615 1618 - init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; 1619 - init_net.ct.hash = hash; 1616 + nf_conntrack_hash = hash; 1617 + nf_conntrack_htable_size = hashsize; 1620 1618 1621 1619 write_seqcount_end(&nf_conntrack_generation); 1622 1620 nf_conntrack_all_unlock(); ··· 1672 1670 * entries. 
*/ 1673 1671 max_factor = 4; 1674 1672 } 1673 + 1674 + nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1); 1675 + if (!nf_conntrack_hash) 1676 + return -ENOMEM; 1677 + 1675 1678 nf_conntrack_max = max_factor * nf_conntrack_htable_size; 1676 1679 1677 1680 printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n", ··· 1755 1748 err_acct: 1756 1749 nf_conntrack_expect_fini(); 1757 1750 err_expect: 1751 + nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); 1758 1752 return ret; 1759 1753 } 1760 1754 ··· 1808 1800 goto err_cache; 1809 1801 } 1810 1802 1811 - net->ct.htable_size = nf_conntrack_htable_size; 1812 - net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1); 1813 - if (!net->ct.hash) { 1814 - printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); 1815 - goto err_hash; 1816 - } 1817 1803 ret = nf_conntrack_expect_pernet_init(net); 1818 1804 if (ret < 0) 1819 1805 goto err_expect; ··· 1839 1837 err_acct: 1840 1838 nf_conntrack_expect_pernet_fini(net); 1841 1839 err_expect: 1842 - nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size); 1843 - err_hash: 1844 1840 kmem_cache_destroy(net->ct.nf_conntrack_cachep); 1845 1841 err_cache: 1846 1842 kfree(net->ct.slabname);
+3 -3
net/netfilter/nf_conntrack_helper.c
··· 424 424 spin_unlock_bh(&pcpu->lock); 425 425 } 426 426 local_bh_disable(); 427 - for (i = 0; i < net->ct.htable_size; i++) { 427 + for (i = 0; i < nf_conntrack_htable_size; i++) { 428 428 nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); 429 - if (i < net->ct.htable_size) { 430 - hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) 429 + if (i < nf_conntrack_htable_size) { 430 + hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) 431 431 unhelp(h, me); 432 432 } 433 433 spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+4 -4
net/netfilter/nf_conntrack_netlink.c
··· 824 824 last = (struct nf_conn *)cb->args[1]; 825 825 826 826 local_bh_disable(); 827 - for (; cb->args[0] < net->ct.htable_size; cb->args[0]++) { 827 + for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { 828 828 restart: 829 829 lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS]; 830 830 nf_conntrack_lock(lockp); 831 - if (cb->args[0] >= net->ct.htable_size) { 831 + if (cb->args[0] >= nf_conntrack_htable_size) { 832 832 spin_unlock(lockp); 833 833 goto out; 834 834 } 835 - hlist_nulls_for_each_entry(h, n, &net->ct.hash[cb->args[0]], 836 - hnnode) { 835 + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[cb->args[0]], 836 + hnnode) { 837 837 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) 838 838 continue; 839 839 ct = nf_ct_tuplehash_to_ctrack(h);
+5 -8
net/netfilter/nf_conntrack_standalone.c
··· 54 54 55 55 static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) 56 56 { 57 - struct net *net = seq_file_net(seq); 58 57 struct ct_iter_state *st = seq->private; 59 58 struct hlist_nulls_node *n; 60 59 61 60 for (st->bucket = 0; 62 - st->bucket < net->ct.htable_size; 61 + st->bucket < nf_conntrack_htable_size; 63 62 st->bucket++) { 64 - n = rcu_dereference(hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); 63 + n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); 65 64 if (!is_a_nulls(n)) 66 65 return n; 67 66 } ··· 70 71 static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, 71 72 struct hlist_nulls_node *head) 72 73 { 73 - struct net *net = seq_file_net(seq); 74 74 struct ct_iter_state *st = seq->private; 75 75 76 76 head = rcu_dereference(hlist_nulls_next_rcu(head)); 77 77 while (is_a_nulls(head)) { 78 78 if (likely(get_nulls_value(head) == st->bucket)) { 79 - if (++st->bucket >= net->ct.htable_size) 79 + if (++st->bucket >= nf_conntrack_htable_size) 80 80 return NULL; 81 81 } 82 82 head = rcu_dereference( 83 83 hlist_nulls_first_rcu( 84 - &net->ct.hash[st->bucket])); 84 + &nf_conntrack_hash[st->bucket])); 85 85 } 86 86 return head; 87 87 } ··· 456 458 }, 457 459 { 458 460 .procname = "nf_conntrack_buckets", 459 - .data = &init_net.ct.htable_size, 461 + .data = &nf_conntrack_htable_size, 460 462 .maxlen = sizeof(unsigned int), 461 463 .mode = 0444, 462 464 .proc_handler = proc_dointvec, ··· 510 512 goto out_kmemdup; 511 513 512 514 table[1].data = &net->ct.count; 513 - table[2].data = &net->ct.htable_size; 514 515 table[3].data = &net->ct.sysctl_checksum; 515 516 table[4].data = &net->ct.sysctl_log_invalid; 516 517
+1 -1
net/netfilter/nf_nat_core.c
··· 824 824 static int __net_init nf_nat_net_init(struct net *net) 825 825 { 826 826 /* Leave them the same for the moment. */ 827 - net->ct.nat_htable_size = net->ct.htable_size; 827 + net->ct.nat_htable_size = nf_conntrack_htable_size; 828 828 net->ct.nat_bysource = nf_ct_alloc_hashtable(&net->ct.nat_htable_size, 0); 829 829 if (!net->ct.nat_bysource) 830 830 return -ENOMEM;