Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

netfilter: use kvmalloc_array to allocate memory for hashtable

nf_ct_alloc_hashtable is used to allocate memory for the conntrack,
NAT bysrc and expectation hashtables. Assuming a 64k bucket size,
which means a 7th-order page allocation, __get_free_pages, called
by nf_ct_alloc_hashtable, will trigger direct memory reclaim
and stall for a long time when the system is under heavy memory pressure.

So replace the combination of __get_free_pages and vzalloc with
kvmalloc_array, which provides an overflow check and a fallback
if no high-order memory is available, and does not retry to reclaim
memory, reducing the stall.

Also remove nf_ct_free_hashtable, since it is now just a wrapper around kvfree.

Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Wang Li <wangli39@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>

authored by

Li RongQing and committed by
Pablo Neira Ayuso
285189c7 4ed8eb65

+11 -30
-2
include/net/netfilter/nf_conntrack.h
··· 176 176 */ 177 177 void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls); 178 178 179 - void nf_ct_free_hashtable(void *hash, unsigned int size); 180 - 181 179 int nf_conntrack_hash_check_insert(struct nf_conn *ct); 182 180 bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report); 183 181
+6 -23
net/netfilter/nf_conntrack_core.c
··· 2022 2022 return net_eq(nf_ct_net(i), data); 2023 2023 } 2024 2024 2025 - void nf_ct_free_hashtable(void *hash, unsigned int size) 2026 - { 2027 - if (is_vmalloc_addr(hash)) 2028 - vfree(hash); 2029 - else 2030 - free_pages((unsigned long)hash, 2031 - get_order(sizeof(struct hlist_head) * size)); 2032 - } 2033 - EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); 2034 - 2035 2025 void nf_conntrack_cleanup_start(void) 2036 2026 { 2037 2027 conntrack_gc_work.exiting = true; ··· 2032 2042 { 2033 2043 RCU_INIT_POINTER(nf_ct_hook, NULL); 2034 2044 cancel_delayed_work_sync(&conntrack_gc_work.dwork); 2035 - nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); 2045 + kvfree(nf_conntrack_hash); 2036 2046 2037 2047 nf_conntrack_proto_fini(); 2038 2048 nf_conntrack_seqadj_fini(); ··· 2098 2108 { 2099 2109 struct hlist_nulls_head *hash; 2100 2110 unsigned int nr_slots, i; 2101 - size_t sz; 2102 2111 2103 2112 if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head))) 2104 2113 return NULL; ··· 2105 2116 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); 2106 2117 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); 2107 2118 2108 - if (nr_slots > (UINT_MAX / sizeof(struct hlist_nulls_head))) 2109 - return NULL; 2110 - 2111 - sz = nr_slots * sizeof(struct hlist_nulls_head); 2112 - hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, 2113 - get_order(sz)); 2114 - if (!hash) 2115 - hash = vzalloc(sz); 2119 + hash = kvmalloc_array(nr_slots, sizeof(struct hlist_nulls_head), 2120 + GFP_KERNEL | __GFP_ZERO); 2116 2121 2117 2122 if (hash && nulls) 2118 2123 for (i = 0; i < nr_slots; i++) ··· 2133 2150 2134 2151 old_size = nf_conntrack_htable_size; 2135 2152 if (old_size == hashsize) { 2136 - nf_ct_free_hashtable(hash, hashsize); 2153 + kvfree(hash); 2137 2154 return 0; 2138 2155 } 2139 2156 ··· 2169 2186 local_bh_enable(); 2170 2187 2171 2188 synchronize_net(); 2172 - nf_ct_free_hashtable(old_hash, 
old_size); 2189 + kvfree(old_hash); 2173 2190 return 0; 2174 2191 } 2175 2192 ··· 2333 2350 err_expect: 2334 2351 kmem_cache_destroy(nf_conntrack_cachep); 2335 2352 err_cachep: 2336 - nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); 2353 + kvfree(nf_conntrack_hash); 2337 2354 return ret; 2338 2355 } 2339 2356
+1 -1
net/netfilter/nf_conntrack_expect.c
··· 712 712 { 713 713 rcu_barrier(); /* Wait for call_rcu() before destroy */ 714 714 kmem_cache_destroy(nf_ct_expect_cachep); 715 - nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_hsize); 715 + kvfree(nf_ct_expect_hash); 716 716 }
+2 -2
net/netfilter/nf_conntrack_helper.c
··· 562 562 563 563 return 0; 564 564 out_extend: 565 - nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); 565 + kvfree(nf_ct_helper_hash); 566 566 return ret; 567 567 } 568 568 569 569 void nf_conntrack_helper_fini(void) 570 570 { 571 571 nf_ct_extend_unregister(&helper_extend); 572 - nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize); 572 + kvfree(nf_ct_helper_hash); 573 573 }
+2 -2
net/netfilter/nf_nat_core.c
··· 1056 1056 1057 1057 ret = nf_ct_extend_register(&nat_extend); 1058 1058 if (ret < 0) { 1059 - nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); 1059 + kvfree(nf_nat_bysource); 1060 1060 pr_err("Unable to register extension\n"); 1061 1061 return ret; 1062 1062 } ··· 1094 1094 for (i = 0; i < NFPROTO_NUMPROTO; i++) 1095 1095 kfree(nf_nat_l4protos[i]); 1096 1096 synchronize_net(); 1097 - nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); 1097 + kvfree(nf_nat_bysource); 1098 1098 unregister_pernet_subsys(&nat_net_ops); 1099 1099 } 1100 1100