Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'bpf-fixes-for-per-cpu-kptr'

Hou Tao says:

====================
bpf: Fixes for per-cpu kptr

From: Hou Tao <houtao1@huawei.com>

Hi,

The patchset aims to fix the problems found in the review of per-cpu
kptr patch-set [0]. Patch #1 moves pcpu_lock after the invocation of
pcpu_chunk_addr_search() and it is a micro-optimization for
free_percpu(). The reason for including it in the patchset is that the
same logic is used in the newly-added API pcpu_alloc_size(). Patch #2 introduces
pcpu_alloc_size() for dynamic per-cpu area. Patch #2 and #3 use
pcpu_alloc_size() to check whether or not unit_size matches with the
size of underlying per-cpu area and to select a matching bpf_mem_cache.
Patch #4 fixes the freeing of per-cpu kptr when these kptrs are freed by
map destruction. The last patch adds test cases for these problems.

Please see individual patches for details. And comments are always
welcome.

Change Log:
v3:
* rebased on bpf-next
* patch 2: update API document to note that pcpu_alloc_size() doesn't
support statically allocated per-cpu area. (Dennis)
* patch 1 & 2: add Acked-by from Dennis

v2: https://lore.kernel.org/bpf/20231018113343.2446300-1-houtao@huaweicloud.com/
* add a new patch "don't acquire pcpu_lock for pcpu_chunk_addr_search()"
* patch 2: change type of bit_off and end to unsigned long (Andrew)
* patch 2: rename the new API as pcpu_alloc_size and follow 80-column convention (Dennis)
* patch 5: move the common declaration into bpf.h (Stanislav, Alexei)

v1: https://lore.kernel.org/bpf/20231007135106.3031284-1-houtao@huaweicloud.com/

[0]: https://lore.kernel.org/bpf/20230827152729.1995219-1-yonghong.song@linux.dev
====================

Link: https://lore.kernel.org/r/20231020133202.4043247-1-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+271 -37
+1
include/linux/bpf.h
··· 2058 2058 bool btf_record_equal(const struct btf_record *rec_a, const struct btf_record *rec_b); 2059 2059 void bpf_obj_free_timer(const struct btf_record *rec, void *obj); 2060 2060 void bpf_obj_free_fields(const struct btf_record *rec, void *obj); 2061 + void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); 2061 2062 2062 2063 struct bpf_map *bpf_map_get(u32 ufd); 2063 2064 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
+1
include/linux/bpf_mem_alloc.h
··· 11 11 struct bpf_mem_alloc { 12 12 struct bpf_mem_caches __percpu *caches; 13 13 struct bpf_mem_cache __percpu *cache; 14 + bool percpu; 14 15 struct work_struct work; 15 16 }; 16 17
+1
include/linux/percpu.h
··· 132 132 extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1); 133 133 extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1); 134 134 extern void free_percpu(void __percpu *__pdata); 135 + extern size_t pcpu_alloc_size(void __percpu *__pdata); 135 136 136 137 DEFINE_FREE(free_percpu, void __percpu *, free_percpu(_T)) 137 138
+15 -11
kernel/bpf/helpers.c
··· 1811 1811 } 1812 1812 } 1813 1813 1814 - void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); 1815 - 1816 1814 void bpf_list_head_free(const struct btf_field *field, void *list_head, 1817 1815 struct bpf_spin_lock *spin_lock) 1818 1816 { ··· 1842 1844 * bpf_list_head which needs to be freed. 1843 1845 */ 1844 1846 migrate_disable(); 1845 - __bpf_obj_drop_impl(obj, field->graph_root.value_rec); 1847 + __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); 1846 1848 migrate_enable(); 1847 1849 } 1848 1850 } ··· 1881 1883 1882 1884 1883 1885 migrate_disable(); 1884 - __bpf_obj_drop_impl(obj, field->graph_root.value_rec); 1886 + __bpf_obj_drop_impl(obj, field->graph_root.value_rec, false); 1885 1887 migrate_enable(); 1886 1888 } 1887 1889 } ··· 1913 1915 } 1914 1916 1915 1917 /* Must be called under migrate_disable(), as required by bpf_mem_free */ 1916 - void __bpf_obj_drop_impl(void *p, const struct btf_record *rec) 1918 + void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu) 1917 1919 { 1920 + struct bpf_mem_alloc *ma; 1921 + 1918 1922 if (rec && rec->refcount_off >= 0 && 1919 1923 !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) { 1920 1924 /* Object is refcounted and refcount_dec didn't result in 0 ··· 1928 1928 if (rec) 1929 1929 bpf_obj_free_fields(rec, p); 1930 1930 1931 - if (rec && rec->refcount_off >= 0) 1932 - bpf_mem_free_rcu(&bpf_global_ma, p); 1931 + if (percpu) 1932 + ma = &bpf_global_percpu_ma; 1933 1933 else 1934 - bpf_mem_free(&bpf_global_ma, p); 1934 + ma = &bpf_global_ma; 1935 + if (rec && rec->refcount_off >= 0) 1936 + bpf_mem_free_rcu(ma, p); 1937 + else 1938 + bpf_mem_free(ma, p); 1935 1939 } 1936 1940 1937 1941 __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) ··· 1943 1939 struct btf_struct_meta *meta = meta__ign; 1944 1940 void *p = p__alloc; 1945 1941 1946 - __bpf_obj_drop_impl(p, meta ? meta->record : NULL); 1942 + __bpf_obj_drop_impl(p, meta ? 
meta->record : NULL, false); 1947 1943 } 1948 1944 1949 1945 __bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign) ··· 1987 1983 */ 1988 1984 if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) { 1989 1985 /* Only called from BPF prog, no need to migrate_disable */ 1990 - __bpf_obj_drop_impl((void *)n - off, rec); 1986 + __bpf_obj_drop_impl((void *)n - off, rec, false); 1991 1987 return -EINVAL; 1992 1988 } 1993 1989 ··· 2086 2082 */ 2087 2083 if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) { 2088 2084 /* Only called from BPF prog, no need to migrate_disable */ 2089 - __bpf_obj_drop_impl((void *)n - off, rec); 2085 + __bpf_obj_drop_impl((void *)n - off, rec, false); 2090 2086 return -EINVAL; 2091 2087 } 2092 2088
+26 -12
kernel/bpf/memalloc.c
··· 491 491 struct llist_node *first; 492 492 unsigned int obj_size; 493 493 494 - /* For per-cpu allocator, the size of free objects in free list doesn't 495 - * match with unit_size and now there is no way to get the size of 496 - * per-cpu pointer saved in free object, so just skip the checking. 497 - */ 498 - if (c->percpu_size) 499 - return 0; 500 - 501 494 first = c->free_llist.first; 502 495 if (!first) 503 496 return 0; 504 497 505 - obj_size = ksize(first); 498 + if (c->percpu_size) 499 + obj_size = pcpu_alloc_size(((void **)first)[1]); 500 + else 501 + obj_size = ksize(first); 506 502 if (obj_size != c->unit_size) { 507 - WARN_ONCE(1, "bpf_mem_cache[%u]: unexpected object size %u, expect %u\n", 508 - idx, obj_size, c->unit_size); 503 + WARN_ONCE(1, "bpf_mem_cache[%u]: percpu %d, unexpected object size %u, expect %u\n", 504 + idx, c->percpu_size, obj_size, c->unit_size); 509 505 return -EINVAL; 510 506 } 511 507 return 0; ··· 525 529 /* room for llist_node and per-cpu pointer */ 526 530 if (percpu) 527 531 percpu_size = LLIST_NODE_SZ + sizeof(void *); 532 + ma->percpu = percpu; 528 533 529 534 if (size) { 530 535 pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL); ··· 875 878 return !ret ? 
NULL : ret + LLIST_NODE_SZ; 876 879 } 877 880 881 + static notrace int bpf_mem_free_idx(void *ptr, bool percpu) 882 + { 883 + size_t size; 884 + 885 + if (percpu) 886 + size = pcpu_alloc_size(*((void **)ptr)); 887 + else 888 + size = ksize(ptr - LLIST_NODE_SZ); 889 + return bpf_mem_cache_idx(size); 890 + } 891 + 878 892 void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr) 879 893 { 880 894 int idx; ··· 893 885 if (!ptr) 894 886 return; 895 887 896 - idx = bpf_mem_cache_idx(ksize(ptr - LLIST_NODE_SZ)); 888 + idx = bpf_mem_free_idx(ptr, ma->percpu); 897 889 if (idx < 0) 898 890 return; 899 891 ··· 907 899 if (!ptr) 908 900 return; 909 901 910 - idx = bpf_mem_cache_idx(ksize(ptr - LLIST_NODE_SZ)); 902 + idx = bpf_mem_free_idx(ptr, ma->percpu); 911 903 if (idx < 0) 912 904 return; 913 905 ··· 981 973 return !ret ? NULL : ret + LLIST_NODE_SZ; 982 974 } 983 975 976 + /* The alignment of dynamic per-cpu area is 8, so c->unit_size and the 977 + * actual size of dynamic per-cpu area will always be matched and there is 978 + * no need to adjust size_index for per-cpu allocation. However for the 979 + * simplicity of the implementation, use an unified size_index for both 980 + * kmalloc and per-cpu allocation. 981 + */ 984 982 static __init int bpf_mem_cache_adjust_size(void) 985 983 { 986 984 unsigned int size;
+2 -4
kernel/bpf/syscall.c
··· 626 626 bpf_timer_cancel_and_free(obj + rec->timer_off); 627 627 } 628 628 629 - extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec); 630 - 631 629 void bpf_obj_free_fields(const struct btf_record *rec, void *obj) 632 630 { 633 631 const struct btf_field *fields; ··· 660 662 field->kptr.btf_id); 661 663 migrate_disable(); 662 664 __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? 663 - pointee_struct_meta->record : 664 - NULL); 665 + pointee_struct_meta->record : NULL, 666 + fields[i].type == BPF_KPTR_PERCPU); 665 667 migrate_enable(); 666 668 } else { 667 669 field->kptr.dtor(xchgd_field);
+32 -3
mm/percpu.c
··· 2245 2245 } 2246 2246 2247 2247 /** 2248 + * pcpu_alloc_size - the size of the dynamic percpu area 2249 + * @ptr: pointer to the dynamic percpu area 2250 + * 2251 + * Returns the size of the @ptr allocation. This is undefined for statically 2252 + * defined percpu variables as there is no corresponding chunk->bound_map. 2253 + * 2254 + * RETURNS: 2255 + * The size of the dynamic percpu area. 2256 + * 2257 + * CONTEXT: 2258 + * Can be called from atomic context. 2259 + */ 2260 + size_t pcpu_alloc_size(void __percpu *ptr) 2261 + { 2262 + struct pcpu_chunk *chunk; 2263 + unsigned long bit_off, end; 2264 + void *addr; 2265 + 2266 + if (!ptr) 2267 + return 0; 2268 + 2269 + addr = __pcpu_ptr_to_addr(ptr); 2270 + /* No pcpu_lock here: ptr has not been freed, so chunk is still alive */ 2271 + chunk = pcpu_chunk_addr_search(addr); 2272 + bit_off = (addr - chunk->base_addr) / PCPU_MIN_ALLOC_SIZE; 2273 + end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk), 2274 + bit_off + 1); 2275 + return (end - bit_off) * PCPU_MIN_ALLOC_SIZE; 2276 + } 2277 + 2278 + /** 2248 2279 * free_percpu - free percpu area 2249 2280 * @ptr: pointer to area to free 2250 2281 * ··· 2298 2267 kmemleak_free_percpu(ptr); 2299 2268 2300 2269 addr = __pcpu_ptr_to_addr(ptr); 2301 - 2302 - spin_lock_irqsave(&pcpu_lock, flags); 2303 - 2304 2270 chunk = pcpu_chunk_addr_search(addr); 2305 2271 off = addr - chunk->base_addr; 2306 2272 2273 + spin_lock_irqsave(&pcpu_lock, flags); 2307 2274 size = pcpu_free_area(chunk, off); 2308 2275 2309 2276 pcpu_memcg_free_hook(chunk, off, size);
+19 -1
tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
··· 9 9 10 10 #include "test_bpf_ma.skel.h" 11 11 12 - void test_test_bpf_ma(void) 12 + static void do_bpf_ma_test(const char *name) 13 13 { 14 14 struct test_bpf_ma *skel; 15 + struct bpf_program *prog; 15 16 struct btf *btf; 16 17 int i, err; 17 18 ··· 35 34 skel->rodata->data_btf_ids[i] = id; 36 35 } 37 36 37 + prog = bpf_object__find_program_by_name(skel->obj, name); 38 + if (!ASSERT_OK_PTR(prog, "invalid prog name")) 39 + goto out; 40 + bpf_program__set_autoload(prog, true); 41 + 38 42 err = test_bpf_ma__load(skel); 39 43 if (!ASSERT_OK(err, "load")) 40 44 goto out; ··· 53 47 ASSERT_OK(skel->bss->err, "test error"); 54 48 out: 55 49 test_bpf_ma__destroy(skel); 50 + } 51 + 52 + void test_test_bpf_ma(void) 53 + { 54 + if (test__start_subtest("batch_alloc_free")) 55 + do_bpf_ma_test("test_batch_alloc_free"); 56 + if (test__start_subtest("free_through_map_free")) 57 + do_bpf_ma_test("test_free_through_map_free"); 58 + if (test__start_subtest("batch_percpu_alloc_free")) 59 + do_bpf_ma_test("test_batch_percpu_alloc_free"); 60 + if (test__start_subtest("percpu_free_through_map_free")) 61 + do_bpf_ma_test("test_percpu_free_through_map_free"); 56 62 }
+174 -6
tools/testing/selftests/bpf/progs/test_bpf_ma.c
··· 37 37 __type(key, int); \ 38 38 __type(value, struct map_value_##_size); \ 39 39 __uint(max_entries, 128); \ 40 - } array_##_size SEC(".maps"); 40 + } array_##_size SEC(".maps") 41 41 42 - static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch, 43 - unsigned int idx) 42 + #define DEFINE_ARRAY_WITH_PERCPU_KPTR(_size) \ 43 + struct map_value_percpu_##_size { \ 44 + struct bin_data_##_size __percpu_kptr * data; \ 45 + }; \ 46 + struct { \ 47 + __uint(type, BPF_MAP_TYPE_ARRAY); \ 48 + __type(key, int); \ 49 + __type(value, struct map_value_percpu_##_size); \ 50 + __uint(max_entries, 128); \ 51 + } array_percpu_##_size SEC(".maps") 52 + 53 + static __always_inline void batch_alloc(struct bpf_map *map, unsigned int batch, unsigned int idx) 44 54 { 45 55 struct generic_map_value *value; 46 56 unsigned int i, key; ··· 75 65 return; 76 66 } 77 67 } 68 + } 69 + 70 + static __always_inline void batch_free(struct bpf_map *map, unsigned int batch, unsigned int idx) 71 + { 72 + struct generic_map_value *value; 73 + unsigned int i, key; 74 + void *old; 75 + 78 76 for (i = 0; i < batch; i++) { 79 77 key = i; 80 78 value = bpf_map_lookup_elem(map, &key); ··· 99 81 } 100 82 } 101 83 84 + static __always_inline void batch_percpu_alloc(struct bpf_map *map, unsigned int batch, 85 + unsigned int idx) 86 + { 87 + struct generic_map_value *value; 88 + unsigned int i, key; 89 + void *old, *new; 90 + 91 + for (i = 0; i < batch; i++) { 92 + key = i; 93 + value = bpf_map_lookup_elem(map, &key); 94 + if (!value) { 95 + err = 1; 96 + return; 97 + } 98 + /* per-cpu allocator may not be able to refill in time */ 99 + new = bpf_percpu_obj_new_impl(data_btf_ids[idx], NULL); 100 + if (!new) 101 + continue; 102 + 103 + old = bpf_kptr_xchg(&value->data, new); 104 + if (old) { 105 + bpf_percpu_obj_drop(old); 106 + err = 2; 107 + return; 108 + } 109 + } 110 + } 111 + 112 + static __always_inline void batch_percpu_free(struct bpf_map *map, unsigned int batch, 113 + 
unsigned int idx) 114 + { 115 + struct generic_map_value *value; 116 + unsigned int i, key; 117 + void *old; 118 + 119 + for (i = 0; i < batch; i++) { 120 + key = i; 121 + value = bpf_map_lookup_elem(map, &key); 122 + if (!value) { 123 + err = 3; 124 + return; 125 + } 126 + old = bpf_kptr_xchg(&value->data, NULL); 127 + if (!old) 128 + continue; 129 + bpf_percpu_obj_drop(old); 130 + } 131 + } 132 + 133 + #define CALL_BATCH_ALLOC(size, batch, idx) \ 134 + batch_alloc((struct bpf_map *)(&array_##size), batch, idx) 135 + 102 136 #define CALL_BATCH_ALLOC_FREE(size, batch, idx) \ 103 - batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx) 137 + do { \ 138 + batch_alloc((struct bpf_map *)(&array_##size), batch, idx); \ 139 + batch_free((struct bpf_map *)(&array_##size), batch, idx); \ 140 + } while (0) 141 + 142 + #define CALL_BATCH_PERCPU_ALLOC(size, batch, idx) \ 143 + batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx) 144 + 145 + #define CALL_BATCH_PERCPU_ALLOC_FREE(size, batch, idx) \ 146 + do { \ 147 + batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx); \ 148 + batch_percpu_free((struct bpf_map *)(&array_percpu_##size), batch, idx); \ 149 + } while (0) 104 150 105 151 DEFINE_ARRAY_WITH_KPTR(8); 106 152 DEFINE_ARRAY_WITH_KPTR(16); ··· 179 97 DEFINE_ARRAY_WITH_KPTR(2048); 180 98 DEFINE_ARRAY_WITH_KPTR(4096); 181 99 182 - SEC("fentry/" SYS_PREFIX "sys_nanosleep") 183 - int test_bpf_mem_alloc_free(void *ctx) 100 + /* per-cpu kptr doesn't support bin_data_8 which is a zero-sized array */ 101 + DEFINE_ARRAY_WITH_PERCPU_KPTR(16); 102 + DEFINE_ARRAY_WITH_PERCPU_KPTR(32); 103 + DEFINE_ARRAY_WITH_PERCPU_KPTR(64); 104 + DEFINE_ARRAY_WITH_PERCPU_KPTR(96); 105 + DEFINE_ARRAY_WITH_PERCPU_KPTR(128); 106 + DEFINE_ARRAY_WITH_PERCPU_KPTR(192); 107 + DEFINE_ARRAY_WITH_PERCPU_KPTR(256); 108 + DEFINE_ARRAY_WITH_PERCPU_KPTR(512); 109 + DEFINE_ARRAY_WITH_PERCPU_KPTR(1024); 110 + DEFINE_ARRAY_WITH_PERCPU_KPTR(2048); 111 + 
DEFINE_ARRAY_WITH_PERCPU_KPTR(4096); 112 + 113 + SEC("?fentry/" SYS_PREFIX "sys_nanosleep") 114 + int test_batch_alloc_free(void *ctx) 184 115 { 185 116 if ((u32)bpf_get_current_pid_tgid() != pid) 186 117 return 0; ··· 213 118 CALL_BATCH_ALLOC_FREE(1024, 32, 9); 214 119 CALL_BATCH_ALLOC_FREE(2048, 16, 10); 215 120 CALL_BATCH_ALLOC_FREE(4096, 8, 11); 121 + 122 + return 0; 123 + } 124 + 125 + SEC("?fentry/" SYS_PREFIX "sys_nanosleep") 126 + int test_free_through_map_free(void *ctx) 127 + { 128 + if ((u32)bpf_get_current_pid_tgid() != pid) 129 + return 0; 130 + 131 + /* Alloc 128 8-bytes objects in batch to trigger refilling, 132 + * then free these objects through map free. 133 + */ 134 + CALL_BATCH_ALLOC(8, 128, 0); 135 + CALL_BATCH_ALLOC(16, 128, 1); 136 + CALL_BATCH_ALLOC(32, 128, 2); 137 + CALL_BATCH_ALLOC(64, 128, 3); 138 + CALL_BATCH_ALLOC(96, 128, 4); 139 + CALL_BATCH_ALLOC(128, 128, 5); 140 + CALL_BATCH_ALLOC(192, 128, 6); 141 + CALL_BATCH_ALLOC(256, 128, 7); 142 + CALL_BATCH_ALLOC(512, 64, 8); 143 + CALL_BATCH_ALLOC(1024, 32, 9); 144 + CALL_BATCH_ALLOC(2048, 16, 10); 145 + CALL_BATCH_ALLOC(4096, 8, 11); 146 + 147 + return 0; 148 + } 149 + 150 + SEC("?fentry/" SYS_PREFIX "sys_nanosleep") 151 + int test_batch_percpu_alloc_free(void *ctx) 152 + { 153 + if ((u32)bpf_get_current_pid_tgid() != pid) 154 + return 0; 155 + 156 + /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, 157 + * then free 128 16-bytes per-cpu objects in batch to trigger freeing. 
158 + */ 159 + CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1); 160 + CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2); 161 + CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3); 162 + CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4); 163 + CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5); 164 + CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6); 165 + CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7); 166 + CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8); 167 + CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 9); 168 + CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 10); 169 + CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 11); 170 + 171 + return 0; 172 + } 173 + 174 + SEC("?fentry/" SYS_PREFIX "sys_nanosleep") 175 + int test_percpu_free_through_map_free(void *ctx) 176 + { 177 + if ((u32)bpf_get_current_pid_tgid() != pid) 178 + return 0; 179 + 180 + /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling, 181 + * then free these object through map free. 182 + */ 183 + CALL_BATCH_PERCPU_ALLOC(16, 128, 1); 184 + CALL_BATCH_PERCPU_ALLOC(32, 128, 2); 185 + CALL_BATCH_PERCPU_ALLOC(64, 128, 3); 186 + CALL_BATCH_PERCPU_ALLOC(96, 128, 4); 187 + CALL_BATCH_PERCPU_ALLOC(128, 128, 5); 188 + CALL_BATCH_PERCPU_ALLOC(192, 128, 6); 189 + CALL_BATCH_PERCPU_ALLOC(256, 128, 7); 190 + CALL_BATCH_PERCPU_ALLOC(512, 64, 8); 191 + CALL_BATCH_PERCPU_ALLOC(1024, 32, 9); 192 + CALL_BATCH_PERCPU_ALLOC(2048, 16, 10); 193 + CALL_BATCH_PERCPU_ALLOC(4096, 8, 11); 216 194 217 195 return 0; 218 196 }