Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'BPF array map fixes and improvements'

Andrii Nakryiko says:

====================

Fix a 32-bit overflow in value pointer calculations in the BPF array map, and
then raise the obsolete limit on the array map value size. Add a selftest to
make sure this works as intended.

v1->v2:
- fix broken patch #1 (the helper no longer uses mask_index, as stated in the
commit message; also add the missing semicolon).
====================

Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+28 -18
+22 -18
kernel/bpf/arraymap.c
··· 70 70 attr->map_flags & BPF_F_PRESERVE_ELEMS) 71 71 return -EINVAL; 72 72 73 - if (attr->value_size > KMALLOC_MAX_SIZE) 74 - /* if value_size is bigger, the user space won't be able to 75 - * access the elements. 76 - */ 73 + /* avoid overflow on round_up(map->value_size) */ 74 + if (attr->value_size > INT_MAX) 77 75 return -E2BIG; 78 76 79 77 return 0; ··· 154 156 return &array->map; 155 157 } 156 158 159 + static void *array_map_elem_ptr(struct bpf_array* array, u32 index) 160 + { 161 + return array->value + (u64)array->elem_size * index; 162 + } 163 + 157 164 /* Called from syscall or from eBPF program */ 158 165 static void *array_map_lookup_elem(struct bpf_map *map, void *key) 159 166 { ··· 168 165 if (unlikely(index >= array->map.max_entries)) 169 166 return NULL; 170 167 171 - return array->value + array->elem_size * (index & array->index_mask); 168 + return array->value + (u64)array->elem_size * (index & array->index_mask); 172 169 } 173 170 174 171 static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, ··· 206 203 { 207 204 struct bpf_array *array = container_of(map, struct bpf_array, map); 208 205 struct bpf_insn *insn = insn_buf; 209 - u32 elem_size = round_up(map->value_size, 8); 206 + u32 elem_size = array->elem_size; 210 207 const int ret = BPF_REG_0; 211 208 const int map_ptr = BPF_REG_1; 212 209 const int index = BPF_REG_2; ··· 275 272 * access 'value_size' of them, so copying rounded areas 276 273 * will not leak any kernel data 277 274 */ 278 - size = round_up(map->value_size, 8); 275 + size = array->elem_size; 279 276 rcu_read_lock(); 280 277 pptr = array->pptrs[index & array->index_mask]; 281 278 for_each_possible_cpu(cpu) { ··· 342 339 value, map->value_size); 343 340 } else { 344 341 val = array->value + 345 - array->elem_size * (index & array->index_mask); 342 + (u64)array->elem_size * (index & array->index_mask); 346 343 if (map_flags & BPF_F_LOCK) 347 344 copy_map_value_locked(map, val, value, false); 348 345 else 
··· 379 376 * returned or zeros which were zero-filled by percpu_alloc, 380 377 * so no kernel data leaks possible 381 378 */ 382 - size = round_up(map->value_size, 8); 379 + size = array->elem_size; 383 380 rcu_read_lock(); 384 381 pptr = array->pptrs[index & array->index_mask]; 385 382 for_each_possible_cpu(cpu) { ··· 411 408 return; 412 409 413 410 for (i = 0; i < array->map.max_entries; i++) 414 - bpf_timer_cancel_and_free(array->value + array->elem_size * i + 415 - map->timer_off); 411 + bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off); 416 412 } 417 413 418 414 /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ ··· 422 420 423 421 if (map_value_has_kptrs(map)) { 424 422 for (i = 0; i < array->map.max_entries; i++) 425 - bpf_map_free_kptrs(map, array->value + array->elem_size * i); 423 + bpf_map_free_kptrs(map, array_map_elem_ptr(array, i)); 426 424 bpf_map_free_kptr_off_tab(map); 427 425 } 428 426 ··· 558 556 index = info->index & array->index_mask; 559 557 if (info->percpu_value_buf) 560 558 return array->pptrs[index]; 561 - return array->value + array->elem_size * index; 559 + return array_map_elem_ptr(array, index); 562 560 } 563 561 564 562 static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) ··· 577 575 index = info->index & array->index_mask; 578 576 if (info->percpu_value_buf) 579 577 return array->pptrs[index]; 580 - return array->value + array->elem_size * index; 578 + return array_map_elem_ptr(array, index); 581 579 } 582 580 583 581 static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) ··· 585 583 struct bpf_iter_seq_array_map_info *info = seq->private; 586 584 struct bpf_iter__bpf_map_elem ctx = {}; 587 585 struct bpf_map *map = info->map; 586 + struct bpf_array *array = container_of(map, struct bpf_array, map); 588 587 struct bpf_iter_meta meta; 589 588 struct bpf_prog *prog; 590 589 int off = 0, cpu = 0; ··· 606 603 ctx.value = v; 607 604 } else { 608 
605 pptr = v; 609 - size = round_up(map->value_size, 8); 606 + size = array->elem_size; 610 607 for_each_possible_cpu(cpu) { 611 608 bpf_long_memcpy(info->percpu_value_buf + off, 612 609 per_cpu_ptr(pptr, cpu), ··· 636 633 { 637 634 struct bpf_iter_seq_array_map_info *seq_info = priv_data; 638 635 struct bpf_map *map = aux->map; 636 + struct bpf_array *array = container_of(map, struct bpf_array, map); 639 637 void *value_buf; 640 638 u32 buf_size; 641 639 642 640 if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { 643 - buf_size = round_up(map->value_size, 8) * num_possible_cpus(); 641 + buf_size = array->elem_size * num_possible_cpus(); 644 642 value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); 645 643 if (!value_buf) 646 644 return -ENOMEM; ··· 694 690 if (is_percpu) 695 691 val = this_cpu_ptr(array->pptrs[i]); 696 692 else 697 - val = array->value + array->elem_size * i; 693 + val = array_map_elem_ptr(array, i); 698 694 num_elems++; 699 695 key = i; 700 696 ret = callback_fn((u64)(long)map, (u64)(long)&key, ··· 1326 1322 struct bpf_insn *insn_buf) 1327 1323 { 1328 1324 struct bpf_array *array = container_of(map, struct bpf_array, map); 1329 - u32 elem_size = round_up(map->value_size, 8); 1325 + u32 elem_size = array->elem_size; 1330 1326 struct bpf_insn *insn = insn_buf; 1331 1327 const int ret = BPF_REG_0; 1332 1328 const int map_ptr = BPF_REG_1;
+2
tools/testing/selftests/bpf/prog_tests/skeleton.c
··· 122 122 123 123 ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var"); 124 124 125 + ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr"); 126 + 125 127 elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); 126 128 ASSERT_OK_PTR(elf_bytes, "elf_bytes"); 127 129 ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz");
+4
tools/testing/selftests/bpf/progs/test_skeleton.c
··· 51 51 int read_mostly_var __read_mostly; 52 52 int out_mostly_var; 53 53 54 + char huge_arr[16 * 1024 * 1024]; 55 + 54 56 SEC("raw_tp/sys_enter") 55 57 int handler(const void *ctx) 56 58 { ··· 72 70 out_dynarr[i] = in_dynarr[i]; 73 71 74 72 out_mostly_var = read_mostly_var; 73 + 74 + huge_arr[sizeof(huge_arr) - 1] = 123; 75 75 76 76 return 0; 77 77 }