Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Andrii Nakryiko says:

====================
bpf 2022-11-11

We've added 11 non-merge commits during the last 8 days which contain
a total of 11 files changed, 83 insertions(+), 74 deletions(-).

The main changes are:

1) Fix strncpy_from_kernel_nofault() to prevent out-of-bounds writes,
from Alban Crequy.

2) Fix for bpf_prog_test_run_skb() to prevent wrong alignment,
from Baisong Zhong.

3) Switch BPF_DISPATCHER to static_call() instead of ftrace infra, with
a small build fix on top, from Peter Zijlstra and Nathan Chancellor.

4) Fix memory leak in BPF verifier in some error cases, from Wang Yufen.

5) 32-bit compilation error fixes for BPF selftests, from Pu Lehui and
Yang Jihong.

6) Ensure even distribution of per-CPU free list elements, from Xu Kuohai.

7) Fix copy_map_value() to track special zeroed out areas properly,
from Xu Kuohai.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
bpf: Fix offset calculation error in __copy_map_value and zero_map_value
bpf: Initialize same number of free nodes for each pcpu_freelist
selftests: bpf: Add a test when bpf_probe_read_kernel_str() returns EFAULT
maccess: Fix writing offset in case of fault in strncpy_from_kernel_nofault()
selftests/bpf: Fix test_progs compilation failure in 32-bit arch
selftests/bpf: Fix casting error when cross-compiling test_verifier for 32-bit platforms
bpf: Fix memory leaks in __check_func_call
bpf: Add explicit cast to 'void *' for __BPF_DISPATCHER_UPDATE()
bpf: Convert BPF_DISPATCHER to use static_call() (not ftrace)
bpf: Revert ("Fix dispatcher patchable function entry to 5 bytes nop")
bpf, test_run: Fix alignment problem in bpf_prog_test_run_skb()
====================

Link: https://lore.kernel.org/r/20221111231624.938829-1-andrii@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+82 -73
-13
arch/x86/net/bpf_jit_comp.c
··· 11 11 #include <linux/bpf.h> 12 12 #include <linux/memory.h> 13 13 #include <linux/sort.h> 14 - #include <linux/init.h> 15 14 #include <asm/extable.h> 16 15 #include <asm/set_memory.h> 17 16 #include <asm/nospec-branch.h> ··· 386 387 out: 387 388 mutex_unlock(&text_mutex); 388 389 return ret; 389 - } 390 - 391 - int __init bpf_arch_init_dispatcher_early(void *ip) 392 - { 393 - const u8 *nop_insn = x86_nops[5]; 394 - 395 - if (is_endbr(*(u32 *)ip)) 396 - ip += ENDBR_INSN_SIZE; 397 - 398 - if (memcmp(ip, nop_insn, X86_PATCH_SIZE)) 399 - text_poke_early(ip, nop_insn, X86_PATCH_SIZE); 400 - return 0; 401 390 } 402 391 403 392 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+39 -21
include/linux/bpf.h
··· 27 27 #include <linux/bpfptr.h> 28 28 #include <linux/btf.h> 29 29 #include <linux/rcupdate_trace.h> 30 - #include <linux/init.h> 30 + #include <linux/static_call.h> 31 31 32 32 struct bpf_verifier_env; 33 33 struct bpf_verifier_log; ··· 315 315 u32 next_off = map->off_arr->field_off[i]; 316 316 317 317 memcpy(dst + curr_off, src + curr_off, next_off - curr_off); 318 - curr_off += map->off_arr->field_sz[i]; 318 + curr_off = next_off + map->off_arr->field_sz[i]; 319 319 } 320 320 memcpy(dst + curr_off, src + curr_off, map->value_size - curr_off); 321 321 } ··· 344 344 u32 next_off = map->off_arr->field_off[i]; 345 345 346 346 memset(dst + curr_off, 0, next_off - curr_off); 347 - curr_off += map->off_arr->field_sz[i]; 347 + curr_off = next_off + map->off_arr->field_sz[i]; 348 348 } 349 349 memset(dst + curr_off, 0, map->value_size - curr_off); 350 350 } ··· 954 954 void *rw_image; 955 955 u32 image_off; 956 956 struct bpf_ksym ksym; 957 + #ifdef CONFIG_HAVE_STATIC_CALL 958 + struct static_call_key *sc_key; 959 + void *sc_tramp; 960 + #endif 957 961 }; 958 962 959 963 static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func( ··· 975 971 struct bpf_attach_target_info *tgt_info); 976 972 void bpf_trampoline_put(struct bpf_trampoline *tr); 977 973 int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); 978 - int __init bpf_arch_init_dispatcher_early(void *ip); 974 + 975 + /* 976 + * When the architecture supports STATIC_CALL replace the bpf_dispatcher_fn 977 + * indirection with a direct call to the bpf program. If the architecture does 978 + * not have STATIC_CALL, avoid a double-indirection. 
979 + */ 980 + #ifdef CONFIG_HAVE_STATIC_CALL 981 + 982 + #define __BPF_DISPATCHER_SC_INIT(_name) \ 983 + .sc_key = &STATIC_CALL_KEY(_name), \ 984 + .sc_tramp = STATIC_CALL_TRAMP_ADDR(_name), 985 + 986 + #define __BPF_DISPATCHER_SC(name) \ 987 + DEFINE_STATIC_CALL(bpf_dispatcher_##name##_call, bpf_dispatcher_nop_func) 988 + 989 + #define __BPF_DISPATCHER_CALL(name) \ 990 + static_call(bpf_dispatcher_##name##_call)(ctx, insnsi, bpf_func) 991 + 992 + #define __BPF_DISPATCHER_UPDATE(_d, _new) \ 993 + __static_call_update((_d)->sc_key, (_d)->sc_tramp, (_new)) 994 + 995 + #else 996 + #define __BPF_DISPATCHER_SC_INIT(name) 997 + #define __BPF_DISPATCHER_SC(name) 998 + #define __BPF_DISPATCHER_CALL(name) bpf_func(ctx, insnsi) 999 + #define __BPF_DISPATCHER_UPDATE(_d, _new) 1000 + #endif 979 1001 980 1002 #define BPF_DISPATCHER_INIT(_name) { \ 981 1003 .mutex = __MUTEX_INITIALIZER(_name.mutex), \ ··· 1014 984 .name = #_name, \ 1015 985 .lnode = LIST_HEAD_INIT(_name.ksym.lnode), \ 1016 986 }, \ 987 + __BPF_DISPATCHER_SC_INIT(_name##_call) \ 1017 988 } 1018 989 1019 - #define BPF_DISPATCHER_INIT_CALL(_name) \ 1020 - static int __init _name##_init(void) \ 1021 - { \ 1022 - return bpf_arch_init_dispatcher_early(_name##_func); \ 1023 - } \ 1024 - early_initcall(_name##_init) 1025 - 1026 - #ifdef CONFIG_X86_64 1027 - #define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) 1028 - #else 1029 - #define BPF_DISPATCHER_ATTRIBUTES 1030 - #endif 1031 - 1032 990 #define DEFINE_BPF_DISPATCHER(name) \ 1033 - notrace BPF_DISPATCHER_ATTRIBUTES \ 991 + __BPF_DISPATCHER_SC(name); \ 1034 992 noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ 1035 993 const void *ctx, \ 1036 994 const struct bpf_insn *insnsi, \ 1037 995 bpf_func_t bpf_func) \ 1038 996 { \ 1039 - return bpf_func(ctx, insnsi); \ 997 + return __BPF_DISPATCHER_CALL(name); \ 1040 998 } \ 1041 999 EXPORT_SYMBOL(bpf_dispatcher_##name##_func); \ 1042 1000 struct bpf_dispatcher bpf_dispatcher_##name = \ 
1043 - BPF_DISPATCHER_INIT(bpf_dispatcher_##name); \ 1044 - BPF_DISPATCHER_INIT_CALL(bpf_dispatcher_##name); 1001 + BPF_DISPATCHER_INIT(bpf_dispatcher_##name); 1045 1002 1046 1003 #define DECLARE_BPF_DISPATCHER(name) \ 1047 1004 unsigned int bpf_dispatcher_##name##_func( \ ··· 1036 1019 const struct bpf_insn *insnsi, \ 1037 1020 bpf_func_t bpf_func); \ 1038 1021 extern struct bpf_dispatcher bpf_dispatcher_##name; 1022 + 1039 1023 #define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func 1040 1024 #define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name) 1041 1025 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
+7 -19
kernel/bpf/dispatcher.c
··· 4 4 #include <linux/hash.h> 5 5 #include <linux/bpf.h> 6 6 #include <linux/filter.h> 7 - #include <linux/init.h> 7 + #include <linux/static_call.h> 8 8 9 9 /* The BPF dispatcher is a multiway branch code generator. The 10 10 * dispatcher is a mechanism to avoid the performance penalty of an ··· 91 91 return -ENOTSUPP; 92 92 } 93 93 94 - int __weak __init bpf_arch_init_dispatcher_early(void *ip) 95 - { 96 - return -ENOTSUPP; 97 - } 98 - 99 94 static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf) 100 95 { 101 96 s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0]; ··· 105 110 106 111 static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) 107 112 { 108 - void *old, *new, *tmp; 109 - u32 noff; 110 - int err; 113 + void *new, *tmp; 114 + u32 noff = 0; 111 115 112 - if (!prev_num_progs) { 113 - old = NULL; 114 - noff = 0; 115 - } else { 116 - old = d->image + d->image_off; 116 + if (prev_num_progs) 117 117 noff = d->image_off ^ (PAGE_SIZE / 2); 118 - } 119 118 120 119 new = d->num_progs ? d->image + noff : NULL; 121 120 tmp = d->num_progs ? d->rw_image + noff : NULL; ··· 123 134 return; 124 135 } 125 136 126 - err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new); 127 - if (err || !new) 128 - return; 137 + __BPF_DISPATCHER_UPDATE(d, new ?: (void *)&bpf_dispatcher_nop_func); 129 138 130 - d->image_off = noff; 139 + if (new) 140 + d->image_off = noff; 131 141 } 132 142 133 143 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
+11 -12
kernel/bpf/percpu_freelist.c
··· 100 100 u32 nr_elems) 101 101 { 102 102 struct pcpu_freelist_head *head; 103 - int i, cpu, pcpu_entries; 103 + unsigned int cpu, cpu_idx, i, j, n, m; 104 104 105 - pcpu_entries = nr_elems / num_possible_cpus() + 1; 106 - i = 0; 105 + n = nr_elems / num_possible_cpus(); 106 + m = nr_elems % num_possible_cpus(); 107 107 108 + cpu_idx = 0; 108 109 for_each_possible_cpu(cpu) { 109 - again: 110 110 head = per_cpu_ptr(s->freelist, cpu); 111 - /* No locking required as this is not visible yet. */ 112 - pcpu_freelist_push_node(head, buf); 113 - i++; 114 - buf += elem_size; 115 - if (i == nr_elems) 116 - break; 117 - if (i % pcpu_entries) 118 - goto again; 111 + j = n + (cpu_idx < m ? 1 : 0); 112 + for (i = 0; i < j; i++) { 113 + /* No locking required as this is not visible yet. */ 114 + pcpu_freelist_push_node(head, buf); 115 + buf += elem_size; 116 + } 117 + cpu_idx++; 119 118 } 120 119 } 121 120
+9 -5
kernel/bpf/verifier.c
··· 6745 6745 /* Transfer references to the callee */ 6746 6746 err = copy_reference_state(callee, caller); 6747 6747 if (err) 6748 - return err; 6748 + goto err_out; 6749 6749 6750 6750 err = set_callee_state_cb(env, caller, callee, *insn_idx); 6751 6751 if (err) 6752 - return err; 6752 + goto err_out; 6753 6753 6754 6754 clear_caller_saved_regs(env, caller->regs); 6755 6755 ··· 6766 6766 print_verifier_state(env, callee, true); 6767 6767 } 6768 6768 return 0; 6769 + 6770 + err_out: 6771 + free_func_state(callee); 6772 + state->frame[state->curframe + 1] = NULL; 6773 + return err; 6769 6774 } 6770 6775 6771 6776 int map_set_for_each_callback_args(struct bpf_verifier_env *env, ··· 6984 6979 return -EINVAL; 6985 6980 } 6986 6981 6987 - state->curframe--; 6988 - caller = state->frame[state->curframe]; 6982 + caller = state->frame[state->curframe - 1]; 6989 6983 if (callee->in_callback_fn) { 6990 6984 /* enforce R0 return value range [0, 1]. */ 6991 6985 struct tnum range = callee->callback_ret_range; ··· 7023 7019 } 7024 7020 /* clear everything in the callee */ 7025 7021 free_func_state(callee); 7026 - state->frame[state->curframe + 1] = NULL; 7022 + state->frame[state->curframe--] = NULL; 7027 7023 return 0; 7028 7024 } 7029 7025
+1 -1
mm/maccess.c
··· 97 97 return src - unsafe_addr; 98 98 Efault: 99 99 pagefault_enable(); 100 - dst[-1] = '\0'; 100 + dst[0] = '\0'; 101 101 return -EFAULT; 102 102 } 103 103
+1
net/bpf/test_run.c
··· 774 774 if (user_size > size) 775 775 return ERR_PTR(-EMSGSIZE); 776 776 777 + size = SKB_DATA_ALIGN(size); 777 778 data = kzalloc(size + headroom + tailroom, GFP_USER); 778 779 if (!data) 779 780 return ERR_PTR(-ENOMEM);
+7
tools/testing/selftests/bpf/prog_tests/varlen.c
··· 63 63 CHECK_VAL(data->total4, size1 + size2); 64 64 CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check", 65 65 "doesn't match!\n"); 66 + 67 + CHECK_VAL(bss->ret_bad_read, -EFAULT); 68 + CHECK_VAL(data->payload_bad[0], 0x42); 69 + CHECK_VAL(data->payload_bad[1], 0x42); 70 + CHECK_VAL(data->payload_bad[2], 0); 71 + CHECK_VAL(data->payload_bad[3], 0x42); 72 + CHECK_VAL(data->payload_bad[4], 0x42); 66 73 cleanup: 67 74 test_varlen__destroy(skel); 68 75 }
+5
tools/testing/selftests/bpf/progs/test_varlen.c
··· 19 19 __u64 payload1_len2 = 0; 20 20 __u64 total1 = 0; 21 21 char payload1[MAX_LEN + MAX_LEN] = {}; 22 + __u64 ret_bad_read = 0; 22 23 23 24 /* .data */ 24 25 int payload2_len1 = -1; ··· 36 35 int payload4_len2 = -1; 37 36 int total4= -1; 38 37 char payload4[MAX_LEN + MAX_LEN] = { 1 }; 38 + 39 + char payload_bad[5] = { 0x42, 0x42, 0x42, 0x42, 0x42 }; 39 40 40 41 SEC("raw_tp/sys_enter") 41 42 int handler64_unsigned(void *regs) ··· 63 60 } 64 61 65 62 total1 = payload - (void *)payload1; 63 + 64 + ret_bad_read = bpf_probe_read_kernel_str(payload_bad + 2, 1, (void *) -1); 66 65 67 66 return 0; 68 67 }
+1 -1
tools/testing/selftests/bpf/test_progs.c
··· 1010 1010 msg->subtest_done.have_log); 1011 1011 break; 1012 1012 case MSG_TEST_LOG: 1013 - sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)", 1013 + sprintf(buf, "MSG_TEST_LOG (cnt: %zu, last: %d)", 1014 1014 strlen(msg->test_log.log_buf), 1015 1015 msg->test_log.is_last); 1016 1016 break;
+1 -1
tools/testing/selftests/bpf/test_verifier.c
··· 1260 1260 1261 1261 bzero(&info, sizeof(info)); 1262 1262 info.xlated_prog_len = xlated_prog_len; 1263 - info.xlated_prog_insns = (__u64)*buf; 1263 + info.xlated_prog_insns = (__u64)(unsigned long)*buf; 1264 1264 if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { 1265 1265 perror("second bpf_obj_get_info_by_fd failed"); 1266 1266 goto out_free_buf;