Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: Introduce bpf_per_cpu_ptr()

Add bpf_per_cpu_ptr() to help bpf programs access percpu vars.
bpf_per_cpu_ptr() has the same semantics as per_cpu_ptr() in the kernel
except that it may return NULL. This happens when the cpu parameter is
out of range. So the caller must check the returned value.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200929235049.2533242-5-haoluo@google.com

authored by

Hao Luo and committed by
Alexei Starovoitov
eaa6bcb7 2c2f6abe

+132 -13
+4
include/linux/bpf.h
··· 293 293 ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ 294 294 ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ 295 295 ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ 296 + ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ 296 297 __BPF_ARG_TYPE_MAX, 297 298 }; 298 299 ··· 308 307 RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ 309 308 RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ 310 309 RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ 310 + RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ 311 311 }; 312 312 313 313 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs ··· 407 405 PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ 408 406 PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ 409 407 PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ 408 + PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ 410 409 }; 411 410 412 411 /* The information passed from prog-specific *_is_valid_access ··· 1831 1828 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto; 1832 1829 extern const struct bpf_func_proto bpf_copy_from_user_proto; 1833 1830 extern const struct bpf_func_proto bpf_snprintf_btf_proto; 1831 + extern const struct bpf_func_proto bpf_per_cpu_ptr_proto; 1834 1832 1835 1833 const struct bpf_func_proto *bpf_tracing_func_proto( 1836 1834 enum bpf_func_id func_id, const struct bpf_prog *prog);
+11
include/linux/btf.h
··· 110 110 i < btf_type_vlen(struct_type); \ 111 111 i++, member++) 112 112 113 + #define for_each_vsi(i, datasec_type, member) \ 114 + for (i = 0, member = btf_type_var_secinfo(datasec_type); \ 115 + i < btf_type_vlen(datasec_type); \ 116 + i++, member++) 117 + 113 118 static inline bool btf_type_is_ptr(const struct btf_type *t) 114 119 { 115 120 return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; ··· 197 192 static inline const struct btf_member *btf_type_member(const struct btf_type *t) 198 193 { 199 194 return (const struct btf_member *)(t + 1); 195 + } 196 + 197 + static inline const struct btf_var_secinfo *btf_type_var_secinfo( 198 + const struct btf_type *t) 199 + { 200 + return (const struct btf_var_secinfo *)(t + 1); 200 201 } 201 202 202 203 #ifdef CONFIG_BPF_SYSCALL
+18
include/uapi/linux/bpf.h
··· 3686 3686 * Return 3687 3687 * The helper returns **TC_ACT_REDIRECT** on success or 3688 3688 * **TC_ACT_SHOT** on error. 3689 + * 3690 + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) 3691 + * Description 3692 + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a 3693 + * pointer to the percpu kernel variable on *cpu*. A ksym is an 3694 + * extern variable decorated with '__ksym'. For a ksym, there is a 3695 + * global var (either static or global) of the same name defined 3696 + * in the kernel. The ksym is percpu if the global var is percpu. 3697 + * The returned pointer points to the global percpu var on *cpu*. 3698 + * 3699 + * bpf_per_cpu_ptr() has the same semantics as per_cpu_ptr() in the 3700 + * kernel, except that bpf_per_cpu_ptr() may return NULL. This 3701 + * happens if *cpu* is greater than or equal to nr_cpu_ids. The caller of 3702 + * bpf_per_cpu_ptr() must check the returned value. 3703 + * Return 3704 + * A pointer pointing to the kernel percpu variable on *cpu*, or 3705 + * NULL, if *cpu* is invalid. 3689 3706 */ 3690 3707 #define __BPF_FUNC_MAPPER(FN) \ 3691 3708 FN(unspec), \ ··· 3858 3841 FN(seq_printf_btf), \ 3859 3842 FN(skb_cgroup_classid), \ 3860 3843 FN(redirect_neigh), \ 3844 + FN(bpf_per_cpu_ptr), \ 3861 3845 /* */ 3862 3846 3863 3847 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-10
kernel/bpf/btf.c
··· 188 188 i < btf_type_vlen(struct_type); \ 189 189 i++, member++) 190 190 191 - #define for_each_vsi(i, struct_type, member) \ 192 - for (i = 0, member = btf_type_var_secinfo(struct_type); \ 193 - i < btf_type_vlen(struct_type); \ 194 - i++, member++) 195 - 196 191 #define for_each_vsi_from(i, from, struct_type, member) \ 197 192 for (i = from, member = btf_type_var_secinfo(struct_type) + from; \ 198 193 i < btf_type_vlen(struct_type); \ ··· 591 596 static const struct btf_var *btf_type_var(const struct btf_type *t) 592 597 { 593 598 return (const struct btf_var *)(t + 1); 594 - } 595 - 596 - static const struct btf_var_secinfo *btf_type_var_secinfo(const struct btf_type *t) 597 - { 598 - return (const struct btf_var_secinfo *)(t + 1); 599 599 } 600 600 601 601 static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
+18
kernel/bpf/helpers.c
··· 623 623 .arg3_type = ARG_ANYTHING, 624 624 }; 625 625 626 + BPF_CALL_2(bpf_per_cpu_ptr, const void *, ptr, u32, cpu) 627 + { 628 + if (cpu >= nr_cpu_ids) 629 + return (unsigned long)NULL; 630 + 631 + return (unsigned long)per_cpu_ptr((const void __percpu *)ptr, cpu); 632 + } 633 + 634 + const struct bpf_func_proto bpf_per_cpu_ptr_proto = { 635 + .func = bpf_per_cpu_ptr, 636 + .gpl_only = false, 637 + .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, 638 + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, 639 + .arg2_type = ARG_ANYTHING, 640 + }; 641 + 626 642 const struct bpf_func_proto bpf_get_current_task_proto __weak; 627 643 const struct bpf_func_proto bpf_probe_read_user_proto __weak; 628 644 const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; ··· 705 689 return &bpf_snprintf_btf_proto; 706 690 case BPF_FUNC_jiffies64: 707 691 return &bpf_jiffies64_proto; 692 + case BPF_FUNC_bpf_per_cpu_ptr: 693 + return &bpf_per_cpu_ptr_proto; 708 694 default: 709 695 break; 710 696 }
+61 -3
kernel/bpf/verifier.c
··· 238 238 u64 msize_max_value; 239 239 int ref_obj_id; 240 240 int func_id; 241 + u32 btf_id; 242 + u32 ret_btf_id; 241 243 }; 242 244 243 245 struct btf *btf_vmlinux; ··· 519 517 [PTR_TO_XDP_SOCK] = "xdp_sock", 520 518 [PTR_TO_BTF_ID] = "ptr_", 521 519 [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", 520 + [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", 522 521 [PTR_TO_MEM] = "mem", 523 522 [PTR_TO_MEM_OR_NULL] = "mem_or_null", 524 523 [PTR_TO_RDONLY_BUF] = "rdonly_buf", ··· 586 583 /* reg->off should be 0 for SCALAR_VALUE */ 587 584 verbose(env, "%lld", reg->var_off.value + reg->off); 588 585 } else { 589 - if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL) 586 + if (t == PTR_TO_BTF_ID || 587 + t == PTR_TO_BTF_ID_OR_NULL || 588 + t == PTR_TO_PERCPU_BTF_ID) 590 589 verbose(env, "%s", kernel_type_name(reg->btf_id)); 591 590 verbose(env, "(id=%d", reg->id); 592 591 if (reg_type_may_be_refcounted_or_null(t)) ··· 2209 2204 case PTR_TO_RDONLY_BUF_OR_NULL: 2210 2205 case PTR_TO_RDWR_BUF: 2211 2206 case PTR_TO_RDWR_BUF_OR_NULL: 2207 + case PTR_TO_PERCPU_BTF_ID: 2212 2208 return true; 2213 2209 default: 2214 2210 return false; ··· 4023 4017 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; 4024 4018 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; 4025 4019 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; 4020 + static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } }; 4026 4021 4027 4022 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { 4028 4023 [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, ··· 4049 4042 [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types, 4050 4043 [ARG_PTR_TO_INT] = &int_ptr_types, 4051 4044 [ARG_PTR_TO_LONG] = &int_ptr_types, 4045 + [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, 4052 4046 }; 4053 4047 4054 4048 static int check_reg_type(struct bpf_verifier_env *env, u32 regno, ··· 
4213 4205 err = check_helper_mem_access(env, regno, 4214 4206 meta->map_ptr->value_size, false, 4215 4207 meta); 4208 + } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) { 4209 + if (!reg->btf_id) { 4210 + verbose(env, "Helper has invalid btf_id in R%d\n", regno); 4211 + return -EACCES; 4212 + } 4213 + meta->ret_btf_id = reg->btf_id; 4216 4214 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { 4217 4215 if (meta->func_id == BPF_FUNC_spin_lock) { 4218 4216 if (process_spin_lock(env, regno, true)) ··· 5128 5114 regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; 5129 5115 regs[BPF_REG_0].id = ++env->id_gen; 5130 5116 regs[BPF_REG_0].mem_size = meta.mem_size; 5117 + } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL) { 5118 + const struct btf_type *t; 5119 + 5120 + mark_reg_known_zero(env, regs, BPF_REG_0); 5121 + t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL); 5122 + if (!btf_type_is_struct(t)) { 5123 + u32 tsize; 5124 + const struct btf_type *ret; 5125 + const char *tname; 5126 + 5127 + /* resolve the type size of ksym. 
*/ 5128 + ret = btf_resolve_size(btf_vmlinux, t, &tsize); 5129 + if (IS_ERR(ret)) { 5130 + tname = btf_name_by_offset(btf_vmlinux, t->name_off); 5131 + verbose(env, "unable to resolve the size of type '%s': %ld\n", 5132 + tname, PTR_ERR(ret)); 5133 + return -EINVAL; 5134 + } 5135 + regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; 5136 + regs[BPF_REG_0].mem_size = tsize; 5137 + } else { 5138 + regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; 5139 + regs[BPF_REG_0].btf_id = meta.ret_btf_id; 5140 + } 5131 5141 } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { 5132 5142 int ret_btf_id; 5133 5143 ··· 7561 7523 dst_reg->mem_size = aux->btf_var.mem_size; 7562 7524 break; 7563 7525 case PTR_TO_BTF_ID: 7526 + case PTR_TO_PERCPU_BTF_ID: 7564 7527 dst_reg->btf_id = aux->btf_var.btf_id; 7565 7528 break; 7566 7529 default: ··· 9488 9449 struct bpf_insn *insn, 9489 9450 struct bpf_insn_aux_data *aux) 9490 9451 { 9491 - u32 type, id = insn->imm; 9452 + u32 datasec_id, type, id = insn->imm; 9453 + const struct btf_var_secinfo *vsi; 9454 + const struct btf_type *datasec; 9492 9455 const struct btf_type *t; 9493 9456 const char *sym_name; 9457 + bool percpu = false; 9494 9458 u64 addr; 9459 + int i; 9495 9460 9496 9461 if (!btf_vmlinux) { 9497 9462 verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n"); ··· 9527 9484 return -ENOENT; 9528 9485 } 9529 9486 9487 + datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu", 9488 + BTF_KIND_DATASEC); 9489 + if (datasec_id > 0) { 9490 + datasec = btf_type_by_id(btf_vmlinux, datasec_id); 9491 + for_each_vsi(i, datasec, vsi) { 9492 + if (vsi->type == id) { 9493 + percpu = true; 9494 + break; 9495 + } 9496 + } 9497 + } 9498 + 9530 9499 insn[0].imm = (u32)addr; 9531 9500 insn[1].imm = addr >> 32; 9532 9501 9533 9502 type = t->type; 9534 9503 t = btf_type_skip_modifiers(btf_vmlinux, type, NULL); 9535 - if (!btf_type_is_struct(t)) { 9504 + if (percpu) { 9505 + aux->btf_var.reg_type = 
PTR_TO_PERCPU_BTF_ID; 9506 + aux->btf_var.btf_id = type; 9507 + } else if (!btf_type_is_struct(t)) { 9536 9508 const struct btf_type *ret; 9537 9509 const char *tname; 9538 9510 u32 tsize;
+2
kernel/trace/bpf_trace.c
··· 1327 1327 return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL; 1328 1328 case BPF_FUNC_snprintf_btf: 1329 1329 return &bpf_snprintf_btf_proto; 1330 + case BPF_FUNC_bpf_per_cpu_ptr: 1331 + return &bpf_per_cpu_ptr_proto; 1330 1332 default: 1331 1333 return NULL; 1332 1334 }
+18
tools/include/uapi/linux/bpf.h
··· 3686 3686 * Return 3687 3687 * The helper returns **TC_ACT_REDIRECT** on success or 3688 3688 * **TC_ACT_SHOT** on error. 3689 + * 3690 + * void *bpf_per_cpu_ptr(const void *percpu_ptr, u32 cpu) 3691 + * Description 3692 + * Take a pointer to a percpu ksym, *percpu_ptr*, and return a 3693 + * pointer to the percpu kernel variable on *cpu*. A ksym is an 3694 + * extern variable decorated with '__ksym'. For a ksym, there is a 3695 + * global var (either static or global) of the same name defined 3696 + * in the kernel. The ksym is percpu if the global var is percpu. 3697 + * The returned pointer points to the global percpu var on *cpu*. 3698 + * 3699 + * bpf_per_cpu_ptr() has the same semantics as per_cpu_ptr() in the 3700 + * kernel, except that bpf_per_cpu_ptr() may return NULL. This 3701 + * happens if *cpu* is greater than or equal to nr_cpu_ids. The caller of 3702 + * bpf_per_cpu_ptr() must check the returned value. 3703 + * Return 3704 + * A pointer pointing to the kernel percpu variable on *cpu*, or 3705 + * NULL, if *cpu* is invalid. 3689 3706 */ 3690 3707 #define __BPF_FUNC_MAPPER(FN) \ 3691 3708 FN(unspec), \ ··· 3858 3841 FN(seq_printf_btf), \ 3859 3842 FN(skb_cgroup_classid), \ 3860 3843 FN(redirect_neigh), \ 3844 + FN(bpf_per_cpu_ptr), \ 3861 3845 /* */ 3862 3846 3863 3847 /* integer value in 'imm' field of BPF_CALL instruction selects which helper