Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

libbpf,bpf: Share BTF relocate-related code with kernel

Share relocation implementation with the kernel. As part of this,
we also need the type/string iteration functions so also share
btf_iter.c file. Relocation code in kernel and userspace is identical
save for the implementation of the reparenting of split BTF to the
relocated base BTF and retrieval of the BTF header from "struct btf";
these small functions need separate user-space and kernel implementations
for the separate "struct btf"s they operate upon.

One other wrinkle on the kernel side is we have to map .BTF.ids in
modules as they were generated with the type ids used at BTF encoding
time. btf_relocate() optionally returns an array mapping from old BTF
ids to relocated ids, so we use that to fix up these references where
needed for kfuncs.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/bpf/20240620091733.1967885-5-alan.maguire@oracle.com

authored by

Alan Maguire and committed by
Andrii Nakryiko
8646db23 e7ac331b

+227 -54
+64
include/linux/btf.h
··· 140 140 const char *btf_get_name(const struct btf *btf); 141 141 void btf_get(struct btf *btf); 142 142 void btf_put(struct btf *btf); 143 + const struct btf_header *btf_header(const struct btf *btf); 143 144 int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr, u32 uattr_sz); 144 145 struct btf *btf_get_by_fd(int fd); 145 146 int btf_get_info_by_fd(const struct btf *btf, ··· 213 212 u32 btf_obj_id(const struct btf *btf); 214 213 bool btf_is_kernel(const struct btf *btf); 215 214 bool btf_is_module(const struct btf *btf); 215 + bool btf_is_vmlinux(const struct btf *btf); 216 216 struct module *btf_try_get_module(const struct btf *btf); 217 217 u32 btf_nr_types(const struct btf *btf); 218 + struct btf *btf_base_btf(const struct btf *btf); 218 219 bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, 219 220 const struct btf_member *m, 220 221 u32 expected_offset, u32 expected_size); ··· 340 337 static inline u8 btf_int_offset(const struct btf_type *t) 341 338 { 342 339 return BTF_INT_OFFSET(*(u32 *)(t + 1)); 340 + } 341 + 342 + static inline __u8 btf_int_bits(const struct btf_type *t) 343 + { 344 + return BTF_INT_BITS(*(__u32 *)(t + 1)); 343 345 } 344 346 345 347 static inline bool btf_type_is_scalar(const struct btf_type *t) ··· 486 478 return (struct btf_param *)(t + 1); 487 479 } 488 480 481 + static inline struct btf_decl_tag *btf_decl_tag(const struct btf_type *t) 482 + { 483 + return (struct btf_decl_tag *)(t + 1); 484 + } 485 + 489 486 static inline int btf_id_cmp_func(const void *a, const void *b) 490 487 { 491 488 const int *pa = a, *pb = b; ··· 528 515 } 529 516 #endif 530 517 518 + enum btf_field_iter_kind { 519 + BTF_FIELD_ITER_IDS, 520 + BTF_FIELD_ITER_STRS, 521 + }; 522 + 523 + struct btf_field_desc { 524 + /* once-per-type offsets */ 525 + int t_off_cnt, t_offs[2]; 526 + /* member struct size, or zero, if no members */ 527 + int m_sz; 528 + /* repeated per-member offsets */ 529 + int m_off_cnt, m_offs[1]; 530 + }; 531 + 
532 + struct btf_field_iter { 533 + struct btf_field_desc desc; 534 + void *p; 535 + int m_idx; 536 + int off_idx; 537 + int vlen; 538 + }; 539 + 531 540 #ifdef CONFIG_BPF_SYSCALL 532 541 const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); 542 + void btf_set_base_btf(struct btf *btf, const struct btf *base_btf); 543 + int btf_relocate(struct btf *btf, const struct btf *base_btf, __u32 **map_ids); 544 + int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, 545 + enum btf_field_iter_kind iter_kind); 546 + __u32 *btf_field_iter_next(struct btf_field_iter *it); 547 + 533 548 const char *btf_name_by_offset(const struct btf *btf, u32 offset); 549 + const char *btf_str_by_offset(const struct btf *btf, u32 offset); 534 550 struct btf *btf_parse_vmlinux(void); 535 551 struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog); 536 552 u32 *btf_kfunc_id_set_contains(const struct btf *btf, u32 kfunc_btf_id, ··· 586 544 { 587 545 return NULL; 588 546 } 547 + 548 + static inline void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) 549 + { 550 + } 551 + 552 + static inline int btf_relocate(void *log, struct btf *btf, const struct btf *base_btf, 553 + __u32 **map_ids) 554 + { 555 + return -EOPNOTSUPP; 556 + } 557 + 558 + static inline int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, 559 + enum btf_field_iter_kind iter_kind) 560 + { 561 + return -EOPNOTSUPP; 562 + } 563 + 564 + static inline __u32 *btf_field_iter_next(struct btf_field_iter *it) 565 + { 566 + return NULL; 567 + } 568 + 589 569 static inline const char *btf_name_by_offset(const struct btf *btf, 590 570 u32 offset) 591 571 {
+7 -1
kernel/bpf/Makefile
··· 50 50 obj-$(CONFIG_BPF_PRELOAD) += preload/ 51 51 52 52 obj-$(CONFIG_BPF_SYSCALL) += relo_core.o 53 - $(obj)/relo_core.o: $(srctree)/tools/lib/bpf/relo_core.c FORCE 53 + obj-$(CONFIG_BPF_SYSCALL) += btf_iter.o 54 + obj-$(CONFIG_BPF_SYSCALL) += btf_relocate.o 55 + 56 + # Some source files are common to libbpf. 57 + vpath %.c $(srctree)/kernel/bpf:$(srctree)/tools/lib/bpf 58 + 59 + $(obj)/%.o: %.c FORCE 54 60 $(call if_changed_rule,cc_o_c)
+125 -53
kernel/bpf/btf.c
··· 274 274 u32 start_str_off; /* first string offset (0 for base BTF) */ 275 275 char name[MODULE_NAME_LEN]; 276 276 bool kernel_btf; 277 + __u32 *base_id_map; /* map from distilled base BTF -> vmlinux BTF ids */ 277 278 }; 278 279 279 280 enum verifier_phase { ··· 531 530 btf_type_is_var(t) || btf_type_is_typedef(t); 532 531 } 533 532 533 + bool btf_is_vmlinux(const struct btf *btf) 534 + { 535 + return btf->kernel_btf && !btf->base_btf; 536 + } 537 + 534 538 u32 btf_nr_types(const struct btf *btf) 535 539 { 536 540 u32 total = 0; ··· 778 772 return true; 779 773 } 780 774 781 - static const char *btf_str_by_offset(const struct btf *btf, u32 offset) 775 + const char *btf_str_by_offset(const struct btf *btf, u32 offset) 782 776 { 783 777 while (offset < btf->start_str_off) 784 778 btf = btf->base_btf; ··· 1676 1670 1677 1671 if (!tab) 1678 1672 return; 1679 - /* For module BTF, we directly assign the sets being registered, so 1680 - * there is nothing to free except kfunc_set_tab. 1681 - */ 1682 - if (btf_is_module(btf)) 1683 - goto free_tab; 1684 1673 for (hook = 0; hook < ARRAY_SIZE(tab->sets); hook++) 1685 1674 kfree(tab->sets[hook]); 1686 - free_tab: 1687 1675 kfree(tab); 1688 1676 btf->kfunc_set_tab = NULL; 1689 1677 } ··· 1735 1735 kvfree(btf->types); 1736 1736 kvfree(btf->resolved_sizes); 1737 1737 kvfree(btf->resolved_ids); 1738 - kvfree(btf->data); 1738 + /* vmlinux does not allocate btf->data, it simply points it at 1739 + * __start_BTF. 
1740 + */ 1741 + if (!btf_is_vmlinux(btf)) 1742 + kvfree(btf->data); 1743 + kvfree(btf->base_id_map); 1739 1744 kfree(btf); 1740 1745 } 1741 1746 ··· 1767 1762 btf_free_id(btf); 1768 1763 call_rcu(&btf->rcu, btf_free_rcu); 1769 1764 } 1765 + } 1766 + 1767 + struct btf *btf_base_btf(const struct btf *btf) 1768 + { 1769 + return btf->base_btf; 1770 + } 1771 + 1772 + const struct btf_header *btf_header(const struct btf *btf) 1773 + { 1774 + return &btf->hdr; 1775 + } 1776 + 1777 + void btf_set_base_btf(struct btf *btf, const struct btf *base_btf) 1778 + { 1779 + btf->base_btf = (struct btf *)base_btf; 1780 + btf->start_id = btf_nr_types(base_btf); 1781 + btf->start_str_off = base_btf->hdr.str_len; 1770 1782 } 1771 1783 1772 1784 static int env_resolve_init(struct btf_verifier_env *env) ··· 6105 6083 BTF_ID_LIST(bpf_ctx_convert_btf_id) 6106 6084 BTF_ID(struct, bpf_ctx_convert) 6107 6085 6108 - struct btf *btf_parse_vmlinux(void) 6086 + static struct btf *btf_parse_base(struct btf_verifier_env *env, const char *name, 6087 + void *data, unsigned int data_size) 6109 6088 { 6110 - struct btf_verifier_env *env = NULL; 6111 - struct bpf_verifier_log *log; 6112 6089 struct btf *btf = NULL; 6113 6090 int err; 6114 6091 6115 6092 if (!IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) 6116 6093 return ERR_PTR(-ENOENT); 6117 - 6118 - env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); 6119 - if (!env) 6120 - return ERR_PTR(-ENOMEM); 6121 - 6122 - log = &env->log; 6123 - log->level = BPF_LOG_KERNEL; 6124 6094 6125 6095 btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); 6126 6096 if (!btf) { ··· 6121 6107 } 6122 6108 env->btf = btf; 6123 6109 6124 - btf->data = __start_BTF; 6125 - btf->data_size = __stop_BTF - __start_BTF; 6110 + btf->data = data; 6111 + btf->data_size = data_size; 6126 6112 btf->kernel_btf = true; 6127 - snprintf(btf->name, sizeof(btf->name), "vmlinux"); 6113 + snprintf(btf->name, sizeof(btf->name), "%s", name); 6128 6114 6129 6115 err = btf_parse_hdr(env); 6130 6116 if 
(err) ··· 6144 6130 if (err) 6145 6131 goto errout; 6146 6132 6147 - /* btf_parse_vmlinux() runs under bpf_verifier_lock */ 6148 - bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); 6149 - 6150 6133 refcount_set(&btf->refcnt, 1); 6151 6134 6152 - err = btf_alloc_id(btf); 6153 - if (err) 6154 - goto errout; 6155 - 6156 - btf_verifier_env_free(env); 6157 6135 return btf; 6158 6136 6159 6137 errout: 6160 - btf_verifier_env_free(env); 6161 6138 if (btf) { 6162 6139 kvfree(btf->types); 6163 6140 kfree(btf); ··· 6156 6151 return ERR_PTR(err); 6157 6152 } 6158 6153 6159 - #ifdef CONFIG_DEBUG_INFO_BTF_MODULES 6160 - 6161 - static struct btf *btf_parse_module(const char *module_name, const void *data, unsigned int data_size) 6154 + struct btf *btf_parse_vmlinux(void) 6162 6155 { 6163 6156 struct btf_verifier_env *env = NULL; 6164 6157 struct bpf_verifier_log *log; 6165 - struct btf *btf = NULL, *base_btf; 6158 + struct btf *btf; 6166 6159 int err; 6167 6160 6168 - base_btf = bpf_get_btf_vmlinux(); 6169 - if (IS_ERR(base_btf)) 6170 - return base_btf; 6171 - if (!base_btf) 6161 + env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); 6162 + if (!env) 6163 + return ERR_PTR(-ENOMEM); 6164 + 6165 + log = &env->log; 6166 + log->level = BPF_LOG_KERNEL; 6167 + btf = btf_parse_base(env, "vmlinux", __start_BTF, __stop_BTF - __start_BTF); 6168 + if (IS_ERR(btf)) 6169 + goto err_out; 6170 + 6171 + /* btf_parse_vmlinux() runs under bpf_verifier_lock */ 6172 + bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]); 6173 + err = btf_alloc_id(btf); 6174 + if (err) { 6175 + btf_free(btf); 6176 + btf = ERR_PTR(err); 6177 + } 6178 + err_out: 6179 + btf_verifier_env_free(env); 6180 + return btf; 6181 + } 6182 + 6183 + #ifdef CONFIG_DEBUG_INFO_BTF_MODULES 6184 + 6185 + /* If .BTF_ids section was created with distilled base BTF, both base and 6186 + * split BTF ids will need to be mapped to actual base/split ids for 6187 + * BTF now that it has been relocated. 
6188 + */ 6189 + static __u32 btf_relocate_id(const struct btf *btf, __u32 id) 6190 + { 6191 + if (!btf->base_btf || !btf->base_id_map) 6192 + return id; 6193 + return btf->base_id_map[id]; 6194 + } 6195 + 6196 + static struct btf *btf_parse_module(const char *module_name, const void *data, 6197 + unsigned int data_size, void *base_data, 6198 + unsigned int base_data_size) 6199 + { 6200 + struct btf *btf = NULL, *vmlinux_btf, *base_btf = NULL; 6201 + struct btf_verifier_env *env = NULL; 6202 + struct bpf_verifier_log *log; 6203 + int err = 0; 6204 + 6205 + vmlinux_btf = bpf_get_btf_vmlinux(); 6206 + if (IS_ERR(vmlinux_btf)) 6207 + return vmlinux_btf; 6208 + if (!vmlinux_btf) 6172 6209 return ERR_PTR(-EINVAL); 6173 6210 6174 6211 env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); ··· 6219 6172 6220 6173 log = &env->log; 6221 6174 log->level = BPF_LOG_KERNEL; 6175 + 6176 + if (base_data) { 6177 + base_btf = btf_parse_base(env, ".BTF.base", base_data, base_data_size); 6178 + if (IS_ERR(base_btf)) { 6179 + err = PTR_ERR(base_btf); 6180 + goto errout; 6181 + } 6182 + } else { 6183 + base_btf = vmlinux_btf; 6184 + } 6222 6185 6223 6186 btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); 6224 6187 if (!btf) { ··· 6269 6212 if (err) 6270 6213 goto errout; 6271 6214 6215 + if (base_btf != vmlinux_btf) { 6216 + err = btf_relocate(btf, vmlinux_btf, &btf->base_id_map); 6217 + if (err) 6218 + goto errout; 6219 + btf_free(base_btf); 6220 + base_btf = vmlinux_btf; 6221 + } 6222 + 6272 6223 btf_verifier_env_free(env); 6273 6224 refcount_set(&btf->refcnt, 1); 6274 6225 return btf; 6275 6226 6276 6227 errout: 6277 6228 btf_verifier_env_free(env); 6229 + if (base_btf != vmlinux_btf) 6230 + btf_free(base_btf); 6278 6231 if (btf) { 6279 6232 kvfree(btf->data); 6280 6233 kvfree(btf->types); ··· 7837 7770 err = -ENOMEM; 7838 7771 goto out; 7839 7772 } 7840 - btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size); 7773 + btf = btf_parse_module(mod->name, 
mod->btf_data, mod->btf_data_size, 7774 + mod->btf_base_data, mod->btf_base_data_size); 7841 7775 if (IS_ERR(btf)) { 7842 7776 kfree(btf_mod); 7843 7777 if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) { ··· 8162 8094 bool add_filter = !!kset->filter; 8163 8095 struct btf_kfunc_set_tab *tab; 8164 8096 struct btf_id_set8 *set; 8165 - u32 set_cnt; 8097 + u32 set_cnt, i; 8166 8098 int ret; 8167 8099 8168 8100 if (hook >= BTF_KFUNC_HOOK_MAX) { ··· 8208 8140 goto end; 8209 8141 } 8210 8142 8211 - /* We don't need to allocate, concatenate, and sort module sets, because 8212 - * only one is allowed per hook. Hence, we can directly assign the 8213 - * pointer and return. 8214 - */ 8215 - if (!vmlinux_set) { 8216 - tab->sets[hook] = add_set; 8217 - goto do_add_filter; 8218 - } 8219 - 8220 8143 /* In case of vmlinux sets, there may be more than one set being 8221 8144 * registered per hook. To create a unified set, we allocate a new set 8222 8145 * and concatenate all individual sets being registered. While each set 8223 8146 * is individually sorted, they may become unsorted when concatenated, 8224 8147 * hence re-sorting the final set again is required to make binary 8225 8148 * searching the set using btf_id_set8_contains function work. 8149 + * 8150 + * For module sets, we need to allocate as we may need to relocate 8151 + * BTF ids. 8226 8152 */ 8227 8153 set_cnt = set ? 
set->cnt : 0; 8228 8154 ··· 8246 8184 8247 8185 /* Concatenate the two sets */ 8248 8186 memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0])); 8187 + /* Now that the set is copied, update with relocated BTF ids */ 8188 + for (i = set->cnt; i < set->cnt + add_set->cnt; i++) 8189 + set->pairs[i].id = btf_relocate_id(btf, set->pairs[i].id); 8190 + 8249 8191 set->cnt += add_set->cnt; 8250 8192 8251 8193 sort(set->pairs, set->cnt, sizeof(set->pairs[0]), btf_id_cmp_func, NULL); 8252 8194 8253 - do_add_filter: 8254 8195 if (add_filter) { 8255 8196 hook_filter = &tab->hook_filters[hook]; 8256 8197 hook_filter->filters[hook_filter->nr_filters++] = kset->filter; ··· 8373 8308 return PTR_ERR(btf); 8374 8309 8375 8310 for (i = 0; i < kset->set->cnt; i++) { 8376 - ret = btf_check_kfunc_protos(btf, kset->set->pairs[i].id, 8311 + ret = btf_check_kfunc_protos(btf, btf_relocate_id(btf, kset->set->pairs[i].id), 8377 8312 kset->set->pairs[i].flags); 8378 8313 if (ret) 8379 8314 goto err_out; ··· 8437 8372 u32 nr_args, i; 8438 8373 8439 8374 for (i = 0; i < cnt; i++) { 8440 - dtor_btf_id = dtors[i].kfunc_btf_id; 8375 + dtor_btf_id = btf_relocate_id(btf, dtors[i].kfunc_btf_id); 8441 8376 8442 8377 dtor_func = btf_type_by_id(btf, dtor_btf_id); 8443 8378 if (!dtor_func || !btf_type_is_func(dtor_func)) ··· 8472 8407 { 8473 8408 struct btf_id_dtor_kfunc_tab *tab; 8474 8409 struct btf *btf; 8475 - u32 tab_cnt; 8410 + u32 tab_cnt, i; 8476 8411 int ret; 8477 8412 8478 8413 btf = btf_get_module_btf(owner); ··· 8523 8458 btf->dtor_kfunc_tab = tab; 8524 8459 8525 8460 memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0])); 8461 + 8462 + /* remap BTF ids based on BTF relocation (if any) */ 8463 + for (i = tab_cnt; i < tab_cnt + add_cnt; i++) { 8464 + tab->dtors[i].btf_id = btf_relocate_id(btf, tab->dtors[i].btf_id); 8465 + tab->dtors[i].kfunc_btf_id = btf_relocate_id(btf, tab->dtors[i].kfunc_btf_id); 8466 + } 8467 + 8526 8468 tab->cnt += add_cnt; 8527 
8469 8528 8470 sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL);
+8
tools/lib/bpf/btf_iter.c
··· 2 2 /* Copyright (c) 2021 Facebook */ 3 3 /* Copyright (c) 2024, Oracle and/or its affiliates. */ 4 4 5 + #ifdef __KERNEL__ 6 + #include <linux/bpf.h> 7 + #include <linux/btf.h> 8 + 9 + #define btf_var_secinfos(t) (struct btf_var_secinfo *)btf_type_var_secinfo(t) 10 + 11 + #else 5 12 #include "btf.h" 6 13 #include "libbpf_internal.h" 14 + #endif 7 15 8 16 int btf_field_iter_init(struct btf_field_iter *it, struct btf_type *t, 9 17 enum btf_field_iter_kind iter_kind)
+23
tools/lib/bpf/btf_relocate.c
··· 5 5 #define _GNU_SOURCE 6 6 #endif 7 7 8 + #ifdef __KERNEL__ 9 + #include <linux/bpf.h> 10 + #include <linux/bsearch.h> 11 + #include <linux/btf.h> 12 + #include <linux/sort.h> 13 + #include <linux/string.h> 14 + #include <linux/bpf_verifier.h> 15 + 16 + #define btf_type_by_id (struct btf_type *)btf_type_by_id 17 + #define btf__type_cnt btf_nr_types 18 + #define btf__base_btf btf_base_btf 19 + #define btf__name_by_offset btf_name_by_offset 20 + #define btf__str_by_offset btf_str_by_offset 21 + #define btf_kflag btf_type_kflag 22 + 23 + #define calloc(nmemb, sz) kvcalloc(nmemb, sz, GFP_KERNEL | __GFP_NOWARN) 24 + #define free(ptr) kvfree(ptr) 25 + #define qsort(base, num, sz, cmp) sort(base, num, sz, cmp, NULL) 26 + 27 + #else 28 + 8 29 #include "btf.h" 9 30 #include "bpf.h" 10 31 #include "libbpf.h" 11 32 #include "libbpf_internal.h" 33 + 34 + #endif /* __KERNEL__ */ 12 35 13 36 struct btf; 14 37