tools/lib/bpf/libbpf.c at v5.16-rc6 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / tools / lib / bpf / libbpf.c
at v5.16-rc6 11474 lines 303 kB view raw
    1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
    2
    3/*
    4 * Common eBPF ELF object loading operations.
    5 *
    6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
    7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
    8 * Copyright (C) 2015 Huawei Inc.
    9 * Copyright (C) 2017 Nicira, Inc.
   10 * Copyright (C) 2019 Isovalent, Inc.
   11 */
   12
   13#ifndef _GNU_SOURCE
   14#define _GNU_SOURCE
   15#endif
   16#include <stdlib.h>
   17#include <stdio.h>
   18#include <stdarg.h>
   19#include <libgen.h>
   20#include <inttypes.h>
   21#include <limits.h>
   22#include <string.h>
   23#include <unistd.h>
   24#include <endian.h>
   25#include <fcntl.h>
   26#include <errno.h>
   27#include <ctype.h>
   28#include <asm/unistd.h>
   29#include <linux/err.h>
   30#include <linux/kernel.h>
   31#include <linux/bpf.h>
   32#include <linux/btf.h>
   33#include <linux/filter.h>
   34#include <linux/list.h>
   35#include <linux/limits.h>
   36#include <linux/perf_event.h>
   37#include <linux/ring_buffer.h>
   38#include <linux/version.h>
   39#include <sys/epoll.h>
   40#include <sys/ioctl.h>
   41#include <sys/mman.h>
   42#include <sys/stat.h>
   43#include <sys/types.h>
   44#include <sys/vfs.h>
   45#include <sys/utsname.h>
   46#include <sys/resource.h>
   47#include <libelf.h>
   48#include <gelf.h>
   49#include <zlib.h>
   50
   51#include "libbpf.h"
   52#include "bpf.h"
   53#include "btf.h"
   54#include "str_error.h"
   55#include "libbpf_internal.h"
   56#include "hashmap.h"
   57#include "bpf_gen_internal.h"
   58
/* Fallback definition of the BPF filesystem magic for build environments
 * whose headers do not provide it (presumably compared against
 * statfs() results elsewhere in this file -- not visible here).
 */
#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC		0xcafe4a11
#endif

/* Size in bytes of a single BPF instruction. */
#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vfprintf() in __base_pr() uses nonliteral format string. It may break
 * compilation if user enables corresponding warning. Disable it explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

/* Mark a function as printf-like for compiler format checking: argument
 * number 'a' is the format string, variadic arguments start at 'b'.
 */
#define __printf(a, b)	__attribute__((format(printf, a, b)))

/* Forward declarations of static helpers that are used before their
 * definitions further down in this translation unit.
 */
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
   74
   75static int __base_pr(enum libbpf_print_level level, const char *format,
   76		     va_list args)
   77{
   78	if (level == LIBBPF_DEBUG)
   79		return 0;
   80
   81	return vfprintf(stderr, format, args);
   82}
   83
   84static libbpf_print_fn_t __libbpf_pr = __base_pr;
   85
   86libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
   87{
   88	libbpf_print_fn_t old_print_fn = __libbpf_pr;
   89
   90	__libbpf_pr = fn;
   91	return old_print_fn;
   92}
   93
   94__printf(2, 3)
   95void libbpf_print(enum libbpf_print_level level, const char *format, ...)
   96{
   97	va_list args;
   98
   99	if (!__libbpf_pr)
  100		return;
  101
  102	va_start(args, format);
  103	__libbpf_pr(level, format, args);
  104	va_end(args);
  105}
  106
  107static void pr_perm_msg(int err)
  108{
  109	struct rlimit limit;
  110	char buf[100];
  111
  112	if (err != -EPERM || geteuid() != 0)
  113		return;
  114
  115	err = getrlimit(RLIMIT_MEMLOCK, &limit);
  116	if (err)
  117		return;
  118
  119	if (limit.rlim_cur == RLIM_INFINITY)
  120		return;
  121
  122	if (limit.rlim_cur < 1024)
  123		snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
  124	else if (limit.rlim_cur < 1024*1024)
  125		snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
  126	else
  127		snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
  128
  129	pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
  130		buf);
  131}
  132
  133#define STRERR_BUFSIZE  128
  134
  135/* Copied from tools/perf/util/util.h */
  136#ifndef zfree
  137# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
  138#endif
  139
  140#ifndef zclose
  141# define zclose(fd) ({			\
  142	int ___err = 0;			\
  143	if ((fd) >= 0)			\
  144		___err = close((fd));	\
  145	fd = -1;			\
  146	___err; })
  147#endif
  148
  149static inline __u64 ptr_to_u64(const void *ptr)
  150{
  151	return (__u64) (unsigned long) ptr;
  152}
  153
  154/* this goes away in libbpf 1.0 */
  155enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;
  156
  157int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
  158{
  159	/* __LIBBPF_STRICT_LAST is the last power-of-2 value used + 1, so to
  160	 * get all possible values we compensate last +1, and then (2*x - 1)
  161	 * to get the bit mask
  162	 */
  163	if (mode != LIBBPF_STRICT_ALL
  164	    && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1)))
  165		return errno = EINVAL, -EINVAL;
  166
  167	libbpf_mode = mode;
  168	return 0;
  169}
  170
/* Identifiers of kernel features that can be queried through
 * kernel_supports(). __FEAT_CNT is not a feature; being the last
 * enumerator, it equals the number of features listed before it.
 */
enum kern_feature_id {
	/* v4.14: kernel support for program & map names. */
	FEAT_PROG_NAME,
	/* v5.2: kernel support for global data sections. */
	FEAT_GLOBAL_DATA,
	/* BTF support */
	FEAT_BTF,
	/* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
	FEAT_BTF_FUNC,
	/* BTF_KIND_VAR and BTF_KIND_DATASEC support */
	FEAT_BTF_DATASEC,
	/* BTF_FUNC_GLOBAL is supported */
	FEAT_BTF_GLOBAL_FUNC,
	/* BPF_F_MMAPABLE is supported for arrays */
	FEAT_ARRAY_MMAP,
	/* kernel support for expected_attach_type in BPF_PROG_LOAD */
	FEAT_EXP_ATTACH_TYPE,
	/* bpf_probe_read_{kernel,user}[_str] helpers */
	FEAT_PROBE_READ_KERN,
	/* BPF_PROG_BIND_MAP is supported */
	FEAT_PROG_BIND_MAP,
	/* Kernel support for module BTFs */
	FEAT_MODULE_BTF,
	/* BTF_KIND_FLOAT support */
	FEAT_BTF_FLOAT,
	/* BPF perf link support */
	FEAT_PERF_LINK,
	/* BTF_KIND_DECL_TAG support */
	FEAT_BTF_DECL_TAG,
	__FEAT_CNT,
};

/* Forward declaration; the definition is further down in this file. */
static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id);
  204
/* Kind of relocation stored in struct reloc_desc.type. The handling of
 * each kind lives in the relocation code, which is not part of this
 * section of the file.
 */
enum reloc_type {
	RELO_LD64,
	RELO_CALL,
	RELO_DATA,
	RELO_EXTERN_VAR,
	RELO_EXTERN_FUNC,
	RELO_SUBPROG_ADDR,
};
  213
/* One relocation record; struct bpf_program keeps an array of these in
 * its reloc_desc/nr_reloc members.
 * NOTE(review): field meanings below are inferred from names -- confirm
 * against the relocation collection code.
 */
struct reloc_desc {
	enum reloc_type type;
	int insn_idx;	/* presumably index of the instruction to patch */
	int map_idx;	/* presumably index of the referenced map */
	int sym_off;	/* presumably offset taken from the ELF symbol */
};
  220
struct bpf_sec_def;

/* Callback types stored in struct bpf_sec_def (init_fn, preload_fn and
 * attach_fn members). Each callback receives a 'cookie' argument in
 * addition to the program it operates on.
 */
typedef int (*init_fn_t)(struct bpf_program *prog, long cookie);
typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_params *attr, long cookie);
typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie);
  226
/* stored as sec_def->cookie for all libbpf-supported SEC()s; the values
 * are distinct bits so they can be OR-ed together (see SEC_ATTACHABLE_OPT)
 */
enum sec_def_flags {
	SEC_NONE = 0,
	/* expected_attach_type is optional, if kernel doesn't support that */
	SEC_EXP_ATTACH_OPT = 1,
	/* legacy, only used by libbpf_get_type_names() and
	 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
	 * This used to be associated with cgroup (and few other) BPF programs
	 * that were attachable through BPF_PROG_ATTACH command. Pretty
	 * meaningless nowadays, though.
	 */
	SEC_ATTACHABLE = 2,
	SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
	/* attachment target is specified through BTF ID in either kernel or
	 * other BPF program's BTF object */
	SEC_ATTACH_BTF = 4,
	/* BPF program type allows sleeping/blocking in kernel */
	SEC_SLEEPABLE = 8,
	/* allow non-strict prefix matching */
	SEC_SLOPPY_PFX = 16,
};
  248
/* Description of one supported SEC() definition: the section name, the
 * program/attach types associated with it, and optional callbacks.
 */
struct bpf_sec_def {
	const char *sec;
	enum bpf_prog_type prog_type;
	enum bpf_attach_type expected_attach_type;
	/* holds enum sec_def_flags bits for libbpf-supported SEC()s */
	long cookie;

	/* callbacks; each is passed 'cookie' as its last argument */
	init_fn_t init_fn;
	preload_fn_t preload_fn;
	attach_fn_t attach_fn;
};
  259
/*
 * bpf_prog should be a better name but it has been used in
 * linux/filter.h.
 *
 * State kept for one BPF program (entry-point program or sub-program)
 * found in an ELF section; instances live in bpf_object.programs.
 */
struct bpf_program {
	const struct bpf_sec_def *sec_def;
	char *sec_name;
	size_t sec_idx;
	/* this program's instruction offset (in number of instructions)
	 * within its containing ELF section
	 */
	size_t sec_insn_off;
	/* number of original instructions in ELF section belonging to this
	 * program, not taking into account subprogram instructions possibly
	 * appended later during relocation
	 */
	size_t sec_insn_cnt;
	/* Offset (in number of instructions) of the start of instructions
	 * belonging to this BPF program within its containing main BPF
	 * program. For the entry-point (main) BPF program, this is always
	 * zero. For a sub-program, this gets reset before each of main BPF
	 * programs are processed and relocated and is used to determine
	 * whether sub-program was already appended to the main program, and
	 * if yes, at which instruction offset.
	 */
	size_t sub_insn_off;

	char *name;
	/* name with / replaced by _; makes recursive pinning
	 * in bpf_object__pin_programs easier
	 */
	char *pin_name;

	/* instructions that belong to BPF program; insns[0] is located at
	 * sec_insn_off instruction within its ELF section in ELF file, so
	 * when mapping ELF file instruction index to the local instruction,
	 * one needs to subtract sec_insn_off; and vice versa.
	 */
	struct bpf_insn *insns;
	/* actual number of instructions in this BPF program's image; for
	 * entry-point BPF programs this includes the size of main program
	 * itself plus all the used sub-programs, appended at the end
	 */
	size_t insns_cnt;

	struct reloc_desc *reloc_desc;
	int nr_reloc;
	int log_level;

	/* fds[] holds 'nr' file descriptors; nr == -1 is the state set by
	 * bpf_object__init_prog() and bpf_program__unload()
	 */
	struct {
		int nr;
		int *fds;
	} instances;
	bpf_program_prep_t preprocessor;

	struct bpf_object *obj;
	/* user private data; clear_priv, if set, is invoked on it by
	 * bpf_program__exit()
	 */
	void *priv;
	bpf_program_clear_priv_t clear_priv;

	bool load;
	bool mark_btf_static;
	enum bpf_prog_type type;
	enum bpf_attach_type expected_attach_type;
	int prog_ifindex;
	__u32 attach_btf_obj_fd;
	__u32 attach_btf_id;
	__u32 attach_prog_fd;
	void *func_info;
	__u32 func_info_rec_size;
	__u32 func_info_cnt;

	void *line_info;
	__u32 line_info_rec_size;
	__u32 line_info_cnt;
	__u32 prog_flags;
};
  336
/* Extra state attached to a struct_ops map (bpf_map.st_ops). */
struct bpf_struct_ops {
	/* name of the struct type in the BPF object's BTF */
	const char *tname;
	const struct btf_type *type;
	/* indexed by member index of 'type'; progs[i] may be NULL */
	struct bpf_program **progs;
	/* indexed by member index of 'type'; byte offset of the matching
	 * function pointer inside kern_vdata
	 */
	__u32 *kern_func_off;
	/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
	void *data;
	/* e.g. struct bpf_struct_ops_tcp_congestion_ops in
	 *      btf_vmlinux's format.
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[... some other kernel fields ...]
	 *	struct tcp_congestion_ops data;
	 * }
	 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
	 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
	 * from "data".
	 */
	void *kern_vdata;
	__u32 type_id;
};
  357
/* Names of the special sections libbpf recognizes. STRUCT_OPS_SEC is also
 * the name of the BTF DATASEC looked up when struct_ops maps are set up.
 */
#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
  364
/* Classification stored in bpf_map.libbpf_type.
 * NOTE(review): presumably the non-UNSPEC values mark the internal maps
 * backing .data, .bss, .rodata and .kconfig -- confirm against the map
 * creation code.
 */
enum libbpf_map_type {
	LIBBPF_MAP_UNSPEC,
	LIBBPF_MAP_DATA,
	LIBBPF_MAP_BSS,
	LIBBPF_MAP_RODATA,
	LIBBPF_MAP_KCONFIG,
};
  372
/* State kept for one map of a BPF object; instances live in
 * bpf_object.maps.
 */
struct bpf_map {
	char *name;
	/* real_name is defined for special internal maps (.rodata*,
	 * .data*, .bss, .kconfig) and preserves their original ELF section
	 * name. This is important to be able to find corresponding BTF
	 * DATASEC information.
	 */
	char *real_name;
	int fd;
	int sec_idx;
	size_t sec_offset;
	int map_ifindex;
	int inner_map_fd;
	struct bpf_map_def def;
	__u32 numa_node;
	__u32 btf_var_idx;
	__u32 btf_key_type_id;
	__u32 btf_value_type_id;
	/* for struct_ops maps: set by bpf_map__init_kern_struct_ops() to the
	 * kernel BTF id of the map value struct
	 */
	__u32 btf_vmlinux_value_type_id;
	void *priv;
	bpf_map_clear_priv_t clear_priv;
	enum libbpf_map_type libbpf_type;
	void *mmaped;
	/* struct_ops specific state, read by bpf_map__init_kern_struct_ops() */
	struct bpf_struct_ops *st_ops;
	struct bpf_map *inner_map;
	void **init_slots;
	int init_slots_sz;
	char *pin_path;
	bool pinned;
	bool reused;
	__u64 map_extra;
};
  405
/* Kind of extern described by struct extern_desc; selects which member of
 * its union (kcfg or ksym) applies.
 */
enum extern_type {
	EXT_UNKNOWN,
	EXT_KCFG,
	EXT_KSYM,
};
  411
/* Value type of a kcfg extern (extern_desc.kcfg.type). */
enum kcfg_type {
	KCFG_UNKNOWN,
	KCFG_CHAR,
	KCFG_BOOL,
	KCFG_INT,
	KCFG_TRISTATE,
	KCFG_CHAR_ARR,
};
  420
/* Description of one extern of a BPF object; instances live in
 * bpf_object.externs. 'type' tells which union member is meaningful.
 */
struct extern_desc {
	enum extern_type type;
	int sym_idx;
	int btf_id;
	int sec_btf_id;
	const char *name;
	bool is_set;
	bool is_weak;
	union {
		/* NOTE(review): presumably used when type == EXT_KCFG */
		struct {
			enum kcfg_type type;
			int sz;
			int align;
			int data_off;
			bool is_signed;
		} kcfg;
		/* NOTE(review): presumably used when type == EXT_KSYM */
		struct {
			unsigned long long addr;

			/* target btf_id of the corresponding kernel var. */
			int kernel_btf_obj_fd;
			int kernel_btf_id;

			/* local btf_id of the ksym extern's type. */
			__u32 type_id;
			/* BTF fd index to be patched in for insn->off, this is
			 * 0 for vmlinux BTF, index in obj->fd_array for module
			 * BTF
			 */
			__s16 btf_fd_idx;
		} ksym;
	};
};
  454
/* File-local list head; presumably the list that struct bpf_object's
 * 'list' member links loaded objects into -- confirm at the open/close code.
 */
static LIST_HEAD(bpf_objects_list);
  456
/* BTF of one kernel module; bpf_object.btf_modules is an array of these. */
struct module_btf {
	struct btf *btf;
	char *name;
	__u32 id;
	int fd;
	int fd_array_idx;
};
  464
/* Classification of an ELF section, stored in elf_sec_desc.sec_type.
 * SEC_UNUSED is 0, so a zero-initialized descriptor is "unused".
 */
enum sec_type {
	SEC_UNUSED = 0,
	SEC_RELO,
	SEC_BSS,
	SEC_DATA,
	SEC_RODATA,
};
  472
/* Per-section record kept in elf_state.secs: the section's classification
 * together with its ELF header and data.
 */
struct elf_sec_desc {
	enum sec_type sec_type;
	Elf64_Shdr *shdr;
	Elf_Data *data;
};
  478
/* ELF parsing state of a bpf_object (bpf_object.efile). */
struct elf_state {
	int fd;
	const void *obj_buf;
	size_t obj_buf_sz;
	Elf *elf;
	Elf64_Ehdr *ehdr;
	/* symbol table data; treated as an array of Elf64_Sym */
	Elf_Data *symbols;
	Elf_Data *st_ops_data;
	size_t shstrndx; /* section index for section name strings */
	size_t strtabidx;
	struct elf_sec_desc *secs;
	int sec_cnt;
	int maps_shndx;
	int btf_maps_shndx;
	__u32 btf_maps_sec_btf_id;
	int text_shndx;
	int symbols_shndx;
	/* -1 when the object has no struct_ops section */
	int st_ops_shndx;
};
  498
/* In-memory representation of one BPF object file: its programs, maps,
 * externs, BTF data and ELF parsing state.
 */
struct bpf_object {
	char name[BPF_OBJ_NAME_LEN];
	char license[64];
	__u32 kern_version;

	/* array of nr_programs entries */
	struct bpf_program *programs;
	size_t nr_programs;
	/* array of nr_maps entries */
	struct bpf_map *maps;
	size_t nr_maps;
	size_t maps_cap;

	char *kconfig;
	struct extern_desc *externs;
	int nr_extern;
	int kconfig_map_idx;

	bool loaded;
	bool has_subcalls;
	bool has_rodata;

	struct bpf_gen *gen_loader;

	/* Information when doing ELF related work. Only valid if efile.elf is not NULL */
	struct elf_state efile;
	/*
	 * All loaded bpf_object are linked in a list, which is
	 * hidden to caller. bpf_objects__<func> handlers deal with
	 * all objects.
	 */
	struct list_head list;

	struct btf *btf;
	struct btf_ext *btf_ext;

	/* Parse and load BTF vmlinux if any of the programs in the object need
	 * it at load time.
	 */
	struct btf *btf_vmlinux;
	/* Path to the custom BTF to be used for BPF CO-RE relocations as an
	 * override for vmlinux BTF.
	 */
	char *btf_custom_path;
	/* vmlinux BTF override for CO-RE relocations */
	struct btf *btf_vmlinux_override;
	/* Lazily initialized kernel module BTFs */
	struct module_btf *btf_modules;
	bool btf_modules_loaded;
	size_t btf_module_cnt;
	size_t btf_module_cap;

	void *priv;
	bpf_object_clear_priv_t clear_priv;

	int *fd_array;
	size_t fd_array_cap;
	size_t fd_array_cnt;

	/* flexible array member; must stay last */
	char path[];
};
  558
/* Forward declarations of the static ELF access helpers used throughout
 * this file; their definitions are further down in this translation unit.
 */
static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
  568
  569void bpf_program__unload(struct bpf_program *prog)
  570{
  571	int i;
  572
  573	if (!prog)
  574		return;
  575
  576	/*
  577	 * If the object is opened but the program was never loaded,
  578	 * it is possible that prog->instances.nr == -1.
  579	 */
  580	if (prog->instances.nr > 0) {
  581		for (i = 0; i < prog->instances.nr; i++)
  582			zclose(prog->instances.fds[i]);
  583	} else if (prog->instances.nr != -1) {
  584		pr_warn("Internal error: instances.nr is %d\n",
  585			prog->instances.nr);
  586	}
  587
  588	prog->instances.nr = -1;
  589	zfree(&prog->instances.fds);
  590
  591	zfree(&prog->func_info);
  592	zfree(&prog->line_info);
  593}
  594
  595static void bpf_program__exit(struct bpf_program *prog)
  596{
  597	if (!prog)
  598		return;
  599
  600	if (prog->clear_priv)
  601		prog->clear_priv(prog, prog->priv);
  602
  603	prog->priv = NULL;
  604	prog->clear_priv = NULL;
  605
  606	bpf_program__unload(prog);
  607	zfree(&prog->name);
  608	zfree(&prog->sec_name);
  609	zfree(&prog->pin_name);
  610	zfree(&prog->insns);
  611	zfree(&prog->reloc_desc);
  612
  613	prog->nr_reloc = 0;
  614	prog->insns_cnt = 0;
  615	prog->sec_idx = -1;
  616}
  617
  618static char *__bpf_program__pin_name(struct bpf_program *prog)
  619{
  620	char *name, *p;
  621
  622	if (libbpf_mode & LIBBPF_STRICT_SEC_NAME)
  623		name = strdup(prog->name);
  624	else
  625		name = strdup(prog->sec_name);
  626
  627	if (!name)
  628		return NULL;
  629
  630	p = name;
  631
  632	while ((p = strchr(p, '/')))
  633		*p = '_';
  634
  635	return name;
  636}
  637
  638static bool insn_is_subprog_call(const struct bpf_insn *insn)
  639{
  640	return BPF_CLASS(insn->code) == BPF_JMP &&
  641	       BPF_OP(insn->code) == BPF_CALL &&
  642	       BPF_SRC(insn->code) == BPF_K &&
  643	       insn->src_reg == BPF_PSEUDO_CALL &&
  644	       insn->dst_reg == 0 &&
  645	       insn->off == 0;
  646}
  647
  648static bool is_call_insn(const struct bpf_insn *insn)
  649{
  650	return insn->code == (BPF_JMP | BPF_CALL);
  651}
  652
  653static bool insn_is_pseudo_func(struct bpf_insn *insn)
  654{
  655	return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
  656}
  657
  658static int
  659bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
  660		      const char *name, size_t sec_idx, const char *sec_name,
  661		      size_t sec_off, void *insn_data, size_t insn_data_sz)
  662{
  663	if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
  664		pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
  665			sec_name, name, sec_off, insn_data_sz);
  666		return -EINVAL;
  667	}
  668
  669	memset(prog, 0, sizeof(*prog));
  670	prog->obj = obj;
  671
  672	prog->sec_idx = sec_idx;
  673	prog->sec_insn_off = sec_off / BPF_INSN_SZ;
  674	prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
  675	/* insns_cnt can later be increased by appending used subprograms */
  676	prog->insns_cnt = prog->sec_insn_cnt;
  677
  678	prog->type = BPF_PROG_TYPE_UNSPEC;
  679	prog->load = true;
  680
  681	prog->instances.fds = NULL;
  682	prog->instances.nr = -1;
  683
  684	prog->sec_name = strdup(sec_name);
  685	if (!prog->sec_name)
  686		goto errout;
  687
  688	prog->name = strdup(name);
  689	if (!prog->name)
  690		goto errout;
  691
  692	prog->pin_name = __bpf_program__pin_name(prog);
  693	if (!prog->pin_name)
  694		goto errout;
  695
  696	prog->insns = malloc(insn_data_sz);
  697	if (!prog->insns)
  698		goto errout;
  699	memcpy(prog->insns, insn_data, insn_data_sz);
  700
  701	return 0;
  702errout:
  703	pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
  704	bpf_program__exit(prog);
  705	return -ENOMEM;
  706}
  707
  708static int
  709bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
  710			 const char *sec_name, int sec_idx)
  711{
  712	Elf_Data *symbols = obj->efile.symbols;
  713	struct bpf_program *prog, *progs;
  714	void *data = sec_data->d_buf;
  715	size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
  716	int nr_progs, err, i;
  717	const char *name;
  718	Elf64_Sym *sym;
  719
  720	progs = obj->programs;
  721	nr_progs = obj->nr_programs;
  722	nr_syms = symbols->d_size / sizeof(Elf64_Sym);
  723	sec_off = 0;
  724
  725	for (i = 0; i < nr_syms; i++) {
  726		sym = elf_sym_by_idx(obj, i);
  727
  728		if (sym->st_shndx != sec_idx)
  729			continue;
  730		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
  731			continue;
  732
  733		prog_sz = sym->st_size;
  734		sec_off = sym->st_value;
  735
  736		name = elf_sym_str(obj, sym->st_name);
  737		if (!name) {
  738			pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
  739				sec_name, sec_off);
  740			return -LIBBPF_ERRNO__FORMAT;
  741		}
  742
  743		if (sec_off + prog_sz > sec_sz) {
  744			pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
  745				sec_name, sec_off);
  746			return -LIBBPF_ERRNO__FORMAT;
  747		}
  748
  749		if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
  750			pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
  751			return -ENOTSUP;
  752		}
  753
  754		pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
  755			 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
  756
  757		progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
  758		if (!progs) {
  759			/*
  760			 * In this case the original obj->programs
  761			 * is still valid, so don't need special treat for
  762			 * bpf_close_object().
  763			 */
  764			pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
  765				sec_name, name);
  766			return -ENOMEM;
  767		}
  768		obj->programs = progs;
  769
  770		prog = &progs[nr_progs];
  771
  772		err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
  773					    sec_off, data + sec_off, prog_sz);
  774		if (err)
  775			return err;
  776
  777		/* if function is a global/weak symbol, but has restricted
  778		 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
  779		 * as static to enable more permissive BPF verification mode
  780		 * with more outside context available to BPF verifier
  781		 */
  782		if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL
  783		    && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
  784			|| ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
  785			prog->mark_btf_static = true;
  786
  787		nr_progs++;
  788		obj->nr_programs = nr_progs;
  789	}
  790
  791	return 0;
  792}
  793
  794static __u32 get_kernel_version(void)
  795{
  796	__u32 major, minor, patch;
  797	struct utsname info;
  798
  799	uname(&info);
  800	if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
  801		return 0;
  802	return KERNEL_VERSION(major, minor, patch);
  803}
  804
  805static const struct btf_member *
  806find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
  807{
  808	struct btf_member *m;
  809	int i;
  810
  811	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
  812		if (btf_member_bit_offset(t, i) == bit_offset)
  813			return m;
  814	}
  815
  816	return NULL;
  817}
  818
  819static const struct btf_member *
  820find_member_by_name(const struct btf *btf, const struct btf_type *t,
  821		    const char *name)
  822{
  823	struct btf_member *m;
  824	int i;
  825
  826	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
  827		if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
  828			return m;
  829	}
  830
  831	return NULL;
  832}
  833
/* Prefix that, together with a struct_ops type name, forms the name of the
 * kernel's map value struct (e.g. "bpf_struct_ops_tcp_congestion_ops").
 */
#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
/* Forward declaration; used by find_struct_ops_kern_types() below to look
 * up a type by the pair (prefix, name) and BTF kind.
 */
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
				   const char *name, __u32 kind);
  837
  838static int
  839find_struct_ops_kern_types(const struct btf *btf, const char *tname,
  840			   const struct btf_type **type, __u32 *type_id,
  841			   const struct btf_type **vtype, __u32 *vtype_id,
  842			   const struct btf_member **data_member)
  843{
  844	const struct btf_type *kern_type, *kern_vtype;
  845	const struct btf_member *kern_data_member;
  846	__s32 kern_vtype_id, kern_type_id;
  847	__u32 i;
  848
  849	kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
  850	if (kern_type_id < 0) {
  851		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
  852			tname);
  853		return kern_type_id;
  854	}
  855	kern_type = btf__type_by_id(btf, kern_type_id);
  856
  857	/* Find the corresponding "map_value" type that will be used
  858	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
  859	 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
  860	 * btf_vmlinux.
  861	 */
  862	kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
  863						tname, BTF_KIND_STRUCT);
  864	if (kern_vtype_id < 0) {
  865		pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
  866			STRUCT_OPS_VALUE_PREFIX, tname);
  867		return kern_vtype_id;
  868	}
  869	kern_vtype = btf__type_by_id(btf, kern_vtype_id);
  870
  871	/* Find "struct tcp_congestion_ops" from
  872	 * struct bpf_struct_ops_tcp_congestion_ops {
  873	 *	[ ... ]
  874	 *	struct tcp_congestion_ops data;
  875	 * }
  876	 */
  877	kern_data_member = btf_members(kern_vtype);
  878	for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
  879		if (kern_data_member->type == kern_type_id)
  880			break;
  881	}
  882	if (i == btf_vlen(kern_vtype)) {
  883		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
  884			tname, STRUCT_OPS_VALUE_PREFIX, tname);
  885		return -EINVAL;
  886	}
  887
  888	*type = kern_type;
  889	*type_id = kern_type_id;
  890	*vtype = kern_vtype;
  891	*vtype_id = kern_vtype_id;
  892	*data_member = kern_data_member;
  893
  894	return 0;
  895}
  896
  897static bool bpf_map__is_struct_ops(const struct bpf_map *map)
  898{
  899	return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
  900}
  901
/* Init the map's fields that depend on kern_btf
 *
 * For a struct_ops @map, match the struct described by the object's @btf
 * against the same-named struct in @kern_btf, member by member, and build
 * the kernel-layout value in st_ops->kern_vdata:
 *   - function pointer members with a program assigned get that program's
 *     attach_btf_id/expected_attach_type set and their kernel offset
 *     recorded in st_ops->kern_func_off[];
 *   - all other members are copied from st_ops->data.
 * Also sets map->def.value_size and map->btf_vmlinux_value_type_id.
 *
 * Returns 0 on success, -ENOMEM if kern_vdata cannot be allocated,
 * -ENOTSUP on any member mismatch (missing in kernel, bitfield, different
 * kind or size, non-function pointer), or the error from
 * find_struct_ops_kern_types().
 */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
					 const struct btf *btf,
					 const struct btf *kern_btf)
{
	const struct btf_member *member, *kern_member, *kern_data_member;
	const struct btf_type *type, *kern_type, *kern_vtype;
	__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
	struct bpf_struct_ops *st_ops;
	void *data, *kern_data;
	const char *tname;
	int err;

	st_ops = map->st_ops;
	type = st_ops->type;
	tname = st_ops->tname;
	err = find_struct_ops_kern_types(kern_btf, tname,
					 &kern_type, &kern_type_id,
					 &kern_vtype, &kern_vtype_id,
					 &kern_data_member);
	if (err)
		return err;

	pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
		 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

	/* the map value is the kernel's wrapper struct, not the bare struct */
	map->def.value_size = kern_vtype->size;
	map->btf_vmlinux_value_type_id = kern_vtype_id;

	st_ops->kern_vdata = calloc(1, kern_vtype->size);
	if (!st_ops->kern_vdata)
		return -ENOMEM;

	data = st_ops->data;
	/* member offsets are divided by 8 here, i.e. treated as bit offsets */
	kern_data_off = kern_data_member->offset / 8;
	/* start of the embedded struct inside the wrapper value */
	kern_data = st_ops->kern_vdata + kern_data_off;

	member = btf_members(type);
	for (i = 0; i < btf_vlen(type); i++, member++) {
		const struct btf_type *mtype, *kern_mtype;
		__u32 mtype_id, kern_mtype_id;
		void *mdata, *kern_mdata;
		__s64 msize, kern_msize;
		__u32 moff, kern_moff;
		__u32 kern_member_idx;
		const char *mname;

		/* members are matched by name, not by position */
		mname = btf__name_by_offset(btf, member->name_off);
		kern_member = find_member_by_name(kern_btf, kern_type, mname);
		if (!kern_member) {
			pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
				map->name, mname);
			return -ENOTSUP;
		}

		kern_member_idx = kern_member - btf_members(kern_type);
		if (btf_member_bitfield_size(type, i) ||
		    btf_member_bitfield_size(kern_type, kern_member_idx)) {
			pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
				map->name, mname);
			return -ENOTSUP;
		}

		moff = member->offset / 8;
		kern_moff = kern_member->offset / 8;

		mdata = data + moff;
		kern_mdata = kern_data + kern_moff;

		mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
		kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
						    &kern_mtype_id);
		if (BTF_INFO_KIND(mtype->info) !=
		    BTF_INFO_KIND(kern_mtype->info)) {
			pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
				map->name, mname, BTF_INFO_KIND(mtype->info),
				BTF_INFO_KIND(kern_mtype->info));
			return -ENOTSUP;
		}

		if (btf_is_ptr(mtype)) {
			struct bpf_program *prog;

			/* a pointer member without an assigned program is
			 * left zeroed in kern_vdata (calloc() above)
			 */
			prog = st_ops->progs[i];
			if (!prog)
				continue;

			kern_mtype = skip_mods_and_typedefs(kern_btf,
							    kern_mtype->type,
							    &kern_mtype_id);

			/* mtype->type must be a func_proto which was
			 * guaranteed in bpf_object__collect_st_ops_relos(),
			 * so only check kern_mtype for func_proto here.
			 */
			if (!btf_is_func_proto(kern_mtype)) {
				pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
					map->name, mname);
				return -ENOTSUP;
			}

			/* the program attaches to the kernel struct type,
			 * with the member index as expected_attach_type
			 */
			prog->attach_btf_id = kern_type_id;
			prog->expected_attach_type = kern_member_idx;

			/* offset is relative to the start of kern_vdata */
			st_ops->kern_func_off[i] = kern_data_off + kern_moff;

			pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
				 map->name, mname, prog->name, moff,
				 kern_moff);

			continue;
		}

		/* non-pointer member: sizes must agree before copying */
		msize = btf__resolve_size(btf, mtype_id);
		kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
		if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
			pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
				map->name, mname, (ssize_t)msize,
				(ssize_t)kern_msize);
			return -ENOTSUP;
		}

		pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
			 map->name, mname, (unsigned int)msize,
			 moff, kern_moff);
		memcpy(kern_mdata, mdata, msize);
	}

	return 0;
}
 1032
 1033static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
 1034{
 1035	struct bpf_map *map;
 1036	size_t i;
 1037	int err;
 1038
 1039	for (i = 0; i < obj->nr_maps; i++) {
 1040		map = &obj->maps[i];
 1041
 1042		if (!bpf_map__is_struct_ops(map))
 1043			continue;
 1044
 1045		err = bpf_map__init_kern_struct_ops(map, obj->btf,
 1046						    obj->btf_vmlinux);
 1047		if (err)
 1048			return err;
 1049	}
 1050
 1051	return 0;
 1052}
 1053
 1054static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
 1055{
 1056	const struct btf_type *type, *datasec;
 1057	const struct btf_var_secinfo *vsi;
 1058	struct bpf_struct_ops *st_ops;
 1059	const char *tname, *var_name;
 1060	__s32 type_id, datasec_id;
 1061	const struct btf *btf;
 1062	struct bpf_map *map;
 1063	__u32 i;
 1064
 1065	if (obj->efile.st_ops_shndx == -1)
 1066		return 0;
 1067
 1068	btf = obj->btf;
 1069	datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
 1070					    BTF_KIND_DATASEC);
 1071	if (datasec_id < 0) {
 1072		pr_warn("struct_ops init: DATASEC %s not found\n",
 1073			STRUCT_OPS_SEC);
 1074		return -EINVAL;
 1075	}
 1076
 1077	datasec = btf__type_by_id(btf, datasec_id);
 1078	vsi = btf_var_secinfos(datasec);
 1079	for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
 1080		type = btf__type_by_id(obj->btf, vsi->type);
 1081		var_name = btf__name_by_offset(obj->btf, type->name_off);
 1082
 1083		type_id = btf__resolve_type(obj->btf, vsi->type);
 1084		if (type_id < 0) {
 1085			pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
 1086				vsi->type, STRUCT_OPS_SEC);
 1087			return -EINVAL;
 1088		}
 1089
 1090		type = btf__type_by_id(obj->btf, type_id);
 1091		tname = btf__name_by_offset(obj->btf, type->name_off);
 1092		if (!tname[0]) {
 1093			pr_warn("struct_ops init: anonymous type is not supported\n");
 1094			return -ENOTSUP;
 1095		}
 1096		if (!btf_is_struct(type)) {
 1097			pr_warn("struct_ops init: %s is not a struct\n", tname);
 1098			return -EINVAL;
 1099		}
 1100
 1101		map = bpf_object__add_map(obj);
 1102		if (IS_ERR(map))
 1103			return PTR_ERR(map);
 1104
 1105		map->sec_idx = obj->efile.st_ops_shndx;
 1106		map->sec_offset = vsi->offset;
 1107		map->name = strdup(var_name);
 1108		if (!map->name)
 1109			return -ENOMEM;
 1110
 1111		map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
 1112		map->def.key_size = sizeof(int);
 1113		map->def.value_size = type->size;
 1114		map->def.max_entries = 1;
 1115
 1116		map->st_ops = calloc(1, sizeof(*map->st_ops));
 1117		if (!map->st_ops)
 1118			return -ENOMEM;
 1119		st_ops = map->st_ops;
 1120		st_ops->data = malloc(type->size);
 1121		st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
 1122		st_ops->kern_func_off = malloc(btf_vlen(type) *
 1123					       sizeof(*st_ops->kern_func_off));
 1124		if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
 1125			return -ENOMEM;
 1126
 1127		if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
 1128			pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
 1129				var_name, STRUCT_OPS_SEC);
 1130			return -EINVAL;
 1131		}
 1132
 1133		memcpy(st_ops->data,
 1134		       obj->efile.st_ops_data->d_buf + vsi->offset,
 1135		       type->size);
 1136		st_ops->tname = tname;
 1137		st_ops->type = type;
 1138		st_ops->type_id = type_id;
 1139
 1140		pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
 1141			 tname, type_id, var_name, vsi->offset);
 1142	}
 1143
 1144	return 0;
 1145}
 1146
 1147static struct bpf_object *bpf_object__new(const char *path,
 1148					  const void *obj_buf,
 1149					  size_t obj_buf_sz,
 1150					  const char *obj_name)
 1151{
 1152	bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST);
 1153	struct bpf_object *obj;
 1154	char *end;
 1155
 1156	obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
 1157	if (!obj) {
 1158		pr_warn("alloc memory failed for %s\n", path);
 1159		return ERR_PTR(-ENOMEM);
 1160	}
 1161
 1162	strcpy(obj->path, path);
 1163	if (obj_name) {
 1164		strncpy(obj->name, obj_name, sizeof(obj->name) - 1);
 1165		obj->name[sizeof(obj->name) - 1] = 0;
 1166	} else {
 1167		/* Using basename() GNU version which doesn't modify arg. */
 1168		strncpy(obj->name, basename((void *)path),
 1169			sizeof(obj->name) - 1);
 1170		end = strchr(obj->name, '.');
 1171		if (end)
 1172			*end = 0;
 1173	}
 1174
 1175	obj->efile.fd = -1;
 1176	/*
 1177	 * Caller of this function should also call
 1178	 * bpf_object__elf_finish() after data collection to return
 1179	 * obj_buf to user. If not, we should duplicate the buffer to
 1180	 * avoid user freeing them before elf finish.
 1181	 */
 1182	obj->efile.obj_buf = obj_buf;
 1183	obj->efile.obj_buf_sz = obj_buf_sz;
 1184	obj->efile.maps_shndx = -1;
 1185	obj->efile.btf_maps_shndx = -1;
 1186	obj->efile.st_ops_shndx = -1;
 1187	obj->kconfig_map_idx = -1;
 1188
 1189	obj->kern_version = get_kernel_version();
 1190	obj->loaded = false;
 1191
 1192	INIT_LIST_HEAD(&obj->list);
 1193	if (!strict)
 1194		list_add(&obj->list, &bpf_objects_list);
 1195	return obj;
 1196}
 1197
 1198static void bpf_object__elf_finish(struct bpf_object *obj)
 1199{
 1200	if (!obj->efile.elf)
 1201		return;
 1202
 1203	if (obj->efile.elf) {
 1204		elf_end(obj->efile.elf);
 1205		obj->efile.elf = NULL;
 1206	}
 1207	obj->efile.symbols = NULL;
 1208	obj->efile.st_ops_data = NULL;
 1209
 1210	zfree(&obj->efile.secs);
 1211	obj->efile.sec_cnt = 0;
 1212	zclose(obj->efile.fd);
 1213	obj->efile.obj_buf = NULL;
 1214	obj->efile.obj_buf_sz = 0;
 1215}
 1216
 1217static int bpf_object__elf_init(struct bpf_object *obj)
 1218{
 1219	Elf64_Ehdr *ehdr;
 1220	int err = 0;
 1221	Elf *elf;
 1222
 1223	if (obj->efile.elf) {
 1224		pr_warn("elf: init internal error\n");
 1225		return -LIBBPF_ERRNO__LIBELF;
 1226	}
 1227
 1228	if (obj->efile.obj_buf_sz > 0) {
 1229		/*
 1230		 * obj_buf should have been validated by
 1231		 * bpf_object__open_buffer().
 1232		 */
 1233		elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
 1234	} else {
 1235		obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
 1236		if (obj->efile.fd < 0) {
 1237			char errmsg[STRERR_BUFSIZE], *cp;
 1238
 1239			err = -errno;
 1240			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 1241			pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
 1242			return err;
 1243		}
 1244
 1245		elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
 1246	}
 1247
 1248	if (!elf) {
 1249		pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
 1250		err = -LIBBPF_ERRNO__LIBELF;
 1251		goto errout;
 1252	}
 1253
 1254	obj->efile.elf = elf;
 1255
 1256	if (elf_kind(elf) != ELF_K_ELF) {
 1257		err = -LIBBPF_ERRNO__FORMAT;
 1258		pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
 1259		goto errout;
 1260	}
 1261
 1262	if (gelf_getclass(elf) != ELFCLASS64) {
 1263		err = -LIBBPF_ERRNO__FORMAT;
 1264		pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
 1265		goto errout;
 1266	}
 1267
 1268	obj->efile.ehdr = ehdr = elf64_getehdr(elf);
 1269	if (!obj->efile.ehdr) {
 1270		pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
 1271		err = -LIBBPF_ERRNO__FORMAT;
 1272		goto errout;
 1273	}
 1274
 1275	if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
 1276		pr_warn("elf: failed to get section names section index for %s: %s\n",
 1277			obj->path, elf_errmsg(-1));
 1278		err = -LIBBPF_ERRNO__FORMAT;
 1279		goto errout;
 1280	}
 1281
 1282	/* Elf is corrupted/truncated, avoid calling elf_strptr. */
 1283	if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
 1284		pr_warn("elf: failed to get section names strings from %s: %s\n",
 1285			obj->path, elf_errmsg(-1));
 1286		err = -LIBBPF_ERRNO__FORMAT;
 1287		goto errout;
 1288	}
 1289
 1290	/* Old LLVM set e_machine to EM_NONE */
 1291	if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
 1292		pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
 1293		err = -LIBBPF_ERRNO__FORMAT;
 1294		goto errout;
 1295	}
 1296
 1297	return 0;
 1298errout:
 1299	bpf_object__elf_finish(obj);
 1300	return err;
 1301}
 1302
 1303static int bpf_object__check_endianness(struct bpf_object *obj)
 1304{
 1305#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 1306	if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
 1307		return 0;
 1308#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 1309	if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
 1310		return 0;
 1311#else
 1312# error "Unrecognized __BYTE_ORDER__"
 1313#endif
 1314	pr_warn("elf: endianness mismatch in %s.\n", obj->path);
 1315	return -LIBBPF_ERRNO__ENDIAN;
 1316}
 1317
 1318static int
 1319bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
 1320{
 1321	memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
 1322	pr_debug("license of %s is %s\n", obj->path, obj->license);
 1323	return 0;
 1324}
 1325
 1326static int
 1327bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
 1328{
 1329	__u32 kver;
 1330
 1331	if (size != sizeof(kver)) {
 1332		pr_warn("invalid kver section in %s\n", obj->path);
 1333		return -LIBBPF_ERRNO__FORMAT;
 1334	}
 1335	memcpy(&kver, data, sizeof(kver));
 1336	obj->kern_version = kver;
 1337	pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
 1338	return 0;
 1339}
 1340
 1341static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
 1342{
 1343	if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
 1344	    type == BPF_MAP_TYPE_HASH_OF_MAPS)
 1345		return true;
 1346	return false;
 1347}
 1348
 1349static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
 1350{
 1351	int ret = -ENOENT;
 1352	Elf_Data *data;
 1353	Elf_Scn *scn;
 1354
 1355	*size = 0;
 1356	if (!name)
 1357		return -EINVAL;
 1358
 1359	scn = elf_sec_by_name(obj, name);
 1360	data = elf_sec_data(obj, scn);
 1361	if (data) {
 1362		ret = 0; /* found it */
 1363		*size = data->d_size;
 1364	}
 1365
 1366	return *size ? 0 : ret;
 1367}
 1368
 1369static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off)
 1370{
 1371	Elf_Data *symbols = obj->efile.symbols;
 1372	const char *sname;
 1373	size_t si;
 1374
 1375	if (!name || !off)
 1376		return -EINVAL;
 1377
 1378	for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
 1379		Elf64_Sym *sym = elf_sym_by_idx(obj, si);
 1380
 1381		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL ||
 1382		    ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
 1383			continue;
 1384
 1385		sname = elf_sym_str(obj, sym->st_name);
 1386		if (!sname) {
 1387			pr_warn("failed to get sym name string for var %s\n", name);
 1388			return -EIO;
 1389		}
 1390		if (strcmp(name, sname) == 0) {
 1391			*off = sym->st_value;
 1392			return 0;
 1393		}
 1394	}
 1395
 1396	return -ENOENT;
 1397}
 1398
 1399static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
 1400{
 1401	struct bpf_map *new_maps;
 1402	size_t new_cap;
 1403	int i;
 1404
 1405	if (obj->nr_maps < obj->maps_cap)
 1406		return &obj->maps[obj->nr_maps++];
 1407
 1408	new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
 1409	new_maps = libbpf_reallocarray(obj->maps, new_cap, sizeof(*obj->maps));
 1410	if (!new_maps) {
 1411		pr_warn("alloc maps for object failed\n");
 1412		return ERR_PTR(-ENOMEM);
 1413	}
 1414
 1415	obj->maps_cap = new_cap;
 1416	obj->maps = new_maps;
 1417
 1418	/* zero out new maps */
 1419	memset(obj->maps + obj->nr_maps, 0,
 1420	       (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
 1421	/*
 1422	 * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
 1423	 * when failure (zclose won't close negative fd)).
 1424	 */
 1425	for (i = obj->nr_maps; i < obj->maps_cap; i++) {
 1426		obj->maps[i].fd = -1;
 1427		obj->maps[i].inner_map_fd = -1;
 1428	}
 1429
 1430	return &obj->maps[obj->nr_maps++];
 1431}
 1432
 1433static size_t bpf_map_mmap_sz(const struct bpf_map *map)
 1434{
 1435	long page_sz = sysconf(_SC_PAGE_SIZE);
 1436	size_t map_sz;
 1437
 1438	map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
 1439	map_sz = roundup(map_sz, page_sz);
 1440	return map_sz;
 1441}
 1442
 1443static char *internal_map_name(struct bpf_object *obj, const char *real_name)
 1444{
 1445	char map_name[BPF_OBJ_NAME_LEN], *p;
 1446	int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
 1447
 1448	/* This is one of the more confusing parts of libbpf for various
 1449	 * reasons, some of which are historical. The original idea for naming
 1450	 * internal names was to include as much of BPF object name prefix as
 1451	 * possible, so that it can be distinguished from similar internal
 1452	 * maps of a different BPF object.
 1453	 * As an example, let's say we have bpf_object named 'my_object_name'
 1454	 * and internal map corresponding to '.rodata' ELF section. The final
 1455	 * map name advertised to user and to the kernel will be
 1456	 * 'my_objec.rodata', taking first 8 characters of object name and
 1457	 * entire 7 characters of '.rodata'.
 1458	 * Somewhat confusingly, if internal map ELF section name is shorter
 1459	 * than 7 characters, e.g., '.bss', we still reserve 7 characters
 1460	 * for the suffix, even though we only have 4 actual characters, and
 1461	 * resulting map will be called 'my_objec.bss', not even using all 15
 1462	 * characters allowed by the kernel. Oh well, at least the truncated
 1463	 * object name is somewhat consistent in this case. But if the map
 1464	 * name is '.kconfig', we'll still have entirety of '.kconfig' added
 1465	 * (8 chars) and thus will be left with only first 7 characters of the
 1466	 * object name ('my_obje'). Happy guessing, user, that the final map
 1467	 * name will be "my_obje.kconfig".
 1468	 * Now, with libbpf starting to support arbitrarily named .rodata.*
 1469	 * and .data.* data sections, it's possible that ELF section name is
 1470	 * longer than allowed 15 chars, so we now need to be careful to take
 1471	 * only up to 15 first characters of ELF name, taking no BPF object
 1472	 * name characters at all. So '.rodata.abracadabra' will result in
 1473	 * '.rodata.abracad' kernel and user-visible name.
 1474	 * We need to keep this convoluted logic intact for .data, .bss and
 1475	 * .rodata maps, but for new custom .data.custom and .rodata.custom
 1476	 * maps we use their ELF names as is, not prepending bpf_object name
 1477	 * in front. We still need to truncate them to 15 characters for the
 1478	 * kernel. Full name can be recovered for such maps by using DATASEC
 1479	 * BTF type associated with such map's value type, though.
 1480	 */
 1481	if (sfx_len >= BPF_OBJ_NAME_LEN)
 1482		sfx_len = BPF_OBJ_NAME_LEN - 1;
 1483
 1484	/* if there are two or more dots in map name, it's a custom dot map */
 1485	if (strchr(real_name + 1, '.') != NULL)
 1486		pfx_len = 0;
 1487	else
 1488		pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
 1489
 1490	snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
 1491		 sfx_len, real_name);
 1492
 1493	/* sanitise map name to characters allowed by kernel */
 1494	for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
 1495		if (!isalnum(*p) && *p != '_' && *p != '.')
 1496			*p = '_';
 1497
 1498	return strdup(map_name);
 1499}
 1500
 1501static int
 1502bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 1503			      const char *real_name, int sec_idx, void *data, size_t data_sz)
 1504{
 1505	struct bpf_map_def *def;
 1506	struct bpf_map *map;
 1507	int err;
 1508
 1509	map = bpf_object__add_map(obj);
 1510	if (IS_ERR(map))
 1511		return PTR_ERR(map);
 1512
 1513	map->libbpf_type = type;
 1514	map->sec_idx = sec_idx;
 1515	map->sec_offset = 0;
 1516	map->real_name = strdup(real_name);
 1517	map->name = internal_map_name(obj, real_name);
 1518	if (!map->real_name || !map->name) {
 1519		zfree(&map->real_name);
 1520		zfree(&map->name);
 1521		return -ENOMEM;
 1522	}
 1523
 1524	def = &map->def;
 1525	def->type = BPF_MAP_TYPE_ARRAY;
 1526	def->key_size = sizeof(int);
 1527	def->value_size = data_sz;
 1528	def->max_entries = 1;
 1529	def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
 1530			 ? BPF_F_RDONLY_PROG : 0;
 1531	def->map_flags |= BPF_F_MMAPABLE;
 1532
 1533	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
 1534		 map->name, map->sec_idx, map->sec_offset, def->map_flags);
 1535
 1536	map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
 1537			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 1538	if (map->mmaped == MAP_FAILED) {
 1539		err = -errno;
 1540		map->mmaped = NULL;
 1541		pr_warn("failed to alloc map '%s' content buffer: %d\n",
 1542			map->name, err);
 1543		zfree(&map->real_name);
 1544		zfree(&map->name);
 1545		return err;
 1546	}
 1547
 1548	if (data)
 1549		memcpy(map->mmaped, data, data_sz);
 1550
 1551	pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
 1552	return 0;
 1553}
 1554
 1555static int bpf_object__init_global_data_maps(struct bpf_object *obj)
 1556{
 1557	struct elf_sec_desc *sec_desc;
 1558	const char *sec_name;
 1559	int err = 0, sec_idx;
 1560
 1561	/*
 1562	 * Populate obj->maps with libbpf internal maps.
 1563	 */
 1564	for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
 1565		sec_desc = &obj->efile.secs[sec_idx];
 1566
 1567		switch (sec_desc->sec_type) {
 1568		case SEC_DATA:
 1569			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
 1570			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
 1571							    sec_name, sec_idx,
 1572							    sec_desc->data->d_buf,
 1573							    sec_desc->data->d_size);
 1574			break;
 1575		case SEC_RODATA:
 1576			obj->has_rodata = true;
 1577			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
 1578			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
 1579							    sec_name, sec_idx,
 1580							    sec_desc->data->d_buf,
 1581							    sec_desc->data->d_size);
 1582			break;
 1583		case SEC_BSS:
 1584			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
 1585			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
 1586							    sec_name, sec_idx,
 1587							    NULL,
 1588							    sec_desc->data->d_size);
 1589			break;
 1590		default:
 1591			/* skip */
 1592			break;
 1593		}
 1594		if (err)
 1595			return err;
 1596	}
 1597	return 0;
 1598}
 1599
 1600
 1601static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
 1602					       const void *name)
 1603{
 1604	int i;
 1605
 1606	for (i = 0; i < obj->nr_extern; i++) {
 1607		if (strcmp(obj->externs[i].name, name) == 0)
 1608			return &obj->externs[i];
 1609	}
 1610	return NULL;
 1611}
 1612
 1613static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
 1614			      char value)
 1615{
 1616	switch (ext->kcfg.type) {
 1617	case KCFG_BOOL:
 1618		if (value == 'm') {
 1619			pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
 1620				ext->name, value);
 1621			return -EINVAL;
 1622		}
 1623		*(bool *)ext_val = value == 'y' ? true : false;
 1624		break;
 1625	case KCFG_TRISTATE:
 1626		if (value == 'y')
 1627			*(enum libbpf_tristate *)ext_val = TRI_YES;
 1628		else if (value == 'm')
 1629			*(enum libbpf_tristate *)ext_val = TRI_MODULE;
 1630		else /* value == 'n' */
 1631			*(enum libbpf_tristate *)ext_val = TRI_NO;
 1632		break;
 1633	case KCFG_CHAR:
 1634		*(char *)ext_val = value;
 1635		break;
 1636	case KCFG_UNKNOWN:
 1637	case KCFG_INT:
 1638	case KCFG_CHAR_ARR:
 1639	default:
 1640		pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
 1641			ext->name, value);
 1642		return -EINVAL;
 1643	}
 1644	ext->is_set = true;
 1645	return 0;
 1646}
 1647
 1648static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
 1649			      const char *value)
 1650{
 1651	size_t len;
 1652
 1653	if (ext->kcfg.type != KCFG_CHAR_ARR) {
 1654		pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
 1655		return -EINVAL;
 1656	}
 1657
 1658	len = strlen(value);
 1659	if (value[len - 1] != '"') {
 1660		pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
 1661			ext->name, value);
 1662		return -EINVAL;
 1663	}
 1664
 1665	/* strip quotes */
 1666	len -= 2;
 1667	if (len >= ext->kcfg.sz) {
 1668		pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
 1669			ext->name, value, len, ext->kcfg.sz - 1);
 1670		len = ext->kcfg.sz - 1;
 1671	}
 1672	memcpy(ext_val, value + 1, len);
 1673	ext_val[len] = '\0';
 1674	ext->is_set = true;
 1675	return 0;
 1676}
 1677
 1678static int parse_u64(const char *value, __u64 *res)
 1679{
 1680	char *value_end;
 1681	int err;
 1682
 1683	errno = 0;
 1684	*res = strtoull(value, &value_end, 0);
 1685	if (errno) {
 1686		err = -errno;
 1687		pr_warn("failed to parse '%s' as integer: %d\n", value, err);
 1688		return err;
 1689	}
 1690	if (*value_end) {
 1691		pr_warn("failed to parse '%s' as integer completely\n", value);
 1692		return -EINVAL;
 1693	}
 1694	return 0;
 1695}
 1696
 1697static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
 1698{
 1699	int bit_sz = ext->kcfg.sz * 8;
 1700
 1701	if (ext->kcfg.sz == 8)
 1702		return true;
 1703
 1704	/* Validate that value stored in u64 fits in integer of `ext->sz`
 1705	 * bytes size without any loss of information. If the target integer
 1706	 * is signed, we rely on the following limits of integer type of
 1707	 * Y bits and subsequent transformation:
 1708	 *
 1709	 *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
 1710	 *            0 <= X + 2^(Y-1) <= 2^Y - 1
 1711	 *            0 <= X + 2^(Y-1) <  2^Y
 1712	 *
 1713	 *  For unsigned target integer, check that all the (64 - Y) bits are
 1714	 *  zero.
 1715	 */
 1716	if (ext->kcfg.is_signed)
 1717		return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
 1718	else
 1719		return (v >> bit_sz) == 0;
 1720}
 1721
 1722static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
 1723			      __u64 value)
 1724{
 1725	if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
 1726		pr_warn("extern (kcfg) %s=%llu should be integer\n",
 1727			ext->name, (unsigned long long)value);
 1728		return -EINVAL;
 1729	}
 1730	if (!is_kcfg_value_in_range(ext, value)) {
 1731		pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
 1732			ext->name, (unsigned long long)value, ext->kcfg.sz);
 1733		return -ERANGE;
 1734	}
 1735	switch (ext->kcfg.sz) {
 1736		case 1: *(__u8 *)ext_val = value; break;
 1737		case 2: *(__u16 *)ext_val = value; break;
 1738		case 4: *(__u32 *)ext_val = value; break;
 1739		case 8: *(__u64 *)ext_val = value; break;
 1740		default:
 1741			return -EINVAL;
 1742	}
 1743	ext->is_set = true;
 1744	return 0;
 1745}
 1746
 1747static int bpf_object__process_kconfig_line(struct bpf_object *obj,
 1748					    char *buf, void *data)
 1749{
 1750	struct extern_desc *ext;
 1751	char *sep, *value;
 1752	int len, err = 0;
 1753	void *ext_val;
 1754	__u64 num;
 1755
 1756	if (!str_has_pfx(buf, "CONFIG_"))
 1757		return 0;
 1758
 1759	sep = strchr(buf, '=');
 1760	if (!sep) {
 1761		pr_warn("failed to parse '%s': no separator\n", buf);
 1762		return -EINVAL;
 1763	}
 1764
 1765	/* Trim ending '\n' */
 1766	len = strlen(buf);
 1767	if (buf[len - 1] == '\n')
 1768		buf[len - 1] = '\0';
 1769	/* Split on '=' and ensure that a value is present. */
 1770	*sep = '\0';
 1771	if (!sep[1]) {
 1772		*sep = '=';
 1773		pr_warn("failed to parse '%s': no value\n", buf);
 1774		return -EINVAL;
 1775	}
 1776
 1777	ext = find_extern_by_name(obj, buf);
 1778	if (!ext || ext->is_set)
 1779		return 0;
 1780
 1781	ext_val = data + ext->kcfg.data_off;
 1782	value = sep + 1;
 1783
 1784	switch (*value) {
 1785	case 'y': case 'n': case 'm':
 1786		err = set_kcfg_value_tri(ext, ext_val, *value);
 1787		break;
 1788	case '"':
 1789		err = set_kcfg_value_str(ext, ext_val, value);
 1790		break;
 1791	default:
 1792		/* assume integer */
 1793		err = parse_u64(value, &num);
 1794		if (err) {
 1795			pr_warn("extern (kcfg) %s=%s should be integer\n",
 1796				ext->name, value);
 1797			return err;
 1798		}
 1799		err = set_kcfg_value_num(ext, ext_val, num);
 1800		break;
 1801	}
 1802	if (err)
 1803		return err;
 1804	pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
 1805	return 0;
 1806}
 1807
 1808static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
 1809{
 1810	char buf[PATH_MAX];
 1811	struct utsname uts;
 1812	int len, err = 0;
 1813	gzFile file;
 1814
 1815	uname(&uts);
 1816	len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
 1817	if (len < 0)
 1818		return -EINVAL;
 1819	else if (len >= PATH_MAX)
 1820		return -ENAMETOOLONG;
 1821
 1822	/* gzopen also accepts uncompressed files. */
 1823	file = gzopen(buf, "r");
 1824	if (!file)
 1825		file = gzopen("/proc/config.gz", "r");
 1826
 1827	if (!file) {
 1828		pr_warn("failed to open system Kconfig\n");
 1829		return -ENOENT;
 1830	}
 1831
 1832	while (gzgets(file, buf, sizeof(buf))) {
 1833		err = bpf_object__process_kconfig_line(obj, buf, data);
 1834		if (err) {
 1835			pr_warn("error parsing system Kconfig line '%s': %d\n",
 1836				buf, err);
 1837			goto out;
 1838		}
 1839	}
 1840
 1841out:
 1842	gzclose(file);
 1843	return err;
 1844}
 1845
 1846static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
 1847					const char *config, void *data)
 1848{
 1849	char buf[PATH_MAX];
 1850	int err = 0;
 1851	FILE *file;
 1852
 1853	file = fmemopen((void *)config, strlen(config), "r");
 1854	if (!file) {
 1855		err = -errno;
 1856		pr_warn("failed to open in-memory Kconfig: %d\n", err);
 1857		return err;
 1858	}
 1859
 1860	while (fgets(buf, sizeof(buf), file)) {
 1861		err = bpf_object__process_kconfig_line(obj, buf, data);
 1862		if (err) {
 1863			pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
 1864				buf, err);
 1865			break;
 1866		}
 1867	}
 1868
 1869	fclose(file);
 1870	return err;
 1871}
 1872
 1873static int bpf_object__init_kconfig_map(struct bpf_object *obj)
 1874{
 1875	struct extern_desc *last_ext = NULL, *ext;
 1876	size_t map_sz;
 1877	int i, err;
 1878
 1879	for (i = 0; i < obj->nr_extern; i++) {
 1880		ext = &obj->externs[i];
 1881		if (ext->type == EXT_KCFG)
 1882			last_ext = ext;
 1883	}
 1884
 1885	if (!last_ext)
 1886		return 0;
 1887
 1888	map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
 1889	err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
 1890					    ".kconfig", obj->efile.symbols_shndx,
 1891					    NULL, map_sz);
 1892	if (err)
 1893		return err;
 1894
 1895	obj->kconfig_map_idx = obj->nr_maps - 1;
 1896
 1897	return 0;
 1898}
 1899
 1900static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
 1901{
 1902	Elf_Data *symbols = obj->efile.symbols;
 1903	int i, map_def_sz = 0, nr_maps = 0, nr_syms;
 1904	Elf_Data *data = NULL;
 1905	Elf_Scn *scn;
 1906
 1907	if (obj->efile.maps_shndx < 0)
 1908		return 0;
 1909
 1910	if (!symbols)
 1911		return -EINVAL;
 1912
 1913	scn = elf_sec_by_idx(obj, obj->efile.maps_shndx);
 1914	data = elf_sec_data(obj, scn);
 1915	if (!scn || !data) {
 1916		pr_warn("elf: failed to get legacy map definitions for %s\n",
 1917			obj->path);
 1918		return -EINVAL;
 1919	}
 1920
 1921	/*
 1922	 * Count number of maps. Each map has a name.
 1923	 * Array of maps is not supported: only the first element is
 1924	 * considered.
 1925	 *
 1926	 * TODO: Detect array of map and report error.
 1927	 */
 1928	nr_syms = symbols->d_size / sizeof(Elf64_Sym);
 1929	for (i = 0; i < nr_syms; i++) {
 1930		Elf64_Sym *sym = elf_sym_by_idx(obj, i);
 1931
 1932		if (sym->st_shndx != obj->efile.maps_shndx)
 1933			continue;
 1934		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION)
 1935			continue;
 1936		nr_maps++;
 1937	}
 1938	/* Assume equally sized map definitions */
 1939	pr_debug("elf: found %d legacy map definitions (%zd bytes) in %s\n",
 1940		 nr_maps, data->d_size, obj->path);
 1941
 1942	if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) {
 1943		pr_warn("elf: unable to determine legacy map definition size in %s\n",
 1944			obj->path);
 1945		return -EINVAL;
 1946	}
 1947	map_def_sz = data->d_size / nr_maps;
 1948
 1949	/* Fill obj->maps using data in "maps" section.  */
 1950	for (i = 0; i < nr_syms; i++) {
 1951		Elf64_Sym *sym = elf_sym_by_idx(obj, i);
 1952		const char *map_name;
 1953		struct bpf_map_def *def;
 1954		struct bpf_map *map;
 1955
 1956		if (sym->st_shndx != obj->efile.maps_shndx)
 1957			continue;
 1958		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION)
 1959			continue;
 1960
 1961		map = bpf_object__add_map(obj);
 1962		if (IS_ERR(map))
 1963			return PTR_ERR(map);
 1964
 1965		map_name = elf_sym_str(obj, sym->st_name);
 1966		if (!map_name) {
 1967			pr_warn("failed to get map #%d name sym string for obj %s\n",
 1968				i, obj->path);
 1969			return -LIBBPF_ERRNO__FORMAT;
 1970		}
 1971
 1972		if (ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
 1973			pr_warn("map '%s' (legacy): static maps are not supported\n", map_name);
 1974			return -ENOTSUP;
 1975		}
 1976
 1977		map->libbpf_type = LIBBPF_MAP_UNSPEC;
 1978		map->sec_idx = sym->st_shndx;
 1979		map->sec_offset = sym->st_value;
 1980		pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
 1981			 map_name, map->sec_idx, map->sec_offset);
 1982		if (sym->st_value + map_def_sz > data->d_size) {
 1983			pr_warn("corrupted maps section in %s: last map \"%s\" too small\n",
 1984				obj->path, map_name);
 1985			return -EINVAL;
 1986		}
 1987
 1988		map->name = strdup(map_name);
 1989		if (!map->name) {
 1990			pr_warn("map '%s': failed to alloc map name\n", map_name);
 1991			return -ENOMEM;
 1992		}
 1993		pr_debug("map %d is \"%s\"\n", i, map->name);
 1994		def = (struct bpf_map_def *)(data->d_buf + sym->st_value);
 1995		/*
 1996		 * If the definition of the map in the object file fits in
 1997		 * bpf_map_def, copy it.  Any extra fields in our version
 1998		 * of bpf_map_def will default to zero as a result of the
 1999		 * calloc above.
 2000		 */
 2001		if (map_def_sz <= sizeof(struct bpf_map_def)) {
 2002			memcpy(&map->def, def, map_def_sz);
 2003		} else {
 2004			/*
 2005			 * Here the map structure being read is bigger than what
 2006			 * we expect, truncate if the excess bits are all zero.
 2007			 * If they are not zero, reject this map as
 2008			 * incompatible.
 2009			 */
 2010			char *b;
 2011
 2012			for (b = ((char *)def) + sizeof(struct bpf_map_def);
 2013			     b < ((char *)def) + map_def_sz; b++) {
 2014				if (*b != 0) {
 2015					pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n",
 2016						obj->path, map_name);
 2017					if (strict)
 2018						return -EINVAL;
 2019				}
 2020			}
 2021			memcpy(&map->def, def, sizeof(struct bpf_map_def));
 2022		}
 2023	}
 2024	return 0;
 2025}
 2026
 2027const struct btf_type *
 2028skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
 2029{
 2030	const struct btf_type *t = btf__type_by_id(btf, id);
 2031
 2032	if (res_id)
 2033		*res_id = id;
 2034
 2035	while (btf_is_mod(t) || btf_is_typedef(t)) {
 2036		if (res_id)
 2037			*res_id = t->type;
 2038		t = btf__type_by_id(btf, t->type);
 2039	}
 2040
 2041	return t;
 2042}
 2043
 2044static const struct btf_type *
 2045resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
 2046{
 2047	const struct btf_type *t;
 2048
 2049	t = skip_mods_and_typedefs(btf, id, NULL);
 2050	if (!btf_is_ptr(t))
 2051		return NULL;
 2052
 2053	t = skip_mods_and_typedefs(btf, t->type, res_id);
 2054
 2055	return btf_is_func_proto(t) ? t : NULL;
 2056}
 2057
 2058static const char *__btf_kind_str(__u16 kind)
 2059{
 2060	switch (kind) {
 2061	case BTF_KIND_UNKN: return "void";
 2062	case BTF_KIND_INT: return "int";
 2063	case BTF_KIND_PTR: return "ptr";
 2064	case BTF_KIND_ARRAY: return "array";
 2065	case BTF_KIND_STRUCT: return "struct";
 2066	case BTF_KIND_UNION: return "union";
 2067	case BTF_KIND_ENUM: return "enum";
 2068	case BTF_KIND_FWD: return "fwd";
 2069	case BTF_KIND_TYPEDEF: return "typedef";
 2070	case BTF_KIND_VOLATILE: return "volatile";
 2071	case BTF_KIND_CONST: return "const";
 2072	case BTF_KIND_RESTRICT: return "restrict";
 2073	case BTF_KIND_FUNC: return "func";
 2074	case BTF_KIND_FUNC_PROTO: return "func_proto";
 2075	case BTF_KIND_VAR: return "var";
 2076	case BTF_KIND_DATASEC: return "datasec";
 2077	case BTF_KIND_FLOAT: return "float";
 2078	case BTF_KIND_DECL_TAG: return "decl_tag";
 2079	default: return "unknown";
 2080	}
 2081}
 2082
 2083const char *btf_kind_str(const struct btf_type *t)
 2084{
 2085	return __btf_kind_str(btf_kind(t));
 2086}
 2087
 2088/*
 2089 * Fetch integer attribute of BTF map definition. Such attributes are
 2090 * represented using a pointer to an array, in which dimensionality of array
 2091 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
 2092 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
 2093 * type definition, while using only sizeof(void *) space in ELF data section.
 2094 */
 2095static bool get_map_field_int(const char *map_name, const struct btf *btf,
 2096			      const struct btf_member *m, __u32 *res)
 2097{
 2098	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
 2099	const char *name = btf__name_by_offset(btf, m->name_off);
 2100	const struct btf_array *arr_info;
 2101	const struct btf_type *arr_t;
 2102
 2103	if (!btf_is_ptr(t)) {
 2104		pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
 2105			map_name, name, btf_kind_str(t));
 2106		return false;
 2107	}
 2108
 2109	arr_t = btf__type_by_id(btf, t->type);
 2110	if (!arr_t) {
 2111		pr_warn("map '%s': attr '%s': type [%u] not found.\n",
 2112			map_name, name, t->type);
 2113		return false;
 2114	}
 2115	if (!btf_is_array(arr_t)) {
 2116		pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
 2117			map_name, name, btf_kind_str(arr_t));
 2118		return false;
 2119	}
 2120	arr_info = btf_array(arr_t);
 2121	*res = arr_info->nelems;
 2122	return true;
 2123}
 2124
 2125static int build_map_pin_path(struct bpf_map *map, const char *path)
 2126{
 2127	char buf[PATH_MAX];
 2128	int len;
 2129
 2130	if (!path)
 2131		path = "/sys/fs/bpf";
 2132
 2133	len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
 2134	if (len < 0)
 2135		return -EINVAL;
 2136	else if (len >= PATH_MAX)
 2137		return -ENAMETOOLONG;
 2138
 2139	return bpf_map__set_pin_path(map, buf);
 2140}
 2141
 2142int parse_btf_map_def(const char *map_name, struct btf *btf,
 2143		      const struct btf_type *def_t, bool strict,
 2144		      struct btf_map_def *map_def, struct btf_map_def *inner_def)
 2145{
 2146	const struct btf_type *t;
 2147	const struct btf_member *m;
 2148	bool is_inner = inner_def == NULL;
 2149	int vlen, i;
 2150
 2151	vlen = btf_vlen(def_t);
 2152	m = btf_members(def_t);
 2153	for (i = 0; i < vlen; i++, m++) {
 2154		const char *name = btf__name_by_offset(btf, m->name_off);
 2155
 2156		if (!name) {
 2157			pr_warn("map '%s': invalid field #%d.\n", map_name, i);
 2158			return -EINVAL;
 2159		}
 2160		if (strcmp(name, "type") == 0) {
 2161			if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
 2162				return -EINVAL;
 2163			map_def->parts |= MAP_DEF_MAP_TYPE;
 2164		} else if (strcmp(name, "max_entries") == 0) {
 2165			if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
 2166				return -EINVAL;
 2167			map_def->parts |= MAP_DEF_MAX_ENTRIES;
 2168		} else if (strcmp(name, "map_flags") == 0) {
 2169			if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
 2170				return -EINVAL;
 2171			map_def->parts |= MAP_DEF_MAP_FLAGS;
 2172		} else if (strcmp(name, "numa_node") == 0) {
 2173			if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
 2174				return -EINVAL;
 2175			map_def->parts |= MAP_DEF_NUMA_NODE;
 2176		} else if (strcmp(name, "key_size") == 0) {
 2177			__u32 sz;
 2178
 2179			if (!get_map_field_int(map_name, btf, m, &sz))
 2180				return -EINVAL;
 2181			if (map_def->key_size && map_def->key_size != sz) {
 2182				pr_warn("map '%s': conflicting key size %u != %u.\n",
 2183					map_name, map_def->key_size, sz);
 2184				return -EINVAL;
 2185			}
 2186			map_def->key_size = sz;
 2187			map_def->parts |= MAP_DEF_KEY_SIZE;
 2188		} else if (strcmp(name, "key") == 0) {
 2189			__s64 sz;
 2190
 2191			t = btf__type_by_id(btf, m->type);
 2192			if (!t) {
 2193				pr_warn("map '%s': key type [%d] not found.\n",
 2194					map_name, m->type);
 2195				return -EINVAL;
 2196			}
 2197			if (!btf_is_ptr(t)) {
 2198				pr_warn("map '%s': key spec is not PTR: %s.\n",
 2199					map_name, btf_kind_str(t));
 2200				return -EINVAL;
 2201			}
 2202			sz = btf__resolve_size(btf, t->type);
 2203			if (sz < 0) {
 2204				pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
 2205					map_name, t->type, (ssize_t)sz);
 2206				return sz;
 2207			}
 2208			if (map_def->key_size && map_def->key_size != sz) {
 2209				pr_warn("map '%s': conflicting key size %u != %zd.\n",
 2210					map_name, map_def->key_size, (ssize_t)sz);
 2211				return -EINVAL;
 2212			}
 2213			map_def->key_size = sz;
 2214			map_def->key_type_id = t->type;
 2215			map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
 2216		} else if (strcmp(name, "value_size") == 0) {
 2217			__u32 sz;
 2218
 2219			if (!get_map_field_int(map_name, btf, m, &sz))
 2220				return -EINVAL;
 2221			if (map_def->value_size && map_def->value_size != sz) {
 2222				pr_warn("map '%s': conflicting value size %u != %u.\n",
 2223					map_name, map_def->value_size, sz);
 2224				return -EINVAL;
 2225			}
 2226			map_def->value_size = sz;
 2227			map_def->parts |= MAP_DEF_VALUE_SIZE;
 2228		} else if (strcmp(name, "value") == 0) {
 2229			__s64 sz;
 2230
 2231			t = btf__type_by_id(btf, m->type);
 2232			if (!t) {
 2233				pr_warn("map '%s': value type [%d] not found.\n",
 2234					map_name, m->type);
 2235				return -EINVAL;
 2236			}
 2237			if (!btf_is_ptr(t)) {
 2238				pr_warn("map '%s': value spec is not PTR: %s.\n",
 2239					map_name, btf_kind_str(t));
 2240				return -EINVAL;
 2241			}
 2242			sz = btf__resolve_size(btf, t->type);
 2243			if (sz < 0) {
 2244				pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
 2245					map_name, t->type, (ssize_t)sz);
 2246				return sz;
 2247			}
 2248			if (map_def->value_size && map_def->value_size != sz) {
 2249				pr_warn("map '%s': conflicting value size %u != %zd.\n",
 2250					map_name, map_def->value_size, (ssize_t)sz);
 2251				return -EINVAL;
 2252			}
 2253			map_def->value_size = sz;
 2254			map_def->value_type_id = t->type;
 2255			map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
 2256		}
 2257		else if (strcmp(name, "values") == 0) {
 2258			char inner_map_name[128];
 2259			int err;
 2260
 2261			if (is_inner) {
 2262				pr_warn("map '%s': multi-level inner maps not supported.\n",
 2263					map_name);
 2264				return -ENOTSUP;
 2265			}
 2266			if (i != vlen - 1) {
 2267				pr_warn("map '%s': '%s' member should be last.\n",
 2268					map_name, name);
 2269				return -EINVAL;
 2270			}
 2271			if (!bpf_map_type__is_map_in_map(map_def->map_type)) {
 2272				pr_warn("map '%s': should be map-in-map.\n",
 2273					map_name);
 2274				return -ENOTSUP;
 2275			}
 2276			if (map_def->value_size && map_def->value_size != 4) {
 2277				pr_warn("map '%s': conflicting value size %u != 4.\n",
 2278					map_name, map_def->value_size);
 2279				return -EINVAL;
 2280			}
 2281			map_def->value_size = 4;
 2282			t = btf__type_by_id(btf, m->type);
 2283			if (!t) {
 2284				pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
 2285					map_name, m->type);
 2286				return -EINVAL;
 2287			}
 2288			if (!btf_is_array(t) || btf_array(t)->nelems) {
 2289				pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
 2290					map_name);
 2291				return -EINVAL;
 2292			}
 2293			t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
 2294			if (!btf_is_ptr(t)) {
 2295				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
 2296					map_name, btf_kind_str(t));
 2297				return -EINVAL;
 2298			}
 2299			t = skip_mods_and_typedefs(btf, t->type, NULL);
 2300			if (!btf_is_struct(t)) {
 2301				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
 2302					map_name, btf_kind_str(t));
 2303				return -EINVAL;
 2304			}
 2305
 2306			snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
 2307			err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
 2308			if (err)
 2309				return err;
 2310
 2311			map_def->parts |= MAP_DEF_INNER_MAP;
 2312		} else if (strcmp(name, "pinning") == 0) {
 2313			__u32 val;
 2314
 2315			if (is_inner) {
 2316				pr_warn("map '%s': inner def can't be pinned.\n", map_name);
 2317				return -EINVAL;
 2318			}
 2319			if (!get_map_field_int(map_name, btf, m, &val))
 2320				return -EINVAL;
 2321			if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
 2322				pr_warn("map '%s': invalid pinning value %u.\n",
 2323					map_name, val);
 2324				return -EINVAL;
 2325			}
 2326			map_def->pinning = val;
 2327			map_def->parts |= MAP_DEF_PINNING;
 2328		} else if (strcmp(name, "map_extra") == 0) {
 2329			__u32 map_extra;
 2330
 2331			if (!get_map_field_int(map_name, btf, m, &map_extra))
 2332				return -EINVAL;
 2333			map_def->map_extra = map_extra;
 2334			map_def->parts |= MAP_DEF_MAP_EXTRA;
 2335		} else {
 2336			if (strict) {
 2337				pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
 2338				return -ENOTSUP;
 2339			}
 2340			pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
 2341		}
 2342	}
 2343
 2344	if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
 2345		pr_warn("map '%s': map type isn't specified.\n", map_name);
 2346		return -EINVAL;
 2347	}
 2348
 2349	return 0;
 2350}
 2351
 2352static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
 2353{
 2354	map->def.type = def->map_type;
 2355	map->def.key_size = def->key_size;
 2356	map->def.value_size = def->value_size;
 2357	map->def.max_entries = def->max_entries;
 2358	map->def.map_flags = def->map_flags;
 2359	map->map_extra = def->map_extra;
 2360
 2361	map->numa_node = def->numa_node;
 2362	map->btf_key_type_id = def->key_type_id;
 2363	map->btf_value_type_id = def->value_type_id;
 2364
 2365	if (def->parts & MAP_DEF_MAP_TYPE)
 2366		pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
 2367
 2368	if (def->parts & MAP_DEF_KEY_TYPE)
 2369		pr_debug("map '%s': found key [%u], sz = %u.\n",
 2370			 map->name, def->key_type_id, def->key_size);
 2371	else if (def->parts & MAP_DEF_KEY_SIZE)
 2372		pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
 2373
 2374	if (def->parts & MAP_DEF_VALUE_TYPE)
 2375		pr_debug("map '%s': found value [%u], sz = %u.\n",
 2376			 map->name, def->value_type_id, def->value_size);
 2377	else if (def->parts & MAP_DEF_VALUE_SIZE)
 2378		pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
 2379
 2380	if (def->parts & MAP_DEF_MAX_ENTRIES)
 2381		pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
 2382	if (def->parts & MAP_DEF_MAP_FLAGS)
 2383		pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
 2384	if (def->parts & MAP_DEF_MAP_EXTRA)
 2385		pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
 2386			 (unsigned long long)def->map_extra);
 2387	if (def->parts & MAP_DEF_PINNING)
 2388		pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
 2389	if (def->parts & MAP_DEF_NUMA_NODE)
 2390		pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
 2391
 2392	if (def->parts & MAP_DEF_INNER_MAP)
 2393		pr_debug("map '%s': found inner map definition.\n", map->name);
 2394}
 2395
 2396static const char *btf_var_linkage_str(__u32 linkage)
 2397{
 2398	switch (linkage) {
 2399	case BTF_VAR_STATIC: return "static";
 2400	case BTF_VAR_GLOBAL_ALLOCATED: return "global";
 2401	case BTF_VAR_GLOBAL_EXTERN: return "extern";
 2402	default: return "unknown";
 2403	}
 2404}
 2405
 2406static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 2407					 const struct btf_type *sec,
 2408					 int var_idx, int sec_idx,
 2409					 const Elf_Data *data, bool strict,
 2410					 const char *pin_root_path)
 2411{
 2412	struct btf_map_def map_def = {}, inner_def = {};
 2413	const struct btf_type *var, *def;
 2414	const struct btf_var_secinfo *vi;
 2415	const struct btf_var *var_extra;
 2416	const char *map_name;
 2417	struct bpf_map *map;
 2418	int err;
 2419
 2420	vi = btf_var_secinfos(sec) + var_idx;
 2421	var = btf__type_by_id(obj->btf, vi->type);
 2422	var_extra = btf_var(var);
 2423	map_name = btf__name_by_offset(obj->btf, var->name_off);
 2424
 2425	if (map_name == NULL || map_name[0] == '\0') {
 2426		pr_warn("map #%d: empty name.\n", var_idx);
 2427		return -EINVAL;
 2428	}
 2429	if ((__u64)vi->offset + vi->size > data->d_size) {
 2430		pr_warn("map '%s' BTF data is corrupted.\n", map_name);
 2431		return -EINVAL;
 2432	}
 2433	if (!btf_is_var(var)) {
 2434		pr_warn("map '%s': unexpected var kind %s.\n",
 2435			map_name, btf_kind_str(var));
 2436		return -EINVAL;
 2437	}
 2438	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
 2439		pr_warn("map '%s': unsupported map linkage %s.\n",
 2440			map_name, btf_var_linkage_str(var_extra->linkage));
 2441		return -EOPNOTSUPP;
 2442	}
 2443
 2444	def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
 2445	if (!btf_is_struct(def)) {
 2446		pr_warn("map '%s': unexpected def kind %s.\n",
 2447			map_name, btf_kind_str(var));
 2448		return -EINVAL;
 2449	}
 2450	if (def->size > vi->size) {
 2451		pr_warn("map '%s': invalid def size.\n", map_name);
 2452		return -EINVAL;
 2453	}
 2454
 2455	map = bpf_object__add_map(obj);
 2456	if (IS_ERR(map))
 2457		return PTR_ERR(map);
 2458	map->name = strdup(map_name);
 2459	if (!map->name) {
 2460		pr_warn("map '%s': failed to alloc map name.\n", map_name);
 2461		return -ENOMEM;
 2462	}
 2463	map->libbpf_type = LIBBPF_MAP_UNSPEC;
 2464	map->def.type = BPF_MAP_TYPE_UNSPEC;
 2465	map->sec_idx = sec_idx;
 2466	map->sec_offset = vi->offset;
 2467	map->btf_var_idx = var_idx;
 2468	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
 2469		 map_name, map->sec_idx, map->sec_offset);
 2470
 2471	err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
 2472	if (err)
 2473		return err;
 2474
 2475	fill_map_from_def(map, &map_def);
 2476
 2477	if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
 2478		err = build_map_pin_path(map, pin_root_path);
 2479		if (err) {
 2480			pr_warn("map '%s': couldn't build pin path.\n", map->name);
 2481			return err;
 2482		}
 2483	}
 2484
 2485	if (map_def.parts & MAP_DEF_INNER_MAP) {
 2486		map->inner_map = calloc(1, sizeof(*map->inner_map));
 2487		if (!map->inner_map)
 2488			return -ENOMEM;
 2489		map->inner_map->fd = -1;
 2490		map->inner_map->sec_idx = sec_idx;
 2491		map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
 2492		if (!map->inner_map->name)
 2493			return -ENOMEM;
 2494		sprintf(map->inner_map->name, "%s.inner", map_name);
 2495
 2496		fill_map_from_def(map->inner_map, &inner_def);
 2497	}
 2498
 2499	return 0;
 2500}
 2501
 2502static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
 2503					  const char *pin_root_path)
 2504{
 2505	const struct btf_type *sec = NULL;
 2506	int nr_types, i, vlen, err;
 2507	const struct btf_type *t;
 2508	const char *name;
 2509	Elf_Data *data;
 2510	Elf_Scn *scn;
 2511
 2512	if (obj->efile.btf_maps_shndx < 0)
 2513		return 0;
 2514
 2515	scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
 2516	data = elf_sec_data(obj, scn);
 2517	if (!scn || !data) {
 2518		pr_warn("elf: failed to get %s map definitions for %s\n",
 2519			MAPS_ELF_SEC, obj->path);
 2520		return -EINVAL;
 2521	}
 2522
 2523	nr_types = btf__type_cnt(obj->btf);
 2524	for (i = 1; i < nr_types; i++) {
 2525		t = btf__type_by_id(obj->btf, i);
 2526		if (!btf_is_datasec(t))
 2527			continue;
 2528		name = btf__name_by_offset(obj->btf, t->name_off);
 2529		if (strcmp(name, MAPS_ELF_SEC) == 0) {
 2530			sec = t;
 2531			obj->efile.btf_maps_sec_btf_id = i;
 2532			break;
 2533		}
 2534	}
 2535
 2536	if (!sec) {
 2537		pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
 2538		return -ENOENT;
 2539	}
 2540
 2541	vlen = btf_vlen(sec);
 2542	for (i = 0; i < vlen; i++) {
 2543		err = bpf_object__init_user_btf_map(obj, sec, i,
 2544						    obj->efile.btf_maps_shndx,
 2545						    data, strict,
 2546						    pin_root_path);
 2547		if (err)
 2548			return err;
 2549	}
 2550
 2551	return 0;
 2552}
 2553
 2554static int bpf_object__init_maps(struct bpf_object *obj,
 2555				 const struct bpf_object_open_opts *opts)
 2556{
 2557	const char *pin_root_path;
 2558	bool strict;
 2559	int err;
 2560
 2561	strict = !OPTS_GET(opts, relaxed_maps, false);
 2562	pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
 2563
 2564	err = bpf_object__init_user_maps(obj, strict);
 2565	err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
 2566	err = err ?: bpf_object__init_global_data_maps(obj);
 2567	err = err ?: bpf_object__init_kconfig_map(obj);
 2568	err = err ?: bpf_object__init_struct_ops_maps(obj);
 2569
 2570	return err;
 2571}
 2572
 2573static bool section_have_execinstr(struct bpf_object *obj, int idx)
 2574{
 2575	Elf64_Shdr *sh;
 2576
 2577	sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
 2578	if (!sh)
 2579		return false;
 2580
 2581	return sh->sh_flags & SHF_EXECINSTR;
 2582}
 2583
 2584static bool btf_needs_sanitization(struct bpf_object *obj)
 2585{
 2586	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
 2587	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
 2588	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
 2589	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
 2590	bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
 2591
 2592	return !has_func || !has_datasec || !has_func_global || !has_float || !has_decl_tag;
 2593}
 2594
 2595static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 2596{
 2597	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
 2598	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
 2599	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
 2600	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
 2601	bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
 2602	struct btf_type *t;
 2603	int i, j, vlen;
 2604
 2605	for (i = 1; i < btf__type_cnt(btf); i++) {
 2606		t = (struct btf_type *)btf__type_by_id(btf, i);
 2607
 2608		if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
 2609			/* replace VAR/DECL_TAG with INT */
 2610			t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
 2611			/*
 2612			 * using size = 1 is the safest choice, 4 will be too
 2613			 * big and cause kernel BTF validation failure if
 2614			 * original variable took less than 4 bytes
 2615			 */
 2616			t->size = 1;
 2617			*(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
 2618		} else if (!has_datasec && btf_is_datasec(t)) {
 2619			/* replace DATASEC with STRUCT */
 2620			const struct btf_var_secinfo *v = btf_var_secinfos(t);
 2621			struct btf_member *m = btf_members(t);
 2622			struct btf_type *vt;
 2623			char *name;
 2624
 2625			name = (char *)btf__name_by_offset(btf, t->name_off);
 2626			while (*name) {
 2627				if (*name == '.')
 2628					*name = '_';
 2629				name++;
 2630			}
 2631
 2632			vlen = btf_vlen(t);
 2633			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
 2634			for (j = 0; j < vlen; j++, v++, m++) {
 2635				/* order of field assignments is important */
 2636				m->offset = v->offset * 8;
 2637				m->type = v->type;
 2638				/* preserve variable name as member name */
 2639				vt = (void *)btf__type_by_id(btf, v->type);
 2640				m->name_off = vt->name_off;
 2641			}
 2642		} else if (!has_func && btf_is_func_proto(t)) {
 2643			/* replace FUNC_PROTO with ENUM */
 2644			vlen = btf_vlen(t);
 2645			t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
 2646			t->size = sizeof(__u32); /* kernel enforced */
 2647		} else if (!has_func && btf_is_func(t)) {
 2648			/* replace FUNC with TYPEDEF */
 2649			t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
 2650		} else if (!has_func_global && btf_is_func(t)) {
 2651			/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
 2652			t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
 2653		} else if (!has_float && btf_is_float(t)) {
 2654			/* replace FLOAT with an equally-sized empty STRUCT;
 2655			 * since C compilers do not accept e.g. "float" as a
 2656			 * valid struct name, make it anonymous
 2657			 */
 2658			t->name_off = 0;
 2659			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
 2660		}
 2661	}
 2662}
 2663
 2664static bool libbpf_needs_btf(const struct bpf_object *obj)
 2665{
 2666	return obj->efile.btf_maps_shndx >= 0 ||
 2667	       obj->efile.st_ops_shndx >= 0 ||
 2668	       obj->nr_extern > 0;
 2669}
 2670
 2671static bool kernel_needs_btf(const struct bpf_object *obj)
 2672{
 2673	return obj->efile.st_ops_shndx >= 0;
 2674}
 2675
 2676static int bpf_object__init_btf(struct bpf_object *obj,
 2677				Elf_Data *btf_data,
 2678				Elf_Data *btf_ext_data)
 2679{
 2680	int err = -ENOENT;
 2681
 2682	if (btf_data) {
 2683		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
 2684		err = libbpf_get_error(obj->btf);
 2685		if (err) {
 2686			obj->btf = NULL;
 2687			pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
 2688			goto out;
 2689		}
 2690		/* enforce 8-byte pointers for BPF-targeted BTFs */
 2691		btf__set_pointer_size(obj->btf, 8);
 2692	}
 2693	if (btf_ext_data) {
 2694		if (!obj->btf) {
 2695			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
 2696				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
 2697			goto out;
 2698		}
 2699		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
 2700		err = libbpf_get_error(obj->btf_ext);
 2701		if (err) {
 2702			pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
 2703				BTF_EXT_ELF_SEC, err);
 2704			obj->btf_ext = NULL;
 2705			goto out;
 2706		}
 2707	}
 2708out:
 2709	if (err && libbpf_needs_btf(obj)) {
 2710		pr_warn("BTF is required, but is missing or corrupted.\n");
 2711		return err;
 2712	}
 2713	return 0;
 2714}
 2715
 2716static int compare_vsi_off(const void *_a, const void *_b)
 2717{
 2718	const struct btf_var_secinfo *a = _a;
 2719	const struct btf_var_secinfo *b = _b;
 2720
 2721	return a->offset - b->offset;
 2722}
 2723
 2724static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
 2725			     struct btf_type *t)
 2726{
 2727	__u32 size = 0, off = 0, i, vars = btf_vlen(t);
 2728	const char *name = btf__name_by_offset(btf, t->name_off);
 2729	const struct btf_type *t_var;
 2730	struct btf_var_secinfo *vsi;
 2731	const struct btf_var *var;
 2732	int ret;
 2733
 2734	if (!name) {
 2735		pr_debug("No name found in string section for DATASEC kind.\n");
 2736		return -ENOENT;
 2737	}
 2738
 2739	/* .extern datasec size and var offsets were set correctly during
 2740	 * extern collection step, so just skip straight to sorting variables
 2741	 */
 2742	if (t->size)
 2743		goto sort_vars;
 2744
 2745	ret = find_elf_sec_sz(obj, name, &size);
 2746	if (ret || !size || (t->size && t->size != size)) {
 2747		pr_debug("Invalid size for section %s: %u bytes\n", name, size);
 2748		return -ENOENT;
 2749	}
 2750
 2751	t->size = size;
 2752
 2753	for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
 2754		t_var = btf__type_by_id(btf, vsi->type);
 2755		var = btf_var(t_var);
 2756
 2757		if (!btf_is_var(t_var)) {
 2758			pr_debug("Non-VAR type seen in section %s\n", name);
 2759			return -EINVAL;
 2760		}
 2761
 2762		if (var->linkage == BTF_VAR_STATIC)
 2763			continue;
 2764
 2765		name = btf__name_by_offset(btf, t_var->name_off);
 2766		if (!name) {
 2767			pr_debug("No name found in string section for VAR kind\n");
 2768			return -ENOENT;
 2769		}
 2770
 2771		ret = find_elf_var_offset(obj, name, &off);
 2772		if (ret) {
 2773			pr_debug("No offset found in symbol table for VAR %s\n",
 2774				 name);
 2775			return -ENOENT;
 2776		}
 2777
 2778		vsi->offset = off;
 2779	}
 2780
 2781sort_vars:
 2782	qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
 2783	return 0;
 2784}
 2785
 2786static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
 2787{
 2788	int err = 0;
 2789	__u32 i, n = btf__type_cnt(btf);
 2790
 2791	for (i = 1; i < n; i++) {
 2792		struct btf_type *t = btf_type_by_id(btf, i);
 2793
 2794		/* Loader needs to fix up some of the things compiler
 2795		 * couldn't get its hands on while emitting BTF. This
 2796		 * is section size and global variable offset. We use
 2797		 * the info from the ELF itself for this purpose.
 2798		 */
 2799		if (btf_is_datasec(t)) {
 2800			err = btf_fixup_datasec(obj, btf, t);
 2801			if (err)
 2802				break;
 2803		}
 2804	}
 2805
 2806	return libbpf_err(err);
 2807}
 2808
 2809int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
 2810{
 2811	return btf_finalize_data(obj, btf);
 2812}
 2813
 2814static int bpf_object__finalize_btf(struct bpf_object *obj)
 2815{
 2816	int err;
 2817
 2818	if (!obj->btf)
 2819		return 0;
 2820
 2821	err = btf_finalize_data(obj, obj->btf);
 2822	if (err) {
 2823		pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
 2824		return err;
 2825	}
 2826
 2827	return 0;
 2828}
 2829
 2830static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
 2831{
 2832	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
 2833	    prog->type == BPF_PROG_TYPE_LSM)
 2834		return true;
 2835
 2836	/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
 2837	 * also need vmlinux BTF
 2838	 */
 2839	if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
 2840		return true;
 2841
 2842	return false;
 2843}
 2844
 2845static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
 2846{
 2847	struct bpf_program *prog;
 2848	int i;
 2849
 2850	/* CO-RE relocations need kernel BTF, only when btf_custom_path
 2851	 * is not specified
 2852	 */
 2853	if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
 2854		return true;
 2855
 2856	/* Support for typed ksyms needs kernel BTF */
 2857	for (i = 0; i < obj->nr_extern; i++) {
 2858		const struct extern_desc *ext;
 2859
 2860		ext = &obj->externs[i];
 2861		if (ext->type == EXT_KSYM && ext->ksym.type_id)
 2862			return true;
 2863	}
 2864
 2865	bpf_object__for_each_program(prog, obj) {
 2866		if (!prog->load)
 2867			continue;
 2868		if (prog_needs_vmlinux_btf(prog))
 2869			return true;
 2870	}
 2871
 2872	return false;
 2873}
 2874
 2875static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
 2876{
 2877	int err;
 2878
 2879	/* btf_vmlinux could be loaded earlier */
 2880	if (obj->btf_vmlinux || obj->gen_loader)
 2881		return 0;
 2882
 2883	if (!force && !obj_needs_vmlinux_btf(obj))
 2884		return 0;
 2885
 2886	obj->btf_vmlinux = btf__load_vmlinux_btf();
 2887	err = libbpf_get_error(obj->btf_vmlinux);
 2888	if (err) {
 2889		pr_warn("Error loading vmlinux BTF: %d\n", err);
 2890		obj->btf_vmlinux = NULL;
 2891		return err;
 2892	}
 2893	return 0;
 2894}
 2895
 2896static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 2897{
 2898	struct btf *kern_btf = obj->btf;
 2899	bool btf_mandatory, sanitize;
 2900	int i, err = 0;
 2901
 2902	if (!obj->btf)
 2903		return 0;
 2904
 2905	if (!kernel_supports(obj, FEAT_BTF)) {
 2906		if (kernel_needs_btf(obj)) {
 2907			err = -EOPNOTSUPP;
 2908			goto report;
 2909		}
 2910		pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
 2911		return 0;
 2912	}
 2913
 2914	/* Even though some subprogs are global/weak, user might prefer more
 2915	 * permissive BPF verification process that BPF verifier performs for
 2916	 * static functions, taking into account more context from the caller
 2917	 * functions. In such case, they need to mark such subprogs with
 2918	 * __attribute__((visibility("hidden"))) and libbpf will adjust
 2919	 * corresponding FUNC BTF type to be marked as static and trigger more
 2920	 * involved BPF verification process.
 2921	 */
 2922	for (i = 0; i < obj->nr_programs; i++) {
 2923		struct bpf_program *prog = &obj->programs[i];
 2924		struct btf_type *t;
 2925		const char *name;
 2926		int j, n;
 2927
 2928		if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
 2929			continue;
 2930
 2931		n = btf__type_cnt(obj->btf);
 2932		for (j = 1; j < n; j++) {
 2933			t = btf_type_by_id(obj->btf, j);
 2934			if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
 2935				continue;
 2936
 2937			name = btf__str_by_offset(obj->btf, t->name_off);
 2938			if (strcmp(name, prog->name) != 0)
 2939				continue;
 2940
 2941			t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
 2942			break;
 2943		}
 2944	}
 2945
 2946	sanitize = btf_needs_sanitization(obj);
 2947	if (sanitize) {
 2948		const void *raw_data;
 2949		__u32 sz;
 2950
 2951		/* clone BTF to sanitize a copy and leave the original intact */
 2952		raw_data = btf__raw_data(obj->btf, &sz);
 2953		kern_btf = btf__new(raw_data, sz);
 2954		err = libbpf_get_error(kern_btf);
 2955		if (err)
 2956			return err;
 2957
 2958		/* enforce 8-byte pointers for BPF-targeted BTFs */
 2959		btf__set_pointer_size(obj->btf, 8);
 2960		bpf_object__sanitize_btf(obj, kern_btf);
 2961	}
 2962
 2963	if (obj->gen_loader) {
 2964		__u32 raw_size = 0;
 2965		const void *raw_data = btf__raw_data(kern_btf, &raw_size);
 2966
 2967		if (!raw_data)
 2968			return -ENOMEM;
 2969		bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
 2970		/* Pretend to have valid FD to pass various fd >= 0 checks.
 2971		 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
 2972		 */
 2973		btf__set_fd(kern_btf, 0);
 2974	} else {
 2975		err = btf__load_into_kernel(kern_btf);
 2976	}
 2977	if (sanitize) {
 2978		if (!err) {
 2979			/* move fd to libbpf's BTF */
 2980			btf__set_fd(obj->btf, btf__fd(kern_btf));
 2981			btf__set_fd(kern_btf, -1);
 2982		}
 2983		btf__free(kern_btf);
 2984	}
 2985report:
 2986	if (err) {
 2987		btf_mandatory = kernel_needs_btf(obj);
 2988		pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
 2989			btf_mandatory ? "BTF is mandatory, can't proceed."
 2990				      : "BTF is optional, ignoring.");
 2991		if (!btf_mandatory)
 2992			err = 0;
 2993	}
 2994	return err;
 2995}
 2996
 2997static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
 2998{
 2999	const char *name;
 3000
 3001	name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
 3002	if (!name) {
 3003		pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
 3004			off, obj->path, elf_errmsg(-1));
 3005		return NULL;
 3006	}
 3007
 3008	return name;
 3009}
 3010
 3011static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
 3012{
 3013	const char *name;
 3014
 3015	name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
 3016	if (!name) {
 3017		pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
 3018			off, obj->path, elf_errmsg(-1));
 3019		return NULL;
 3020	}
 3021
 3022	return name;
 3023}
 3024
 3025static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
 3026{
 3027	Elf_Scn *scn;
 3028
 3029	scn = elf_getscn(obj->efile.elf, idx);
 3030	if (!scn) {
 3031		pr_warn("elf: failed to get section(%zu) from %s: %s\n",
 3032			idx, obj->path, elf_errmsg(-1));
 3033		return NULL;
 3034	}
 3035	return scn;
 3036}
 3037
 3038static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
 3039{
 3040	Elf_Scn *scn = NULL;
 3041	Elf *elf = obj->efile.elf;
 3042	const char *sec_name;
 3043
 3044	while ((scn = elf_nextscn(elf, scn)) != NULL) {
 3045		sec_name = elf_sec_name(obj, scn);
 3046		if (!sec_name)
 3047			return NULL;
 3048
 3049		if (strcmp(sec_name, name) != 0)
 3050			continue;
 3051
 3052		return scn;
 3053	}
 3054	return NULL;
 3055}
 3056
 3057static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
 3058{
 3059	Elf64_Shdr *shdr;
 3060
 3061	if (!scn)
 3062		return NULL;
 3063
 3064	shdr = elf64_getshdr(scn);
 3065	if (!shdr) {
 3066		pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
 3067			elf_ndxscn(scn), obj->path, elf_errmsg(-1));
 3068		return NULL;
 3069	}
 3070
 3071	return shdr;
 3072}
 3073
 3074static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
 3075{
 3076	const char *name;
 3077	Elf64_Shdr *sh;
 3078
 3079	if (!scn)
 3080		return NULL;
 3081
 3082	sh = elf_sec_hdr(obj, scn);
 3083	if (!sh)
 3084		return NULL;
 3085
 3086	name = elf_sec_str(obj, sh->sh_name);
 3087	if (!name) {
 3088		pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
 3089			elf_ndxscn(scn), obj->path, elf_errmsg(-1));
 3090		return NULL;
 3091	}
 3092
 3093	return name;
 3094}
 3095
 3096static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
 3097{
 3098	Elf_Data *data;
 3099
 3100	if (!scn)
 3101		return NULL;
 3102
 3103	data = elf_getdata(scn, 0);
 3104	if (!data) {
 3105		pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
 3106			elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
 3107			obj->path, elf_errmsg(-1));
 3108		return NULL;
 3109	}
 3110
 3111	return data;
 3112}
 3113
 3114static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
 3115{
 3116	if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
 3117		return NULL;
 3118
 3119	return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
 3120}
 3121
 3122static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
 3123{
 3124	if (idx >= data->d_size / sizeof(Elf64_Rel))
 3125		return NULL;
 3126
 3127	return (Elf64_Rel *)data->d_buf + idx;
 3128}
 3129
 3130static bool is_sec_name_dwarf(const char *name)
 3131{
 3132	/* approximation, but the actual list is too long */
 3133	return str_has_pfx(name, ".debug_");
 3134}
 3135
 3136static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
 3137{
 3138	/* no special handling of .strtab */
 3139	if (hdr->sh_type == SHT_STRTAB)
 3140		return true;
 3141
 3142	/* ignore .llvm_addrsig section as well */
 3143	if (hdr->sh_type == SHT_LLVM_ADDRSIG)
 3144		return true;
 3145
 3146	/* no subprograms will lead to an empty .text section, ignore it */
 3147	if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
 3148	    strcmp(name, ".text") == 0)
 3149		return true;
 3150
 3151	/* DWARF sections */
 3152	if (is_sec_name_dwarf(name))
 3153		return true;
 3154
 3155	if (str_has_pfx(name, ".rel")) {
 3156		name += sizeof(".rel") - 1;
 3157		/* DWARF section relocations */
 3158		if (is_sec_name_dwarf(name))
 3159			return true;
 3160
 3161		/* .BTF and .BTF.ext don't need relocations */
 3162		if (strcmp(name, BTF_ELF_SEC) == 0 ||
 3163		    strcmp(name, BTF_EXT_ELF_SEC) == 0)
 3164			return true;
 3165	}
 3166
 3167	return false;
 3168}
 3169
 3170static int cmp_progs(const void *_a, const void *_b)
 3171{
 3172	const struct bpf_program *a = _a;
 3173	const struct bpf_program *b = _b;
 3174
 3175	if (a->sec_idx != b->sec_idx)
 3176		return a->sec_idx < b->sec_idx ? -1 : 1;
 3177
 3178	/* sec_insn_off can't be the same within the section */
 3179	return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
 3180}
 3181
 3182static int bpf_object__elf_collect(struct bpf_object *obj)
 3183{
 3184	struct elf_sec_desc *sec_desc;
 3185	Elf *elf = obj->efile.elf;
 3186	Elf_Data *btf_ext_data = NULL;
 3187	Elf_Data *btf_data = NULL;
 3188	int idx = 0, err = 0;
 3189	const char *name;
 3190	Elf_Data *data;
 3191	Elf_Scn *scn;
 3192	Elf64_Shdr *sh;
 3193
 3194	/* ELF section indices are 1-based, so allocate +1 element to keep
 3195	 * indexing simple. Also include 0th invalid section into sec_cnt for
 3196	 * simpler and more traditional iteration logic.
 3197	 */
 3198	obj->efile.sec_cnt = 1 + obj->efile.ehdr->e_shnum;
 3199	obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
 3200	if (!obj->efile.secs)
 3201		return -ENOMEM;
 3202
 3203	/* a bunch of ELF parsing functionality depends on processing symbols,
 3204	 * so do the first pass and find the symbol table
 3205	 */
 3206	scn = NULL;
 3207	while ((scn = elf_nextscn(elf, scn)) != NULL) {
 3208		sh = elf_sec_hdr(obj, scn);
 3209		if (!sh)
 3210			return -LIBBPF_ERRNO__FORMAT;
 3211
 3212		if (sh->sh_type == SHT_SYMTAB) {
 3213			if (obj->efile.symbols) {
 3214				pr_warn("elf: multiple symbol tables in %s\n", obj->path);
 3215				return -LIBBPF_ERRNO__FORMAT;
 3216			}
 3217
 3218			data = elf_sec_data(obj, scn);
 3219			if (!data)
 3220				return -LIBBPF_ERRNO__FORMAT;
 3221
 3222			idx = elf_ndxscn(scn);
 3223
 3224			obj->efile.symbols = data;
 3225			obj->efile.symbols_shndx = idx;
 3226			obj->efile.strtabidx = sh->sh_link;
 3227		}
 3228	}
 3229
 3230	if (!obj->efile.symbols) {
 3231		pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
 3232			obj->path);
 3233		return -ENOENT;
 3234	}
 3235
 3236	scn = NULL;
 3237	while ((scn = elf_nextscn(elf, scn)) != NULL) {
 3238		idx = elf_ndxscn(scn);
 3239		sec_desc = &obj->efile.secs[idx];
 3240
 3241		sh = elf_sec_hdr(obj, scn);
 3242		if (!sh)
 3243			return -LIBBPF_ERRNO__FORMAT;
 3244
 3245		name = elf_sec_str(obj, sh->sh_name);
 3246		if (!name)
 3247			return -LIBBPF_ERRNO__FORMAT;
 3248
 3249		if (ignore_elf_section(sh, name))
 3250			continue;
 3251
 3252		data = elf_sec_data(obj, scn);
 3253		if (!data)
 3254			return -LIBBPF_ERRNO__FORMAT;
 3255
 3256		pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
 3257			 idx, name, (unsigned long)data->d_size,
 3258			 (int)sh->sh_link, (unsigned long)sh->sh_flags,
 3259			 (int)sh->sh_type);
 3260
 3261		if (strcmp(name, "license") == 0) {
 3262			err = bpf_object__init_license(obj, data->d_buf, data->d_size);
 3263			if (err)
 3264				return err;
 3265		} else if (strcmp(name, "version") == 0) {
 3266			err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
 3267			if (err)
 3268				return err;
 3269		} else if (strcmp(name, "maps") == 0) {
 3270			obj->efile.maps_shndx = idx;
 3271		} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
 3272			obj->efile.btf_maps_shndx = idx;
 3273		} else if (strcmp(name, BTF_ELF_SEC) == 0) {
 3274			btf_data = data;
 3275		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
 3276			btf_ext_data = data;
 3277		} else if (sh->sh_type == SHT_SYMTAB) {
 3278			/* already processed during the first pass above */
 3279		} else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
 3280			if (sh->sh_flags & SHF_EXECINSTR) {
 3281				if (strcmp(name, ".text") == 0)
 3282					obj->efile.text_shndx = idx;
 3283				err = bpf_object__add_programs(obj, data, name, idx);
 3284				if (err)
 3285					return err;
 3286			} else if (strcmp(name, DATA_SEC) == 0 ||
 3287				   str_has_pfx(name, DATA_SEC ".")) {
 3288				sec_desc->sec_type = SEC_DATA;
 3289				sec_desc->shdr = sh;
 3290				sec_desc->data = data;
 3291			} else if (strcmp(name, RODATA_SEC) == 0 ||
 3292				   str_has_pfx(name, RODATA_SEC ".")) {
 3293				sec_desc->sec_type = SEC_RODATA;
 3294				sec_desc->shdr = sh;
 3295				sec_desc->data = data;
 3296			} else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
 3297				obj->efile.st_ops_data = data;
 3298				obj->efile.st_ops_shndx = idx;
 3299			} else {
 3300				pr_info("elf: skipping unrecognized data section(%d) %s\n",
 3301					idx, name);
 3302			}
 3303		} else if (sh->sh_type == SHT_REL) {
 3304			int targ_sec_idx = sh->sh_info; /* points to other section */
 3305
 3306			/* Only do relo for section with exec instructions */
 3307			if (!section_have_execinstr(obj, targ_sec_idx) &&
 3308			    strcmp(name, ".rel" STRUCT_OPS_SEC) &&
 3309			    strcmp(name, ".rel" MAPS_ELF_SEC)) {
 3310				pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
 3311					idx, name, targ_sec_idx,
 3312					elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
 3313				continue;
 3314			}
 3315
 3316			sec_desc->sec_type = SEC_RELO;
 3317			sec_desc->shdr = sh;
 3318			sec_desc->data = data;
 3319		} else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
 3320			sec_desc->sec_type = SEC_BSS;
 3321			sec_desc->shdr = sh;
 3322			sec_desc->data = data;
 3323		} else {
 3324			pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
 3325				(size_t)sh->sh_size);
 3326		}
 3327	}
 3328
 3329	if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
 3330		pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
 3331		return -LIBBPF_ERRNO__FORMAT;
 3332	}
 3333
 3334	/* sort BPF programs by section name and in-section instruction offset
 3335	 * for faster search */
 3336	qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
 3337
 3338	return bpf_object__init_btf(obj, btf_data, btf_ext_data);
 3339}
 3340
 3341static bool sym_is_extern(const Elf64_Sym *sym)
 3342{
 3343	int bind = ELF64_ST_BIND(sym->st_info);
 3344	/* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
 3345	return sym->st_shndx == SHN_UNDEF &&
 3346	       (bind == STB_GLOBAL || bind == STB_WEAK) &&
 3347	       ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
 3348}
 3349
 3350static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
 3351{
 3352	int bind = ELF64_ST_BIND(sym->st_info);
 3353	int type = ELF64_ST_TYPE(sym->st_info);
 3354
 3355	/* in .text section */
 3356	if (sym->st_shndx != text_shndx)
 3357		return false;
 3358
 3359	/* local function */
 3360	if (bind == STB_LOCAL && type == STT_SECTION)
 3361		return true;
 3362
 3363	/* global function */
 3364	return bind == STB_GLOBAL && type == STT_FUNC;
 3365}
 3366
 3367static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
 3368{
 3369	const struct btf_type *t;
 3370	const char *tname;
 3371	int i, n;
 3372
 3373	if (!btf)
 3374		return -ESRCH;
 3375
 3376	n = btf__type_cnt(btf);
 3377	for (i = 1; i < n; i++) {
 3378		t = btf__type_by_id(btf, i);
 3379
 3380		if (!btf_is_var(t) && !btf_is_func(t))
 3381			continue;
 3382
 3383		tname = btf__name_by_offset(btf, t->name_off);
 3384		if (strcmp(tname, ext_name))
 3385			continue;
 3386
 3387		if (btf_is_var(t) &&
 3388		    btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
 3389			return -EINVAL;
 3390
 3391		if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
 3392			return -EINVAL;
 3393
 3394		return i;
 3395	}
 3396
 3397	return -ENOENT;
 3398}
 3399
 3400static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
 3401	const struct btf_var_secinfo *vs;
 3402	const struct btf_type *t;
 3403	int i, j, n;
 3404
 3405	if (!btf)
 3406		return -ESRCH;
 3407
 3408	n = btf__type_cnt(btf);
 3409	for (i = 1; i < n; i++) {
 3410		t = btf__type_by_id(btf, i);
 3411
 3412		if (!btf_is_datasec(t))
 3413			continue;
 3414
 3415		vs = btf_var_secinfos(t);
 3416		for (j = 0; j < btf_vlen(t); j++, vs++) {
 3417			if (vs->type == ext_btf_id)
 3418				return i;
 3419		}
 3420	}
 3421
 3422	return -ENOENT;
 3423}
 3424
 3425static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
 3426				     bool *is_signed)
 3427{
 3428	const struct btf_type *t;
 3429	const char *name;
 3430
 3431	t = skip_mods_and_typedefs(btf, id, NULL);
 3432	name = btf__name_by_offset(btf, t->name_off);
 3433
 3434	if (is_signed)
 3435		*is_signed = false;
 3436	switch (btf_kind(t)) {
 3437	case BTF_KIND_INT: {
 3438		int enc = btf_int_encoding(t);
 3439
 3440		if (enc & BTF_INT_BOOL)
 3441			return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
 3442		if (is_signed)
 3443			*is_signed = enc & BTF_INT_SIGNED;
 3444		if (t->size == 1)
 3445			return KCFG_CHAR;
 3446		if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
 3447			return KCFG_UNKNOWN;
 3448		return KCFG_INT;
 3449	}
 3450	case BTF_KIND_ENUM:
 3451		if (t->size != 4)
 3452			return KCFG_UNKNOWN;
 3453		if (strcmp(name, "libbpf_tristate"))
 3454			return KCFG_UNKNOWN;
 3455		return KCFG_TRISTATE;
 3456	case BTF_KIND_ARRAY:
 3457		if (btf_array(t)->nelems == 0)
 3458			return KCFG_UNKNOWN;
 3459		if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
 3460			return KCFG_UNKNOWN;
 3461		return KCFG_CHAR_ARR;
 3462	default:
 3463		return KCFG_UNKNOWN;
 3464	}
 3465}
 3466
 3467static int cmp_externs(const void *_a, const void *_b)
 3468{
 3469	const struct extern_desc *a = _a;
 3470	const struct extern_desc *b = _b;
 3471
 3472	if (a->type != b->type)
 3473		return a->type < b->type ? -1 : 1;
 3474
 3475	if (a->type == EXT_KCFG) {
 3476		/* descending order by alignment requirements */
 3477		if (a->kcfg.align != b->kcfg.align)
 3478			return a->kcfg.align > b->kcfg.align ? -1 : 1;
 3479		/* ascending order by size, within same alignment class */
 3480		if (a->kcfg.sz != b->kcfg.sz)
 3481			return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
 3482	}
 3483
 3484	/* resolve ties by name */
 3485	return strcmp(a->name, b->name);
 3486}
 3487
 3488static int find_int_btf_id(const struct btf *btf)
 3489{
 3490	const struct btf_type *t;
 3491	int i, n;
 3492
 3493	n = btf__type_cnt(btf);
 3494	for (i = 1; i < n; i++) {
 3495		t = btf__type_by_id(btf, i);
 3496
 3497		if (btf_is_int(t) && btf_int_bits(t) == 32)
 3498			return i;
 3499	}
 3500
 3501	return 0;
 3502}
 3503
 3504static int add_dummy_ksym_var(struct btf *btf)
 3505{
 3506	int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
 3507	const struct btf_var_secinfo *vs;
 3508	const struct btf_type *sec;
 3509
 3510	if (!btf)
 3511		return 0;
 3512
 3513	sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
 3514					    BTF_KIND_DATASEC);
 3515	if (sec_btf_id < 0)
 3516		return 0;
 3517
 3518	sec = btf__type_by_id(btf, sec_btf_id);
 3519	vs = btf_var_secinfos(sec);
 3520	for (i = 0; i < btf_vlen(sec); i++, vs++) {
 3521		const struct btf_type *vt;
 3522
 3523		vt = btf__type_by_id(btf, vs->type);
 3524		if (btf_is_func(vt))
 3525			break;
 3526	}
 3527
 3528	/* No func in ksyms sec.  No need to add dummy var. */
 3529	if (i == btf_vlen(sec))
 3530		return 0;
 3531
 3532	int_btf_id = find_int_btf_id(btf);
 3533	dummy_var_btf_id = btf__add_var(btf,
 3534					"dummy_ksym",
 3535					BTF_VAR_GLOBAL_ALLOCATED,
 3536					int_btf_id);
 3537	if (dummy_var_btf_id < 0)
 3538		pr_warn("cannot create a dummy_ksym var\n");
 3539
 3540	return dummy_var_btf_id;
 3541}
 3542
 3543static int bpf_object__collect_externs(struct bpf_object *obj)
 3544{
 3545	struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
 3546	const struct btf_type *t;
 3547	struct extern_desc *ext;
 3548	int i, n, off, dummy_var_btf_id;
 3549	const char *ext_name, *sec_name;
 3550	Elf_Scn *scn;
 3551	Elf64_Shdr *sh;
 3552
 3553	if (!obj->efile.symbols)
 3554		return 0;
 3555
 3556	scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
 3557	sh = elf_sec_hdr(obj, scn);
 3558	if (!sh)
 3559		return -LIBBPF_ERRNO__FORMAT;
 3560
 3561	dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
 3562	if (dummy_var_btf_id < 0)
 3563		return dummy_var_btf_id;
 3564
 3565	n = sh->sh_size / sh->sh_entsize;
 3566	pr_debug("looking for externs among %d symbols...\n", n);
 3567
 3568	for (i = 0; i < n; i++) {
 3569		Elf64_Sym *sym = elf_sym_by_idx(obj, i);
 3570
 3571		if (!sym)
 3572			return -LIBBPF_ERRNO__FORMAT;
 3573		if (!sym_is_extern(sym))
 3574			continue;
 3575		ext_name = elf_sym_str(obj, sym->st_name);
 3576		if (!ext_name || !ext_name[0])
 3577			continue;
 3578
 3579		ext = obj->externs;
 3580		ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
 3581		if (!ext)
 3582			return -ENOMEM;
 3583		obj->externs = ext;
 3584		ext = &ext[obj->nr_extern];
 3585		memset(ext, 0, sizeof(*ext));
 3586		obj->nr_extern++;
 3587
 3588		ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
 3589		if (ext->btf_id <= 0) {
 3590			pr_warn("failed to find BTF for extern '%s': %d\n",
 3591				ext_name, ext->btf_id);
 3592			return ext->btf_id;
 3593		}
 3594		t = btf__type_by_id(obj->btf, ext->btf_id);
 3595		ext->name = btf__name_by_offset(obj->btf, t->name_off);
 3596		ext->sym_idx = i;
 3597		ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
 3598
 3599		ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
 3600		if (ext->sec_btf_id <= 0) {
 3601			pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
 3602				ext_name, ext->btf_id, ext->sec_btf_id);
 3603			return ext->sec_btf_id;
 3604		}
 3605		sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
 3606		sec_name = btf__name_by_offset(obj->btf, sec->name_off);
 3607
 3608		if (strcmp(sec_name, KCONFIG_SEC) == 0) {
 3609			if (btf_is_func(t)) {
 3610				pr_warn("extern function %s is unsupported under %s section\n",
 3611					ext->name, KCONFIG_SEC);
 3612				return -ENOTSUP;
 3613			}
 3614			kcfg_sec = sec;
 3615			ext->type = EXT_KCFG;
 3616			ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
 3617			if (ext->kcfg.sz <= 0) {
 3618				pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
 3619					ext_name, ext->kcfg.sz);
 3620				return ext->kcfg.sz;
 3621			}
 3622			ext->kcfg.align = btf__align_of(obj->btf, t->type);
 3623			if (ext->kcfg.align <= 0) {
 3624				pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
 3625					ext_name, ext->kcfg.align);
 3626				return -EINVAL;
 3627			}
 3628			ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
 3629						        &ext->kcfg.is_signed);
 3630			if (ext->kcfg.type == KCFG_UNKNOWN) {
 3631				pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
 3632				return -ENOTSUP;
 3633			}
 3634		} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
 3635			ksym_sec = sec;
 3636			ext->type = EXT_KSYM;
 3637			skip_mods_and_typedefs(obj->btf, t->type,
 3638					       &ext->ksym.type_id);
 3639		} else {
 3640			pr_warn("unrecognized extern section '%s'\n", sec_name);
 3641			return -ENOTSUP;
 3642		}
 3643	}
 3644	pr_debug("collected %d externs total\n", obj->nr_extern);
 3645
 3646	if (!obj->nr_extern)
 3647		return 0;
 3648
 3649	/* sort externs by type, for kcfg ones also by (align, size, name) */
 3650	qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
 3651
 3652	/* for .ksyms section, we need to turn all externs into allocated
 3653	 * variables in BTF to pass kernel verification; we do this by
 3654	 * pretending that each extern is a 8-byte variable
 3655	 */
 3656	if (ksym_sec) {
 3657		/* find existing 4-byte integer type in BTF to use for fake
 3658		 * extern variables in DATASEC
 3659		 */
 3660		int int_btf_id = find_int_btf_id(obj->btf);
 3661		/* For extern function, a dummy_var added earlier
 3662		 * will be used to replace the vs->type and
 3663		 * its name string will be used to refill
 3664		 * the missing param's name.
 3665		 */
 3666		const struct btf_type *dummy_var;
 3667
 3668		dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
 3669		for (i = 0; i < obj->nr_extern; i++) {
 3670			ext = &obj->externs[i];
 3671			if (ext->type != EXT_KSYM)
 3672				continue;
 3673			pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
 3674				 i, ext->sym_idx, ext->name);
 3675		}
 3676
 3677		sec = ksym_sec;
 3678		n = btf_vlen(sec);
 3679		for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
 3680			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
 3681			struct btf_type *vt;
 3682
 3683			vt = (void *)btf__type_by_id(obj->btf, vs->type);
 3684			ext_name = btf__name_by_offset(obj->btf, vt->name_off);
 3685			ext = find_extern_by_name(obj, ext_name);
 3686			if (!ext) {
 3687				pr_warn("failed to find extern definition for BTF %s '%s'\n",
 3688					btf_kind_str(vt), ext_name);
 3689				return -ESRCH;
 3690			}
 3691			if (btf_is_func(vt)) {
 3692				const struct btf_type *func_proto;
 3693				struct btf_param *param;
 3694				int j;
 3695
 3696				func_proto = btf__type_by_id(obj->btf,
 3697							     vt->type);
 3698				param = btf_params(func_proto);
 3699				/* Reuse the dummy_var string if the
 3700				 * func proto does not have param name.
 3701				 */
 3702				for (j = 0; j < btf_vlen(func_proto); j++)
 3703					if (param[j].type && !param[j].name_off)
 3704						param[j].name_off =
 3705							dummy_var->name_off;
 3706				vs->type = dummy_var_btf_id;
 3707				vt->info &= ~0xffff;
 3708				vt->info |= BTF_FUNC_GLOBAL;
 3709			} else {
 3710				btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
 3711				vt->type = int_btf_id;
 3712			}
 3713			vs->offset = off;
 3714			vs->size = sizeof(int);
 3715		}
 3716		sec->size = off;
 3717	}
 3718
 3719	if (kcfg_sec) {
 3720		sec = kcfg_sec;
 3721		/* for kcfg externs calculate their offsets within a .kconfig map */
 3722		off = 0;
 3723		for (i = 0; i < obj->nr_extern; i++) {
 3724			ext = &obj->externs[i];
 3725			if (ext->type != EXT_KCFG)
 3726				continue;
 3727
 3728			ext->kcfg.data_off = roundup(off, ext->kcfg.align);
 3729			off = ext->kcfg.data_off + ext->kcfg.sz;
 3730			pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
 3731				 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
 3732		}
 3733		sec->size = off;
 3734		n = btf_vlen(sec);
 3735		for (i = 0; i < n; i++) {
 3736			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
 3737
 3738			t = btf__type_by_id(obj->btf, vs->type);
 3739			ext_name = btf__name_by_offset(obj->btf, t->name_off);
 3740			ext = find_extern_by_name(obj, ext_name);
 3741			if (!ext) {
 3742				pr_warn("failed to find extern definition for BTF var '%s'\n",
 3743					ext_name);
 3744				return -ESRCH;
 3745			}
 3746			btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
 3747			vs->offset = ext->kcfg.data_off;
 3748		}
 3749	}
 3750	return 0;
 3751}
 3752
 3753struct bpf_program *
 3754bpf_object__find_program_by_title(const struct bpf_object *obj,
 3755				  const char *title)
 3756{
 3757	struct bpf_program *pos;
 3758
 3759	bpf_object__for_each_program(pos, obj) {
 3760		if (pos->sec_name && !strcmp(pos->sec_name, title))
 3761			return pos;
 3762	}
 3763	return errno = ENOENT, NULL;
 3764}
 3765
 3766static bool prog_is_subprog(const struct bpf_object *obj,
 3767			    const struct bpf_program *prog)
 3768{
 3769	/* For legacy reasons, libbpf supports an entry-point BPF programs
 3770	 * without SEC() attribute, i.e., those in the .text section. But if
 3771	 * there are 2 or more such programs in the .text section, they all
 3772	 * must be subprograms called from entry-point BPF programs in
 3773	 * designated SEC()'tions, otherwise there is no way to distinguish
 3774	 * which of those programs should be loaded vs which are a subprogram.
 3775	 * Similarly, if there is a function/program in .text and at least one
 3776	 * other BPF program with custom SEC() attribute, then we just assume
 3777	 * .text programs are subprograms (even if they are not called from
 3778	 * other programs), because libbpf never explicitly supported mixing
 3779	 * SEC()-designated BPF programs and .text entry-point BPF programs.
 3780	 */
 3781	return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
 3782}
 3783
 3784struct bpf_program *
 3785bpf_object__find_program_by_name(const struct bpf_object *obj,
 3786				 const char *name)
 3787{
 3788	struct bpf_program *prog;
 3789
 3790	bpf_object__for_each_program(prog, obj) {
 3791		if (prog_is_subprog(obj, prog))
 3792			continue;
 3793		if (!strcmp(prog->name, name))
 3794			return prog;
 3795	}
 3796	return errno = ENOENT, NULL;
 3797}
 3798
 3799static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
 3800				      int shndx)
 3801{
 3802	switch (obj->efile.secs[shndx].sec_type) {
 3803	case SEC_BSS:
 3804	case SEC_DATA:
 3805	case SEC_RODATA:
 3806		return true;
 3807	default:
 3808		return false;
 3809	}
 3810}
 3811
 3812static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
 3813				      int shndx)
 3814{
 3815	return shndx == obj->efile.maps_shndx ||
 3816	       shndx == obj->efile.btf_maps_shndx;
 3817}
 3818
 3819static enum libbpf_map_type
 3820bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
 3821{
 3822	if (shndx == obj->efile.symbols_shndx)
 3823		return LIBBPF_MAP_KCONFIG;
 3824
 3825	switch (obj->efile.secs[shndx].sec_type) {
 3826	case SEC_BSS:
 3827		return LIBBPF_MAP_BSS;
 3828	case SEC_DATA:
 3829		return LIBBPF_MAP_DATA;
 3830	case SEC_RODATA:
 3831		return LIBBPF_MAP_RODATA;
 3832	default:
 3833		return LIBBPF_MAP_UNSPEC;
 3834	}
 3835}
 3836
 3837static int bpf_program__record_reloc(struct bpf_program *prog,
 3838				     struct reloc_desc *reloc_desc,
 3839				     __u32 insn_idx, const char *sym_name,
 3840				     const Elf64_Sym *sym, const Elf64_Rel *rel)
 3841{
 3842	struct bpf_insn *insn = &prog->insns[insn_idx];
 3843	size_t map_idx, nr_maps = prog->obj->nr_maps;
 3844	struct bpf_object *obj = prog->obj;
 3845	__u32 shdr_idx = sym->st_shndx;
 3846	enum libbpf_map_type type;
 3847	const char *sym_sec_name;
 3848	struct bpf_map *map;
 3849
 3850	if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
 3851		pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
 3852			prog->name, sym_name, insn_idx, insn->code);
 3853		return -LIBBPF_ERRNO__RELOC;
 3854	}
 3855
 3856	if (sym_is_extern(sym)) {
 3857		int sym_idx = ELF64_R_SYM(rel->r_info);
 3858		int i, n = obj->nr_extern;
 3859		struct extern_desc *ext;
 3860
 3861		for (i = 0; i < n; i++) {
 3862			ext = &obj->externs[i];
 3863			if (ext->sym_idx == sym_idx)
 3864				break;
 3865		}
 3866		if (i >= n) {
 3867			pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
 3868				prog->name, sym_name, sym_idx);
 3869			return -LIBBPF_ERRNO__RELOC;
 3870		}
 3871		pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
 3872			 prog->name, i, ext->name, ext->sym_idx, insn_idx);
 3873		if (insn->code == (BPF_JMP | BPF_CALL))
 3874			reloc_desc->type = RELO_EXTERN_FUNC;
 3875		else
 3876			reloc_desc->type = RELO_EXTERN_VAR;
 3877		reloc_desc->insn_idx = insn_idx;
 3878		reloc_desc->sym_off = i; /* sym_off stores extern index */
 3879		return 0;
 3880	}
 3881
 3882	/* sub-program call relocation */
 3883	if (is_call_insn(insn)) {
 3884		if (insn->src_reg != BPF_PSEUDO_CALL) {
 3885			pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
 3886			return -LIBBPF_ERRNO__RELOC;
 3887		}
 3888		/* text_shndx can be 0, if no default "main" program exists */
 3889		if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
 3890			sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
 3891			pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
 3892				prog->name, sym_name, sym_sec_name);
 3893			return -LIBBPF_ERRNO__RELOC;
 3894		}
 3895		if (sym->st_value % BPF_INSN_SZ) {
 3896			pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
 3897				prog->name, sym_name, (size_t)sym->st_value);
 3898			return -LIBBPF_ERRNO__RELOC;
 3899		}
 3900		reloc_desc->type = RELO_CALL;
 3901		reloc_desc->insn_idx = insn_idx;
 3902		reloc_desc->sym_off = sym->st_value;
 3903		return 0;
 3904	}
 3905
 3906	if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
 3907		pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
 3908			prog->name, sym_name, shdr_idx);
 3909		return -LIBBPF_ERRNO__RELOC;
 3910	}
 3911
 3912	/* loading subprog addresses */
 3913	if (sym_is_subprog(sym, obj->efile.text_shndx)) {
 3914		/* global_func: sym->st_value = offset in the section, insn->imm = 0.
 3915		 * local_func: sym->st_value = 0, insn->imm = offset in the section.
 3916		 */
 3917		if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
 3918			pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
 3919				prog->name, sym_name, (size_t)sym->st_value, insn->imm);
 3920			return -LIBBPF_ERRNO__RELOC;
 3921		}
 3922
 3923		reloc_desc->type = RELO_SUBPROG_ADDR;
 3924		reloc_desc->insn_idx = insn_idx;
 3925		reloc_desc->sym_off = sym->st_value;
 3926		return 0;
 3927	}
 3928
 3929	type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
 3930	sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
 3931
 3932	/* generic map reference relocation */
 3933	if (type == LIBBPF_MAP_UNSPEC) {
 3934		if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
 3935			pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
 3936				prog->name, sym_name, sym_sec_name);
 3937			return -LIBBPF_ERRNO__RELOC;
 3938		}
 3939		for (map_idx = 0; map_idx < nr_maps; map_idx++) {
 3940			map = &obj->maps[map_idx];
 3941			if (map->libbpf_type != type ||
 3942			    map->sec_idx != sym->st_shndx ||
 3943			    map->sec_offset != sym->st_value)
 3944				continue;
 3945			pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
 3946				 prog->name, map_idx, map->name, map->sec_idx,
 3947				 map->sec_offset, insn_idx);
 3948			break;
 3949		}
 3950		if (map_idx >= nr_maps) {
 3951			pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
 3952				prog->name, sym_sec_name, (size_t)sym->st_value);
 3953			return -LIBBPF_ERRNO__RELOC;
 3954		}
 3955		reloc_desc->type = RELO_LD64;
 3956		reloc_desc->insn_idx = insn_idx;
 3957		reloc_desc->map_idx = map_idx;
 3958		reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
 3959		return 0;
 3960	}
 3961
 3962	/* global data map relocation */
 3963	if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
 3964		pr_warn("prog '%s': bad data relo against section '%s'\n",
 3965			prog->name, sym_sec_name);
 3966		return -LIBBPF_ERRNO__RELOC;
 3967	}
 3968	for (map_idx = 0; map_idx < nr_maps; map_idx++) {
 3969		map = &obj->maps[map_idx];
 3970		if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
 3971			continue;
 3972		pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
 3973			 prog->name, map_idx, map->name, map->sec_idx,
 3974			 map->sec_offset, insn_idx);
 3975		break;
 3976	}
 3977	if (map_idx >= nr_maps) {
 3978		pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
 3979			prog->name, sym_sec_name);
 3980		return -LIBBPF_ERRNO__RELOC;
 3981	}
 3982
 3983	reloc_desc->type = RELO_DATA;
 3984	reloc_desc->insn_idx = insn_idx;
 3985	reloc_desc->map_idx = map_idx;
 3986	reloc_desc->sym_off = sym->st_value;
 3987	return 0;
 3988}
 3989
 3990static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
 3991{
 3992	return insn_idx >= prog->sec_insn_off &&
 3993	       insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
 3994}
 3995
 3996static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
 3997						 size_t sec_idx, size_t insn_idx)
 3998{
 3999	int l = 0, r = obj->nr_programs - 1, m;
 4000	struct bpf_program *prog;
 4001
 4002	while (l < r) {
 4003		m = l + (r - l + 1) / 2;
 4004		prog = &obj->programs[m];
 4005
 4006		if (prog->sec_idx < sec_idx ||
 4007		    (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
 4008			l = m;
 4009		else
 4010			r = m - 1;
 4011	}
 4012	/* matching program could be at index l, but it still might be the
 4013	 * wrong one, so we need to double check conditions for the last time
 4014	 */
 4015	prog = &obj->programs[l];
 4016	if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
 4017		return prog;
 4018	return NULL;
 4019}
 4020
 4021static int
 4022bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
 4023{
 4024	const char *relo_sec_name, *sec_name;
 4025	size_t sec_idx = shdr->sh_info;
 4026	struct bpf_program *prog;
 4027	struct reloc_desc *relos;
 4028	int err, i, nrels;
 4029	const char *sym_name;
 4030	__u32 insn_idx;
 4031	Elf_Scn *scn;
 4032	Elf_Data *scn_data;
 4033	Elf64_Sym *sym;
 4034	Elf64_Rel *rel;
 4035
 4036	scn = elf_sec_by_idx(obj, sec_idx);
 4037	scn_data = elf_sec_data(obj, scn);
 4038
 4039	relo_sec_name = elf_sec_str(obj, shdr->sh_name);
 4040	sec_name = elf_sec_name(obj, scn);
 4041	if (!relo_sec_name || !sec_name)
 4042		return -EINVAL;
 4043
 4044	pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
 4045		 relo_sec_name, sec_idx, sec_name);
 4046	nrels = shdr->sh_size / shdr->sh_entsize;
 4047
 4048	for (i = 0; i < nrels; i++) {
 4049		rel = elf_rel_by_idx(data, i);
 4050		if (!rel) {
 4051			pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
 4052			return -LIBBPF_ERRNO__FORMAT;
 4053		}
 4054
 4055		sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
 4056		if (!sym) {
 4057			pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n",
 4058				relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i);
 4059			return -LIBBPF_ERRNO__FORMAT;
 4060		}
 4061
 4062		if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
 4063			pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
 4064				relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i);
 4065			return -LIBBPF_ERRNO__FORMAT;
 4066		}
 4067
 4068		insn_idx = rel->r_offset / BPF_INSN_SZ;
 4069		/* relocations against static functions are recorded as
 4070		 * relocations against the section that contains a function;
 4071		 * in such case, symbol will be STT_SECTION and sym.st_name
 4072		 * will point to empty string (0), so fetch section name
 4073		 * instead
 4074		 */
 4075		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
 4076			sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
 4077		else
 4078			sym_name = elf_sym_str(obj, sym->st_name);
 4079		sym_name = sym_name ?: "<?";
 4080
 4081		pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
 4082			 relo_sec_name, i, insn_idx, sym_name);
 4083
 4084		prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
 4085		if (!prog) {
 4086			pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
 4087				relo_sec_name, i, sec_name, insn_idx);
 4088			continue;
 4089		}
 4090
 4091		relos = libbpf_reallocarray(prog->reloc_desc,
 4092					    prog->nr_reloc + 1, sizeof(*relos));
 4093		if (!relos)
 4094			return -ENOMEM;
 4095		prog->reloc_desc = relos;
 4096
 4097		/* adjust insn_idx to local BPF program frame of reference */
 4098		insn_idx -= prog->sec_insn_off;
 4099		err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
 4100						insn_idx, sym_name, sym, rel);
 4101		if (err)
 4102			return err;
 4103
 4104		prog->nr_reloc++;
 4105	}
 4106	return 0;
 4107}
 4108
 4109static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
 4110{
 4111	struct bpf_map_def *def = &map->def;
 4112	__u32 key_type_id = 0, value_type_id = 0;
 4113	int ret;
 4114
 4115	/* if it's BTF-defined map, we don't need to search for type IDs.
 4116	 * For struct_ops map, it does not need btf_key_type_id and
 4117	 * btf_value_type_id.
 4118	 */
 4119	if (map->sec_idx == obj->efile.btf_maps_shndx ||
 4120	    bpf_map__is_struct_ops(map))
 4121		return 0;
 4122
 4123	if (!bpf_map__is_internal(map)) {
 4124		ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
 4125					   def->value_size, &key_type_id,
 4126					   &value_type_id);
 4127	} else {
 4128		/*
 4129		 * LLVM annotates global data differently in BTF, that is,
 4130		 * only as '.data', '.bss' or '.rodata'.
 4131		 */
 4132		ret = btf__find_by_name(obj->btf, map->real_name);
 4133	}
 4134	if (ret < 0)
 4135		return ret;
 4136
 4137	map->btf_key_type_id = key_type_id;
 4138	map->btf_value_type_id = bpf_map__is_internal(map) ?
 4139				 ret : value_type_id;
 4140	return 0;
 4141}
 4142
 4143static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
 4144{
 4145	char file[PATH_MAX], buff[4096];
 4146	FILE *fp;
 4147	__u32 val;
 4148	int err;
 4149
 4150	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
 4151	memset(info, 0, sizeof(*info));
 4152
 4153	fp = fopen(file, "r");
 4154	if (!fp) {
 4155		err = -errno;
 4156		pr_warn("failed to open %s: %d. No procfs support?\n", file,
 4157			err);
 4158		return err;
 4159	}
 4160
 4161	while (fgets(buff, sizeof(buff), fp)) {
 4162		if (sscanf(buff, "map_type:\t%u", &val) == 1)
 4163			info->type = val;
 4164		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
 4165			info->key_size = val;
 4166		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
 4167			info->value_size = val;
 4168		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
 4169			info->max_entries = val;
 4170		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
 4171			info->map_flags = val;
 4172	}
 4173
 4174	fclose(fp);
 4175
 4176	return 0;
 4177}
 4178
 4179int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 4180{
 4181	struct bpf_map_info info = {};
 4182	__u32 len = sizeof(info);
 4183	int new_fd, err;
 4184	char *new_name;
 4185
 4186	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 4187	if (err && errno == EINVAL)
 4188		err = bpf_get_map_info_from_fdinfo(fd, &info);
 4189	if (err)
 4190		return libbpf_err(err);
 4191
 4192	new_name = strdup(info.name);
 4193	if (!new_name)
 4194		return libbpf_err(-errno);
 4195
 4196	new_fd = open("/", O_RDONLY | O_CLOEXEC);
 4197	if (new_fd < 0) {
 4198		err = -errno;
 4199		goto err_free_new_name;
 4200	}
 4201
 4202	new_fd = dup3(fd, new_fd, O_CLOEXEC);
 4203	if (new_fd < 0) {
 4204		err = -errno;
 4205		goto err_close_new_fd;
 4206	}
 4207
 4208	err = zclose(map->fd);
 4209	if (err) {
 4210		err = -errno;
 4211		goto err_close_new_fd;
 4212	}
 4213	free(map->name);
 4214
 4215	map->fd = new_fd;
 4216	map->name = new_name;
 4217	map->def.type = info.type;
 4218	map->def.key_size = info.key_size;
 4219	map->def.value_size = info.value_size;
 4220	map->def.max_entries = info.max_entries;
 4221	map->def.map_flags = info.map_flags;
 4222	map->btf_key_type_id = info.btf_key_type_id;
 4223	map->btf_value_type_id = info.btf_value_type_id;
 4224	map->reused = true;
 4225	map->map_extra = info.map_extra;
 4226
 4227	return 0;
 4228
 4229err_close_new_fd:
 4230	close(new_fd);
 4231err_free_new_name:
 4232	free(new_name);
 4233	return libbpf_err(err);
 4234}
 4235
 4236__u32 bpf_map__max_entries(const struct bpf_map *map)
 4237{
 4238	return map->def.max_entries;
 4239}
 4240
 4241struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
 4242{
 4243	if (!bpf_map_type__is_map_in_map(map->def.type))
 4244		return errno = EINVAL, NULL;
 4245
 4246	return map->inner_map;
 4247}
 4248
 4249int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 4250{
 4251	if (map->fd >= 0)
 4252		return libbpf_err(-EBUSY);
 4253	map->def.max_entries = max_entries;
 4254	return 0;
 4255}
 4256
 4257int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
 4258{
 4259	if (!map || !max_entries)
 4260		return libbpf_err(-EINVAL);
 4261
 4262	return bpf_map__set_max_entries(map, max_entries);
 4263}
 4264
 4265static int
 4266bpf_object__probe_loading(struct bpf_object *obj)
 4267{
 4268	struct bpf_load_program_attr attr;
 4269	char *cp, errmsg[STRERR_BUFSIZE];
 4270	struct bpf_insn insns[] = {
 4271		BPF_MOV64_IMM(BPF_REG_0, 0),
 4272		BPF_EXIT_INSN(),
 4273	};
 4274	int ret;
 4275
 4276	if (obj->gen_loader)
 4277		return 0;
 4278
 4279	/* make sure basic loading works */
 4280
 4281	memset(&attr, 0, sizeof(attr));
 4282	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 4283	attr.insns = insns;
 4284	attr.insns_cnt = ARRAY_SIZE(insns);
 4285	attr.license = "GPL";
 4286
 4287	ret = bpf_load_program_xattr(&attr, NULL, 0);
 4288	if (ret < 0) {
 4289		attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
 4290		ret = bpf_load_program_xattr(&attr, NULL, 0);
 4291	}
 4292	if (ret < 0) {
 4293		ret = errno;
 4294		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
 4295		pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
 4296			"program. Make sure your kernel supports BPF "
 4297			"(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
 4298			"set to big enough value.\n", __func__, cp, ret);
 4299		return -ret;
 4300	}
 4301	close(ret);
 4302
 4303	return 0;
 4304}
 4305
 4306static int probe_fd(int fd)
 4307{
 4308	if (fd >= 0)
 4309		close(fd);
 4310	return fd >= 0;
 4311}
 4312
 4313static int probe_kern_prog_name(void)
 4314{
 4315	struct bpf_load_program_attr attr;
 4316	struct bpf_insn insns[] = {
 4317		BPF_MOV64_IMM(BPF_REG_0, 0),
 4318		BPF_EXIT_INSN(),
 4319	};
 4320	int ret;
 4321
 4322	/* make sure loading with name works */
 4323
 4324	memset(&attr, 0, sizeof(attr));
 4325	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 4326	attr.insns = insns;
 4327	attr.insns_cnt = ARRAY_SIZE(insns);
 4328	attr.license = "GPL";
 4329	attr.name = "test";
 4330	ret = bpf_load_program_xattr(&attr, NULL, 0);
 4331	return probe_fd(ret);
 4332}
 4333
 4334static int probe_kern_global_data(void)
 4335{
 4336	struct bpf_load_program_attr prg_attr;
 4337	struct bpf_create_map_attr map_attr;
 4338	char *cp, errmsg[STRERR_BUFSIZE];
 4339	struct bpf_insn insns[] = {
 4340		BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
 4341		BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
 4342		BPF_MOV64_IMM(BPF_REG_0, 0),
 4343		BPF_EXIT_INSN(),
 4344	};
 4345	int ret, map;
 4346
 4347	memset(&map_attr, 0, sizeof(map_attr));
 4348	map_attr.map_type = BPF_MAP_TYPE_ARRAY;
 4349	map_attr.key_size = sizeof(int);
 4350	map_attr.value_size = 32;
 4351	map_attr.max_entries = 1;
 4352
 4353	map = bpf_create_map_xattr(&map_attr);
 4354	if (map < 0) {
 4355		ret = -errno;
 4356		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
 4357		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
 4358			__func__, cp, -ret);
 4359		return ret;
 4360	}
 4361
 4362	insns[0].imm = map;
 4363
 4364	memset(&prg_attr, 0, sizeof(prg_attr));
 4365	prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 4366	prg_attr.insns = insns;
 4367	prg_attr.insns_cnt = ARRAY_SIZE(insns);
 4368	prg_attr.license = "GPL";
 4369
 4370	ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
 4371	close(map);
 4372	return probe_fd(ret);
 4373}
 4374
 4375static int probe_kern_btf(void)
 4376{
 4377	static const char strs[] = "\0int";
 4378	__u32 types[] = {
 4379		/* int */
 4380		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
 4381	};
 4382
 4383	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4384					     strs, sizeof(strs)));
 4385}
 4386
 4387static int probe_kern_btf_func(void)
 4388{
 4389	static const char strs[] = "\0int\0x\0a";
 4390	/* void x(int a) {} */
 4391	__u32 types[] = {
 4392		/* int */
 4393		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
 4394		/* FUNC_PROTO */                                /* [2] */
 4395		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
 4396		BTF_PARAM_ENC(7, 1),
 4397		/* FUNC x */                                    /* [3] */
 4398		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
 4399	};
 4400
 4401	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4402					     strs, sizeof(strs)));
 4403}
 4404
 4405static int probe_kern_btf_func_global(void)
 4406{
 4407	static const char strs[] = "\0int\0x\0a";
 4408	/* static void x(int a) {} */
 4409	__u32 types[] = {
 4410		/* int */
 4411		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
 4412		/* FUNC_PROTO */                                /* [2] */
 4413		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
 4414		BTF_PARAM_ENC(7, 1),
 4415		/* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
 4416		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
 4417	};
 4418
 4419	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4420					     strs, sizeof(strs)));
 4421}
 4422
 4423static int probe_kern_btf_datasec(void)
 4424{
 4425	static const char strs[] = "\0x\0.data";
 4426	/* static int a; */
 4427	__u32 types[] = {
 4428		/* int */
 4429		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
 4430		/* VAR x */                                     /* [2] */
 4431		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
 4432		BTF_VAR_STATIC,
 4433		/* DATASEC val */                               /* [3] */
 4434		BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
 4435		BTF_VAR_SECINFO_ENC(2, 0, 4),
 4436	};
 4437
 4438	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4439					     strs, sizeof(strs)));
 4440}
 4441
 4442static int probe_kern_btf_float(void)
 4443{
 4444	static const char strs[] = "\0float";
 4445	__u32 types[] = {
 4446		/* float */
 4447		BTF_TYPE_FLOAT_ENC(1, 4),
 4448	};
 4449
 4450	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4451					     strs, sizeof(strs)));
 4452}
 4453
 4454static int probe_kern_btf_decl_tag(void)
 4455{
 4456	static const char strs[] = "\0tag";
 4457	__u32 types[] = {
 4458		/* int */
 4459		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
 4460		/* VAR x */                                     /* [2] */
 4461		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
 4462		BTF_VAR_STATIC,
 4463		/* attr */
 4464		BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
 4465	};
 4466
 4467	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4468					     strs, sizeof(strs)));
 4469}
 4470
 4471static int probe_kern_array_mmap(void)
 4472{
 4473	struct bpf_create_map_attr attr = {
 4474		.map_type = BPF_MAP_TYPE_ARRAY,
 4475		.map_flags = BPF_F_MMAPABLE,
 4476		.key_size = sizeof(int),
 4477		.value_size = sizeof(int),
 4478		.max_entries = 1,
 4479	};
 4480
 4481	return probe_fd(bpf_create_map_xattr(&attr));
 4482}
 4483
 4484static int probe_kern_exp_attach_type(void)
 4485{
 4486	struct bpf_load_program_attr attr;
 4487	struct bpf_insn insns[] = {
 4488		BPF_MOV64_IMM(BPF_REG_0, 0),
 4489		BPF_EXIT_INSN(),
 4490	};
 4491
 4492	memset(&attr, 0, sizeof(attr));
 4493	/* use any valid combination of program type and (optional)
 4494	 * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
 4495	 * to see if kernel supports expected_attach_type field for
 4496	 * BPF_PROG_LOAD command
 4497	 */
 4498	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
 4499	attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
 4500	attr.insns = insns;
 4501	attr.insns_cnt = ARRAY_SIZE(insns);
 4502	attr.license = "GPL";
 4503
 4504	return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
 4505}
 4506
 4507static int probe_kern_probe_read_kernel(void)
 4508{
 4509	struct bpf_load_program_attr attr;
 4510	struct bpf_insn insns[] = {
 4511		BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),	/* r1 = r10 (fp) */
 4512		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),	/* r1 += -8 */
 4513		BPF_MOV64_IMM(BPF_REG_2, 8),		/* r2 = 8 */
 4514		BPF_MOV64_IMM(BPF_REG_3, 0),		/* r3 = 0 */
 4515		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
 4516		BPF_EXIT_INSN(),
 4517	};
 4518
 4519	memset(&attr, 0, sizeof(attr));
 4520	attr.prog_type = BPF_PROG_TYPE_KPROBE;
 4521	attr.insns = insns;
 4522	attr.insns_cnt = ARRAY_SIZE(insns);
 4523	attr.license = "GPL";
 4524
 4525	return probe_fd(bpf_load_program_xattr(&attr, NULL, 0));
 4526}
 4527
 4528static int probe_prog_bind_map(void)
 4529{
 4530	struct bpf_load_program_attr prg_attr;
 4531	struct bpf_create_map_attr map_attr;
 4532	char *cp, errmsg[STRERR_BUFSIZE];
 4533	struct bpf_insn insns[] = {
 4534		BPF_MOV64_IMM(BPF_REG_0, 0),
 4535		BPF_EXIT_INSN(),
 4536	};
 4537	int ret, map, prog;
 4538
 4539	memset(&map_attr, 0, sizeof(map_attr));
 4540	map_attr.map_type = BPF_MAP_TYPE_ARRAY;
 4541	map_attr.key_size = sizeof(int);
 4542	map_attr.value_size = 32;
 4543	map_attr.max_entries = 1;
 4544
 4545	map = bpf_create_map_xattr(&map_attr);
 4546	if (map < 0) {
 4547		ret = -errno;
 4548		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
 4549		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
 4550			__func__, cp, -ret);
 4551		return ret;
 4552	}
 4553
 4554	memset(&prg_attr, 0, sizeof(prg_attr));
 4555	prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 4556	prg_attr.insns = insns;
 4557	prg_attr.insns_cnt = ARRAY_SIZE(insns);
 4558	prg_attr.license = "GPL";
 4559
 4560	prog = bpf_load_program_xattr(&prg_attr, NULL, 0);
 4561	if (prog < 0) {
 4562		close(map);
 4563		return 0;
 4564	}
 4565
 4566	ret = bpf_prog_bind_map(prog, map, NULL);
 4567
 4568	close(map);
 4569	close(prog);
 4570
 4571	return ret >= 0;
 4572}
 4573
 4574static int probe_module_btf(void)
 4575{
 4576	static const char strs[] = "\0int";
 4577	__u32 types[] = {
 4578		/* int */
 4579		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
 4580	};
 4581	struct bpf_btf_info info;
 4582	__u32 len = sizeof(info);
 4583	char name[16];
 4584	int fd, err;
 4585
 4586	fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
 4587	if (fd < 0)
 4588		return 0; /* BTF not supported at all */
 4589
 4590	memset(&info, 0, sizeof(info));
 4591	info.name = ptr_to_u64(name);
 4592	info.name_len = sizeof(name);
 4593
 4594	/* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
 4595	 * kernel's module BTF support coincides with support for
 4596	 * name/name_len fields in struct bpf_btf_info.
 4597	 */
 4598	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 4599	close(fd);
 4600	return !err;
 4601}
 4602
 4603static int probe_perf_link(void)
 4604{
 4605	struct bpf_load_program_attr attr;
 4606	struct bpf_insn insns[] = {
 4607		BPF_MOV64_IMM(BPF_REG_0, 0),
 4608		BPF_EXIT_INSN(),
 4609	};
 4610	int prog_fd, link_fd, err;
 4611
 4612	memset(&attr, 0, sizeof(attr));
 4613	attr.prog_type = BPF_PROG_TYPE_TRACEPOINT;
 4614	attr.insns = insns;
 4615	attr.insns_cnt = ARRAY_SIZE(insns);
 4616	attr.license = "GPL";
 4617	prog_fd = bpf_load_program_xattr(&attr, NULL, 0);
 4618	if (prog_fd < 0)
 4619		return -errno;
 4620
 4621	/* use invalid perf_event FD to get EBADF, if link is supported;
 4622	 * otherwise EINVAL should be returned
 4623	 */
 4624	link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
 4625	err = -errno; /* close() can clobber errno */
 4626
 4627	if (link_fd >= 0)
 4628		close(link_fd);
 4629	close(prog_fd);
 4630
 4631	return link_fd < 0 && err == -EBADF;
 4632}
 4633
 4634enum kern_feature_result {
 4635	FEAT_UNKNOWN = 0,
 4636	FEAT_SUPPORTED = 1,
 4637	FEAT_MISSING = 2,
 4638};
 4639
 4640typedef int (*feature_probe_fn)(void);
 4641
 4642static struct kern_feature_desc {
 4643	const char *desc;
 4644	feature_probe_fn probe;
 4645	enum kern_feature_result res;
 4646} feature_probes[__FEAT_CNT] = {
 4647	[FEAT_PROG_NAME] = {
 4648		"BPF program name", probe_kern_prog_name,
 4649	},
 4650	[FEAT_GLOBAL_DATA] = {
 4651		"global variables", probe_kern_global_data,
 4652	},
 4653	[FEAT_BTF] = {
 4654		"minimal BTF", probe_kern_btf,
 4655	},
 4656	[FEAT_BTF_FUNC] = {
 4657		"BTF functions", probe_kern_btf_func,
 4658	},
 4659	[FEAT_BTF_GLOBAL_FUNC] = {
 4660		"BTF global function", probe_kern_btf_func_global,
 4661	},
 4662	[FEAT_BTF_DATASEC] = {
 4663		"BTF data section and variable", probe_kern_btf_datasec,
 4664	},
 4665	[FEAT_ARRAY_MMAP] = {
 4666		"ARRAY map mmap()", probe_kern_array_mmap,
 4667	},
 4668	[FEAT_EXP_ATTACH_TYPE] = {
 4669		"BPF_PROG_LOAD expected_attach_type attribute",
 4670		probe_kern_exp_attach_type,
 4671	},
 4672	[FEAT_PROBE_READ_KERN] = {
 4673		"bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
 4674	},
 4675	[FEAT_PROG_BIND_MAP] = {
 4676		"BPF_PROG_BIND_MAP support", probe_prog_bind_map,
 4677	},
 4678	[FEAT_MODULE_BTF] = {
 4679		"module BTF support", probe_module_btf,
 4680	},
 4681	[FEAT_BTF_FLOAT] = {
 4682		"BTF_KIND_FLOAT support", probe_kern_btf_float,
 4683	},
 4684	[FEAT_PERF_LINK] = {
 4685		"BPF perf link support", probe_perf_link,
 4686	},
 4687	[FEAT_BTF_DECL_TAG] = {
 4688		"BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
 4689	},
 4690};
 4691
 4692static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
 4693{
 4694	struct kern_feature_desc *feat = &feature_probes[feat_id];
 4695	int ret;
 4696
 4697	if (obj->gen_loader)
 4698		/* To generate loader program assume the latest kernel
 4699		 * to avoid doing extra prog_load, map_create syscalls.
 4700		 */
 4701		return true;
 4702
 4703	if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
 4704		ret = feat->probe();
 4705		if (ret > 0) {
 4706			WRITE_ONCE(feat->res, FEAT_SUPPORTED);
 4707		} else if (ret == 0) {
 4708			WRITE_ONCE(feat->res, FEAT_MISSING);
 4709		} else {
 4710			pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
 4711			WRITE_ONCE(feat->res, FEAT_MISSING);
 4712		}
 4713	}
 4714
 4715	return READ_ONCE(feat->res) == FEAT_SUPPORTED;
 4716}
 4717
 4718static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
 4719{
 4720	struct bpf_map_info map_info = {};
 4721	char msg[STRERR_BUFSIZE];
 4722	__u32 map_info_len;
 4723	int err;
 4724
 4725	map_info_len = sizeof(map_info);
 4726
 4727	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
 4728	if (err && errno == EINVAL)
 4729		err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
 4730	if (err) {
 4731		pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
 4732			libbpf_strerror_r(errno, msg, sizeof(msg)));
 4733		return false;
 4734	}
 4735
 4736	return (map_info.type == map->def.type &&
 4737		map_info.key_size == map->def.key_size &&
 4738		map_info.value_size == map->def.value_size &&
 4739		map_info.max_entries == map->def.max_entries &&
 4740		map_info.map_flags == map->def.map_flags &&
 4741		map_info.map_extra == map->map_extra);
 4742}
 4743
 4744static int
 4745bpf_object__reuse_map(struct bpf_map *map)
 4746{
 4747	char *cp, errmsg[STRERR_BUFSIZE];
 4748	int err, pin_fd;
 4749
 4750	pin_fd = bpf_obj_get(map->pin_path);
 4751	if (pin_fd < 0) {
 4752		err = -errno;
 4753		if (err == -ENOENT) {
 4754			pr_debug("found no pinned map to reuse at '%s'\n",
 4755				 map->pin_path);
 4756			return 0;
 4757		}
 4758
 4759		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
 4760		pr_warn("couldn't retrieve pinned map '%s': %s\n",
 4761			map->pin_path, cp);
 4762		return err;
 4763	}
 4764
 4765	if (!map_is_reuse_compat(map, pin_fd)) {
 4766		pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
 4767			map->pin_path);
 4768		close(pin_fd);
 4769		return -EINVAL;
 4770	}
 4771
 4772	err = bpf_map__reuse_fd(map, pin_fd);
 4773	if (err) {
 4774		close(pin_fd);
 4775		return err;
 4776	}
 4777	map->pinned = true;
 4778	pr_debug("reused pinned map at '%s'\n", map->pin_path);
 4779
 4780	return 0;
 4781}
 4782
 4783static int
 4784bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 4785{
 4786	enum libbpf_map_type map_type = map->libbpf_type;
 4787	char *cp, errmsg[STRERR_BUFSIZE];
 4788	int err, zero = 0;
 4789
 4790	if (obj->gen_loader) {
 4791		bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
 4792					 map->mmaped, map->def.value_size);
 4793		if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
 4794			bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
 4795		return 0;
 4796	}
 4797	err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
 4798	if (err) {
 4799		err = -errno;
 4800		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 4801		pr_warn("Error setting initial map(%s) contents: %s\n",
 4802			map->name, cp);
 4803		return err;
 4804	}
 4805
 4806	/* Freeze .rodata and .kconfig map as read-only from syscall side. */
 4807	if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
 4808		err = bpf_map_freeze(map->fd);
 4809		if (err) {
 4810			err = -errno;
 4811			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 4812			pr_warn("Error freezing map(%s) as read-only: %s\n",
 4813				map->name, cp);
 4814			return err;
 4815		}
 4816	}
 4817	return 0;
 4818}
 4819
 4820static void bpf_map__destroy(struct bpf_map *map);
 4821
 4822static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
 4823{
 4824	struct bpf_create_map_params create_attr;
 4825	struct bpf_map_def *def = &map->def;
 4826	int err = 0;
 4827
 4828	memset(&create_attr, 0, sizeof(create_attr));
 4829
 4830	if (kernel_supports(obj, FEAT_PROG_NAME))
 4831		create_attr.name = map->name;
 4832	create_attr.map_ifindex = map->map_ifindex;
 4833	create_attr.map_type = def->type;
 4834	create_attr.map_flags = def->map_flags;
 4835	create_attr.key_size = def->key_size;
 4836	create_attr.value_size = def->value_size;
 4837	create_attr.numa_node = map->numa_node;
 4838	create_attr.map_extra = map->map_extra;
 4839
 4840	if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
 4841		int nr_cpus;
 4842
 4843		nr_cpus = libbpf_num_possible_cpus();
 4844		if (nr_cpus < 0) {
 4845			pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
 4846				map->name, nr_cpus);
 4847			return nr_cpus;
 4848		}
 4849		pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
 4850		create_attr.max_entries = nr_cpus;
 4851	} else {
 4852		create_attr.max_entries = def->max_entries;
 4853	}
 4854
 4855	if (bpf_map__is_struct_ops(map))
 4856		create_attr.btf_vmlinux_value_type_id =
 4857			map->btf_vmlinux_value_type_id;
 4858
 4859	create_attr.btf_fd = 0;
 4860	create_attr.btf_key_type_id = 0;
 4861	create_attr.btf_value_type_id = 0;
 4862	if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
 4863		create_attr.btf_fd = btf__fd(obj->btf);
 4864		create_attr.btf_key_type_id = map->btf_key_type_id;
 4865		create_attr.btf_value_type_id = map->btf_value_type_id;
 4866	}
 4867
 4868	if (bpf_map_type__is_map_in_map(def->type)) {
 4869		if (map->inner_map) {
 4870			err = bpf_object__create_map(obj, map->inner_map, true);
 4871			if (err) {
 4872				pr_warn("map '%s': failed to create inner map: %d\n",
 4873					map->name, err);
 4874				return err;
 4875			}
 4876			map->inner_map_fd = bpf_map__fd(map->inner_map);
 4877		}
 4878		if (map->inner_map_fd >= 0)
 4879			create_attr.inner_map_fd = map->inner_map_fd;
 4880	}
 4881
 4882	switch (def->type) {
 4883	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
 4884	case BPF_MAP_TYPE_CGROUP_ARRAY:
 4885	case BPF_MAP_TYPE_STACK_TRACE:
 4886	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
 4887	case BPF_MAP_TYPE_HASH_OF_MAPS:
 4888	case BPF_MAP_TYPE_DEVMAP:
 4889	case BPF_MAP_TYPE_DEVMAP_HASH:
 4890	case BPF_MAP_TYPE_CPUMAP:
 4891	case BPF_MAP_TYPE_XSKMAP:
 4892	case BPF_MAP_TYPE_SOCKMAP:
 4893	case BPF_MAP_TYPE_SOCKHASH:
 4894	case BPF_MAP_TYPE_QUEUE:
 4895	case BPF_MAP_TYPE_STACK:
 4896	case BPF_MAP_TYPE_RINGBUF:
 4897		create_attr.btf_fd = 0;
 4898		create_attr.btf_key_type_id = 0;
 4899		create_attr.btf_value_type_id = 0;
 4900		map->btf_key_type_id = 0;
 4901		map->btf_value_type_id = 0;
 4902	default:
 4903		break;
 4904	}
 4905
 4906	if (obj->gen_loader) {
 4907		bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps);
 4908		/* Pretend to have valid FD to pass various fd >= 0 checks.
 4909		 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
 4910		 */
 4911		map->fd = 0;
 4912	} else {
 4913		map->fd = libbpf__bpf_create_map_xattr(&create_attr);
 4914	}
 4915	if (map->fd < 0 && (create_attr.btf_key_type_id ||
 4916			    create_attr.btf_value_type_id)) {
 4917		char *cp, errmsg[STRERR_BUFSIZE];
 4918
 4919		err = -errno;
 4920		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 4921		pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
 4922			map->name, cp, err);
 4923		create_attr.btf_fd = 0;
 4924		create_attr.btf_key_type_id = 0;
 4925		create_attr.btf_value_type_id = 0;
 4926		map->btf_key_type_id = 0;
 4927		map->btf_value_type_id = 0;
 4928		map->fd = libbpf__bpf_create_map_xattr(&create_attr);
 4929	}
 4930
 4931	err = map->fd < 0 ? -errno : 0;
 4932
 4933	if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
 4934		if (obj->gen_loader)
 4935			map->inner_map->fd = -1;
 4936		bpf_map__destroy(map->inner_map);
 4937		zfree(&map->inner_map);
 4938	}
 4939
 4940	return err;
 4941}
 4942
 4943static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
 4944{
 4945	const struct bpf_map *targ_map;
 4946	unsigned int i;
 4947	int fd, err = 0;
 4948
 4949	for (i = 0; i < map->init_slots_sz; i++) {
 4950		if (!map->init_slots[i])
 4951			continue;
 4952
 4953		targ_map = map->init_slots[i];
 4954		fd = bpf_map__fd(targ_map);
 4955		if (obj->gen_loader) {
 4956			pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n",
 4957				map - obj->maps, i, targ_map - obj->maps);
 4958			return -ENOTSUP;
 4959		} else {
 4960			err = bpf_map_update_elem(map->fd, &i, &fd, 0);
 4961		}
 4962		if (err) {
 4963			err = -errno;
 4964			pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
 4965				map->name, i, targ_map->name,
 4966				fd, err);
 4967			return err;
 4968		}
 4969		pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
 4970			 map->name, i, targ_map->name, fd);
 4971	}
 4972
 4973	zfree(&map->init_slots);
 4974	map->init_slots_sz = 0;
 4975
 4976	return 0;
 4977}
 4978
 4979static int
 4980bpf_object__create_maps(struct bpf_object *obj)
 4981{
 4982	struct bpf_map *map;
 4983	char *cp, errmsg[STRERR_BUFSIZE];
 4984	unsigned int i, j;
 4985	int err;
 4986	bool retried;
 4987
 4988	for (i = 0; i < obj->nr_maps; i++) {
 4989		map = &obj->maps[i];
 4990
 4991		retried = false;
 4992retry:
 4993		if (map->pin_path) {
 4994			err = bpf_object__reuse_map(map);
 4995			if (err) {
 4996				pr_warn("map '%s': error reusing pinned map\n",
 4997					map->name);
 4998				goto err_out;
 4999			}
 5000			if (retried && map->fd < 0) {
 5001				pr_warn("map '%s': cannot find pinned map\n",
 5002					map->name);
 5003				err = -ENOENT;
 5004				goto err_out;
 5005			}
 5006		}
 5007
 5008		if (map->fd >= 0) {
 5009			pr_debug("map '%s': skipping creation (preset fd=%d)\n",
 5010				 map->name, map->fd);
 5011		} else {
 5012			err = bpf_object__create_map(obj, map, false);
 5013			if (err)
 5014				goto err_out;
 5015
 5016			pr_debug("map '%s': created successfully, fd=%d\n",
 5017				 map->name, map->fd);
 5018
 5019			if (bpf_map__is_internal(map)) {
 5020				err = bpf_object__populate_internal_map(obj, map);
 5021				if (err < 0) {
 5022					zclose(map->fd);
 5023					goto err_out;
 5024				}
 5025			}
 5026
 5027			if (map->init_slots_sz) {
 5028				err = init_map_slots(obj, map);
 5029				if (err < 0) {
 5030					zclose(map->fd);
 5031					goto err_out;
 5032				}
 5033			}
 5034		}
 5035
 5036		if (map->pin_path && !map->pinned) {
 5037			err = bpf_map__pin(map, NULL);
 5038			if (err) {
 5039				zclose(map->fd);
 5040				if (!retried && err == -EEXIST) {
 5041					retried = true;
 5042					goto retry;
 5043				}
 5044				pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
 5045					map->name, map->pin_path, err);
 5046				goto err_out;
 5047			}
 5048		}
 5049	}
 5050
 5051	return 0;
 5052
 5053err_out:
 5054	cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 5055	pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
 5056	pr_perm_msg(err);
 5057	for (j = 0; j < i; j++)
 5058		zclose(obj->maps[j].fd);
 5059	return err;
 5060}
 5061
 5062static bool bpf_core_is_flavor_sep(const char *s)
 5063{
 5064	/* check X___Y name pattern, where X and Y are not underscores */
 5065	return s[0] != '_' &&				      /* X */
 5066	       s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
 5067	       s[4] != '_';				      /* Y */
 5068}
 5069
 5070/* Given 'some_struct_name___with_flavor' return the length of a name prefix
 5071 * before last triple underscore. Struct name part after last triple
 5072 * underscore is ignored by BPF CO-RE relocation during relocation matching.
 5073 */
 5074size_t bpf_core_essential_name_len(const char *name)
 5075{
 5076	size_t n = strlen(name);
 5077	int i;
 5078
 5079	for (i = n - 5; i >= 0; i--) {
 5080		if (bpf_core_is_flavor_sep(name + i))
 5081			return i + 1;
 5082	}
 5083	return n;
 5084}
 5085
 5086static void bpf_core_free_cands(struct bpf_core_cand_list *cands)
 5087{
 5088	free(cands->cands);
 5089	free(cands);
 5090}
 5091
 5092static int bpf_core_add_cands(struct bpf_core_cand *local_cand,
 5093			      size_t local_essent_len,
 5094			      const struct btf *targ_btf,
 5095			      const char *targ_btf_name,
 5096			      int targ_start_id,
 5097			      struct bpf_core_cand_list *cands)
 5098{
 5099	struct bpf_core_cand *new_cands, *cand;
 5100	const struct btf_type *t;
 5101	const char *targ_name;
 5102	size_t targ_essent_len;
 5103	int n, i;
 5104
 5105	n = btf__type_cnt(targ_btf);
 5106	for (i = targ_start_id; i < n; i++) {
 5107		t = btf__type_by_id(targ_btf, i);
 5108		if (btf_kind(t) != btf_kind(local_cand->t))
 5109			continue;
 5110
 5111		targ_name = btf__name_by_offset(targ_btf, t->name_off);
 5112		if (str_is_empty(targ_name))
 5113			continue;
 5114
 5115		targ_essent_len = bpf_core_essential_name_len(targ_name);
 5116		if (targ_essent_len != local_essent_len)
 5117			continue;
 5118
 5119		if (strncmp(local_cand->name, targ_name, local_essent_len) != 0)
 5120			continue;
 5121
 5122		pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
 5123			 local_cand->id, btf_kind_str(local_cand->t),
 5124			 local_cand->name, i, btf_kind_str(t), targ_name,
 5125			 targ_btf_name);
 5126		new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
 5127					      sizeof(*cands->cands));
 5128		if (!new_cands)
 5129			return -ENOMEM;
 5130
 5131		cand = &new_cands[cands->len];
 5132		cand->btf = targ_btf;
 5133		cand->t = t;
 5134		cand->name = targ_name;
 5135		cand->id = i;
 5136
 5137		cands->cands = new_cands;
 5138		cands->len++;
 5139	}
 5140	return 0;
 5141}
 5142
 5143static int load_module_btfs(struct bpf_object *obj)
 5144{
 5145	struct bpf_btf_info info;
 5146	struct module_btf *mod_btf;
 5147	struct btf *btf;
 5148	char name[64];
 5149	__u32 id = 0, len;
 5150	int err, fd;
 5151
 5152	if (obj->btf_modules_loaded)
 5153		return 0;
 5154
 5155	if (obj->gen_loader)
 5156		return 0;
 5157
 5158	/* don't do this again, even if we find no module BTFs */
 5159	obj->btf_modules_loaded = true;
 5160
 5161	/* kernel too old to support module BTFs */
 5162	if (!kernel_supports(obj, FEAT_MODULE_BTF))
 5163		return 0;
 5164
 5165	while (true) {
 5166		err = bpf_btf_get_next_id(id, &id);
 5167		if (err && errno == ENOENT)
 5168			return 0;
 5169		if (err) {
 5170			err = -errno;
 5171			pr_warn("failed to iterate BTF objects: %d\n", err);
 5172			return err;
 5173		}
 5174
 5175		fd = bpf_btf_get_fd_by_id(id);
 5176		if (fd < 0) {
 5177			if (errno == ENOENT)
 5178				continue; /* expected race: BTF was unloaded */
 5179			err = -errno;
 5180			pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
 5181			return err;
 5182		}
 5183
 5184		len = sizeof(info);
 5185		memset(&info, 0, sizeof(info));
 5186		info.name = ptr_to_u64(name);
 5187		info.name_len = sizeof(name);
 5188
 5189		err = bpf_obj_get_info_by_fd(fd, &info, &len);
 5190		if (err) {
 5191			err = -errno;
 5192			pr_warn("failed to get BTF object #%d info: %d\n", id, err);
 5193			goto err_out;
 5194		}
 5195
 5196		/* ignore non-module BTFs */
 5197		if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
 5198			close(fd);
 5199			continue;
 5200		}
 5201
 5202		btf = btf_get_from_fd(fd, obj->btf_vmlinux);
 5203		err = libbpf_get_error(btf);
 5204		if (err) {
 5205			pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
 5206				name, id, err);
 5207			goto err_out;
 5208		}
 5209
 5210		err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
 5211				        sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
 5212		if (err)
 5213			goto err_out;
 5214
 5215		mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
 5216
 5217		mod_btf->btf = btf;
 5218		mod_btf->id = id;
 5219		mod_btf->fd = fd;
 5220		mod_btf->name = strdup(name);
 5221		if (!mod_btf->name) {
 5222			err = -ENOMEM;
 5223			goto err_out;
 5224		}
 5225		continue;
 5226
 5227err_out:
 5228		close(fd);
 5229		return err;
 5230	}
 5231
 5232	return 0;
 5233}
 5234
 5235static struct bpf_core_cand_list *
 5236bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
 5237{
 5238	struct bpf_core_cand local_cand = {};
 5239	struct bpf_core_cand_list *cands;
 5240	const struct btf *main_btf;
 5241	size_t local_essent_len;
 5242	int err, i;
 5243
 5244	local_cand.btf = local_btf;
 5245	local_cand.t = btf__type_by_id(local_btf, local_type_id);
 5246	if (!local_cand.t)
 5247		return ERR_PTR(-EINVAL);
 5248
 5249	local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off);
 5250	if (str_is_empty(local_cand.name))
 5251		return ERR_PTR(-EINVAL);
 5252	local_essent_len = bpf_core_essential_name_len(local_cand.name);
 5253
 5254	cands = calloc(1, sizeof(*cands));
 5255	if (!cands)
 5256		return ERR_PTR(-ENOMEM);
 5257
 5258	/* Attempt to find target candidates in vmlinux BTF first */
 5259	main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
 5260	err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
 5261	if (err)
 5262		goto err_out;
 5263
 5264	/* if vmlinux BTF has any candidate, don't got for module BTFs */
 5265	if (cands->len)
 5266		return cands;
 5267
 5268	/* if vmlinux BTF was overridden, don't attempt to load module BTFs */
 5269	if (obj->btf_vmlinux_override)
 5270		return cands;
 5271
 5272	/* now look through module BTFs, trying to still find candidates */
 5273	err = load_module_btfs(obj);
 5274	if (err)
 5275		goto err_out;
 5276
 5277	for (i = 0; i < obj->btf_module_cnt; i++) {
 5278		err = bpf_core_add_cands(&local_cand, local_essent_len,
 5279					 obj->btf_modules[i].btf,
 5280					 obj->btf_modules[i].name,
 5281					 btf__type_cnt(obj->btf_vmlinux),
 5282					 cands);
 5283		if (err)
 5284			goto err_out;
 5285	}
 5286
 5287	return cands;
 5288err_out:
 5289	bpf_core_free_cands(cands);
 5290	return ERR_PTR(err);
 5291}
 5292
 5293/* Check local and target types for compatibility. This check is used for
 5294 * type-based CO-RE relocations and follow slightly different rules than
 5295 * field-based relocations. This function assumes that root types were already
 5296 * checked for name match. Beyond that initial root-level name check, names
 5297 * are completely ignored. Compatibility rules are as follows:
 5298 *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
 5299 *     kind should match for local and target types (i.e., STRUCT is not
 5300 *     compatible with UNION);
 5301 *   - for ENUMs, the size is ignored;
 5302 *   - for INT, size and signedness are ignored;
 5303 *   - for ARRAY, dimensionality is ignored, element types are checked for
 5304 *     compatibility recursively;
 5305 *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
 5306 *   - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
 5307 *   - FUNC_PROTOs are compatible if they have compatible signature: same
 5308 *     number of input args and compatible return and argument types.
 5309 * These rules are not set in stone and probably will be adjusted as we get
 5310 * more experience with using BPF CO-RE relocations.
 5311 */
 5312int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
 5313			      const struct btf *targ_btf, __u32 targ_id)
 5314{
 5315	const struct btf_type *local_type, *targ_type;
 5316	int depth = 32; /* max recursion depth */
 5317
 5318	/* caller made sure that names match (ignoring flavor suffix) */
 5319	local_type = btf__type_by_id(local_btf, local_id);
 5320	targ_type = btf__type_by_id(targ_btf, targ_id);
 5321	if (btf_kind(local_type) != btf_kind(targ_type))
 5322		return 0;
 5323
 5324recur:
 5325	depth--;
 5326	if (depth < 0)
 5327		return -EINVAL;
 5328
 5329	local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
 5330	targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
 5331	if (!local_type || !targ_type)
 5332		return -EINVAL;
 5333
 5334	if (btf_kind(local_type) != btf_kind(targ_type))
 5335		return 0;
 5336
 5337	switch (btf_kind(local_type)) {
 5338	case BTF_KIND_UNKN:
 5339	case BTF_KIND_STRUCT:
 5340	case BTF_KIND_UNION:
 5341	case BTF_KIND_ENUM:
 5342	case BTF_KIND_FWD:
 5343		return 1;
 5344	case BTF_KIND_INT:
 5345		/* just reject deprecated bitfield-like integers; all other
 5346		 * integers are by default compatible between each other
 5347		 */
 5348		return btf_int_offset(local_type) == 0 && btf_int_offset(targ_type) == 0;
 5349	case BTF_KIND_PTR:
 5350		local_id = local_type->type;
 5351		targ_id = targ_type->type;
 5352		goto recur;
 5353	case BTF_KIND_ARRAY:
 5354		local_id = btf_array(local_type)->type;
 5355		targ_id = btf_array(targ_type)->type;
 5356		goto recur;
 5357	case BTF_KIND_FUNC_PROTO: {
 5358		struct btf_param *local_p = btf_params(local_type);
 5359		struct btf_param *targ_p = btf_params(targ_type);
 5360		__u16 local_vlen = btf_vlen(local_type);
 5361		__u16 targ_vlen = btf_vlen(targ_type);
 5362		int i, err;
 5363
 5364		if (local_vlen != targ_vlen)
 5365			return 0;
 5366
 5367		for (i = 0; i < local_vlen; i++, local_p++, targ_p++) {
 5368			skip_mods_and_typedefs(local_btf, local_p->type, &local_id);
 5369			skip_mods_and_typedefs(targ_btf, targ_p->type, &targ_id);
 5370			err = bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id);
 5371			if (err <= 0)
 5372				return err;
 5373		}
 5374
 5375		/* tail recurse for return type check */
 5376		skip_mods_and_typedefs(local_btf, local_type->type, &local_id);
 5377		skip_mods_and_typedefs(targ_btf, targ_type->type, &targ_id);
 5378		goto recur;
 5379	}
 5380	default:
 5381		pr_warn("unexpected kind %s relocated, local [%d], target [%d]\n",
 5382			btf_kind_str(local_type), local_id, targ_id);
 5383		return 0;
 5384	}
 5385}
 5386
 5387static size_t bpf_core_hash_fn(const void *key, void *ctx)
 5388{
 5389	return (size_t)key;
 5390}
 5391
 5392static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
 5393{
 5394	return k1 == k2;
 5395}
 5396
 5397static void *u32_as_hash_key(__u32 x)
 5398{
 5399	return (void *)(uintptr_t)x;
 5400}
 5401
 5402static int bpf_core_apply_relo(struct bpf_program *prog,
 5403			       const struct bpf_core_relo *relo,
 5404			       int relo_idx,
 5405			       const struct btf *local_btf,
 5406			       struct hashmap *cand_cache)
 5407{
 5408	const void *type_key = u32_as_hash_key(relo->type_id);
 5409	struct bpf_core_cand_list *cands = NULL;
 5410	const char *prog_name = prog->name;
 5411	const struct btf_type *local_type;
 5412	const char *local_name;
 5413	__u32 local_id = relo->type_id;
 5414	struct bpf_insn *insn;
 5415	int insn_idx, err;
 5416
 5417	if (relo->insn_off % BPF_INSN_SZ)
 5418		return -EINVAL;
 5419	insn_idx = relo->insn_off / BPF_INSN_SZ;
 5420	/* adjust insn_idx from section frame of reference to the local
 5421	 * program's frame of reference; (sub-)program code is not yet
 5422	 * relocated, so it's enough to just subtract in-section offset
 5423	 */
 5424	insn_idx = insn_idx - prog->sec_insn_off;
 5425	if (insn_idx >= prog->insns_cnt)
 5426		return -EINVAL;
 5427	insn = &prog->insns[insn_idx];
 5428
 5429	local_type = btf__type_by_id(local_btf, local_id);
 5430	if (!local_type)
 5431		return -EINVAL;
 5432
 5433	local_name = btf__name_by_offset(local_btf, local_type->name_off);
 5434	if (!local_name)
 5435		return -EINVAL;
 5436
 5437	if (prog->obj->gen_loader) {
 5438		pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n",
 5439			prog - prog->obj->programs, relo->insn_off / 8,
 5440			local_name, relo->kind);
 5441		return -ENOTSUP;
 5442	}
 5443
 5444	if (relo->kind != BPF_TYPE_ID_LOCAL &&
 5445	    !hashmap__find(cand_cache, type_key, (void **)&cands)) {
 5446		cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
 5447		if (IS_ERR(cands)) {
 5448			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
 5449				prog_name, relo_idx, local_id, btf_kind_str(local_type),
 5450				local_name, PTR_ERR(cands));
 5451			return PTR_ERR(cands);
 5452		}
 5453		err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
 5454		if (err) {
 5455			bpf_core_free_cands(cands);
 5456			return err;
 5457		}
 5458	}
 5459
 5460	return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands);
 5461}
 5462
 5463static int
 5464bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 5465{
 5466	const struct btf_ext_info_sec *sec;
 5467	const struct bpf_core_relo *rec;
 5468	const struct btf_ext_info *seg;
 5469	struct hashmap_entry *entry;
 5470	struct hashmap *cand_cache = NULL;
 5471	struct bpf_program *prog;
 5472	const char *sec_name;
 5473	int i, err = 0, insn_idx, sec_idx;
 5474
 5475	if (obj->btf_ext->core_relo_info.len == 0)
 5476		return 0;
 5477
 5478	if (targ_btf_path) {
 5479		obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
 5480		err = libbpf_get_error(obj->btf_vmlinux_override);
 5481		if (err) {
 5482			pr_warn("failed to parse target BTF: %d\n", err);
 5483			return err;
 5484		}
 5485	}
 5486
 5487	cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
 5488	if (IS_ERR(cand_cache)) {
 5489		err = PTR_ERR(cand_cache);
 5490		goto out;
 5491	}
 5492
 5493	seg = &obj->btf_ext->core_relo_info;
 5494	for_each_btf_ext_sec(seg, sec) {
 5495		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
 5496		if (str_is_empty(sec_name)) {
 5497			err = -EINVAL;
 5498			goto out;
 5499		}
 5500		/* bpf_object's ELF is gone by now so it's not easy to find
 5501		 * section index by section name, but we can find *any*
 5502		 * bpf_program within desired section name and use it's
 5503		 * prog->sec_idx to do a proper search by section index and
 5504		 * instruction offset
 5505		 */
 5506		prog = NULL;
 5507		for (i = 0; i < obj->nr_programs; i++) {
 5508			prog = &obj->programs[i];
 5509			if (strcmp(prog->sec_name, sec_name) == 0)
 5510				break;
 5511		}
 5512		if (!prog) {
 5513			pr_warn("sec '%s': failed to find a BPF program\n", sec_name);
 5514			return -ENOENT;
 5515		}
 5516		sec_idx = prog->sec_idx;
 5517
 5518		pr_debug("sec '%s': found %d CO-RE relocations\n",
 5519			 sec_name, sec->num_info);
 5520
 5521		for_each_btf_ext_rec(seg, sec, i, rec) {
 5522			insn_idx = rec->insn_off / BPF_INSN_SZ;
 5523			prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
 5524			if (!prog) {
 5525				pr_warn("sec '%s': failed to find program at insn #%d for CO-RE offset relocation #%d\n",
 5526					sec_name, insn_idx, i);
 5527				err = -EINVAL;
 5528				goto out;
 5529			}
 5530			/* no need to apply CO-RE relocation if the program is
 5531			 * not going to be loaded
 5532			 */
 5533			if (!prog->load)
 5534				continue;
 5535
 5536			err = bpf_core_apply_relo(prog, rec, i, obj->btf, cand_cache);
 5537			if (err) {
 5538				pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
 5539					prog->name, i, err);
 5540				goto out;
 5541			}
 5542		}
 5543	}
 5544
 5545out:
 5546	/* obj->btf_vmlinux and module BTFs are freed after object load */
 5547	btf__free(obj->btf_vmlinux_override);
 5548	obj->btf_vmlinux_override = NULL;
 5549
 5550	if (!IS_ERR_OR_NULL(cand_cache)) {
 5551		hashmap__for_each_entry(cand_cache, entry, i) {
 5552			bpf_core_free_cands(entry->value);
 5553		}
 5554		hashmap__free(cand_cache);
 5555	}
 5556	return err;
 5557}
 5558
 5559/* Relocate data references within program code:
 5560 *  - map references;
 5561 *  - global variable references;
 5562 *  - extern references.
 5563 */
 5564static int
 5565bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 5566{
 5567	int i;
 5568
 5569	for (i = 0; i < prog->nr_reloc; i++) {
 5570		struct reloc_desc *relo = &prog->reloc_desc[i];
 5571		struct bpf_insn *insn = &prog->insns[relo->insn_idx];
 5572		struct extern_desc *ext;
 5573
 5574		switch (relo->type) {
 5575		case RELO_LD64:
 5576			if (obj->gen_loader) {
 5577				insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
 5578				insn[0].imm = relo->map_idx;
 5579			} else {
 5580				insn[0].src_reg = BPF_PSEUDO_MAP_FD;
 5581				insn[0].imm = obj->maps[relo->map_idx].fd;
 5582			}
 5583			break;
 5584		case RELO_DATA:
 5585			insn[1].imm = insn[0].imm + relo->sym_off;
 5586			if (obj->gen_loader) {
 5587				insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
 5588				insn[0].imm = relo->map_idx;
 5589			} else {
 5590				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 5591				insn[0].imm = obj->maps[relo->map_idx].fd;
 5592			}
 5593			break;
 5594		case RELO_EXTERN_VAR:
 5595			ext = &obj->externs[relo->sym_off];
 5596			if (ext->type == EXT_KCFG) {
 5597				if (obj->gen_loader) {
 5598					insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
 5599					insn[0].imm = obj->kconfig_map_idx;
 5600				} else {
 5601					insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 5602					insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
 5603				}
 5604				insn[1].imm = ext->kcfg.data_off;
 5605			} else /* EXT_KSYM */ {
 5606				if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
 5607					insn[0].src_reg = BPF_PSEUDO_BTF_ID;
 5608					insn[0].imm = ext->ksym.kernel_btf_id;
 5609					insn[1].imm = ext->ksym.kernel_btf_obj_fd;
 5610				} else { /* typeless ksyms or unresolved typed ksyms */
 5611					insn[0].imm = (__u32)ext->ksym.addr;
 5612					insn[1].imm = ext->ksym.addr >> 32;
 5613				}
 5614			}
 5615			break;
 5616		case RELO_EXTERN_FUNC:
 5617			ext = &obj->externs[relo->sym_off];
 5618			insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
 5619			if (ext->is_set) {
 5620				insn[0].imm = ext->ksym.kernel_btf_id;
 5621				insn[0].off = ext->ksym.btf_fd_idx;
 5622			} else { /* unresolved weak kfunc */
 5623				insn[0].imm = 0;
 5624				insn[0].off = 0;
 5625			}
 5626			break;
 5627		case RELO_SUBPROG_ADDR:
 5628			if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
 5629				pr_warn("prog '%s': relo #%d: bad insn\n",
 5630					prog->name, i);
 5631				return -EINVAL;
 5632			}
 5633			/* handled already */
 5634			break;
 5635		case RELO_CALL:
 5636			/* handled already */
 5637			break;
 5638		default:
 5639			pr_warn("prog '%s': relo #%d: bad relo type %d\n",
 5640				prog->name, i, relo->type);
 5641			return -EINVAL;
 5642		}
 5643	}
 5644
 5645	return 0;
 5646}
 5647
 5648static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
 5649				    const struct bpf_program *prog,
 5650				    const struct btf_ext_info *ext_info,
 5651				    void **prog_info, __u32 *prog_rec_cnt,
 5652				    __u32 *prog_rec_sz)
 5653{
 5654	void *copy_start = NULL, *copy_end = NULL;
 5655	void *rec, *rec_end, *new_prog_info;
 5656	const struct btf_ext_info_sec *sec;
 5657	size_t old_sz, new_sz;
 5658	const char *sec_name;
 5659	int i, off_adj;
 5660
 5661	for_each_btf_ext_sec(ext_info, sec) {
 5662		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
 5663		if (!sec_name)
 5664			return -EINVAL;
 5665		if (strcmp(sec_name, prog->sec_name) != 0)
 5666			continue;
 5667
 5668		for_each_btf_ext_rec(ext_info, sec, i, rec) {
 5669			__u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
 5670
 5671			if (insn_off < prog->sec_insn_off)
 5672				continue;
 5673			if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
 5674				break;
 5675
 5676			if (!copy_start)
 5677				copy_start = rec;
 5678			copy_end = rec + ext_info->rec_size;
 5679		}
 5680
 5681		if (!copy_start)
 5682			return -ENOENT;
 5683
 5684		/* append func/line info of a given (sub-)program to the main
 5685		 * program func/line info
 5686		 */
 5687		old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
 5688		new_sz = old_sz + (copy_end - copy_start);
 5689		new_prog_info = realloc(*prog_info, new_sz);
 5690		if (!new_prog_info)
 5691			return -ENOMEM;
 5692		*prog_info = new_prog_info;
 5693		*prog_rec_cnt = new_sz / ext_info->rec_size;
 5694		memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
 5695
 5696		/* Kernel instruction offsets are in units of 8-byte
 5697		 * instructions, while .BTF.ext instruction offsets generated
 5698		 * by Clang are in units of bytes. So convert Clang offsets
 5699		 * into kernel offsets and adjust offset according to program
 5700		 * relocated position.
 5701		 */
 5702		off_adj = prog->sub_insn_off - prog->sec_insn_off;
 5703		rec = new_prog_info + old_sz;
 5704		rec_end = new_prog_info + new_sz;
 5705		for (; rec < rec_end; rec += ext_info->rec_size) {
 5706			__u32 *insn_off = rec;
 5707
 5708			*insn_off = *insn_off / BPF_INSN_SZ + off_adj;
 5709		}
 5710		*prog_rec_sz = ext_info->rec_size;
 5711		return 0;
 5712	}
 5713
 5714	return -ENOENT;
 5715}
 5716
 5717static int
 5718reloc_prog_func_and_line_info(const struct bpf_object *obj,
 5719			      struct bpf_program *main_prog,
 5720			      const struct bpf_program *prog)
 5721{
 5722	int err;
 5723
 5724	/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
 5725	 * supprot func/line info
 5726	 */
 5727	if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
 5728		return 0;
 5729
 5730	/* only attempt func info relocation if main program's func_info
 5731	 * relocation was successful
 5732	 */
 5733	if (main_prog != prog && !main_prog->func_info)
 5734		goto line_info;
 5735
 5736	err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
 5737				       &main_prog->func_info,
 5738				       &main_prog->func_info_cnt,
 5739				       &main_prog->func_info_rec_size);
 5740	if (err) {
 5741		if (err != -ENOENT) {
 5742			pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
 5743				prog->name, err);
 5744			return err;
 5745		}
 5746		if (main_prog->func_info) {
 5747			/*
 5748			 * Some info has already been found but has problem
 5749			 * in the last btf_ext reloc. Must have to error out.
 5750			 */
 5751			pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
 5752			return err;
 5753		}
 5754		/* Have problem loading the very first info. Ignore the rest. */
 5755		pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
 5756			prog->name);
 5757	}
 5758
 5759line_info:
 5760	/* don't relocate line info if main program's relocation failed */
 5761	if (main_prog != prog && !main_prog->line_info)
 5762		return 0;
 5763
 5764	err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
 5765				       &main_prog->line_info,
 5766				       &main_prog->line_info_cnt,
 5767				       &main_prog->line_info_rec_size);
 5768	if (err) {
 5769		if (err != -ENOENT) {
 5770			pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
 5771				prog->name, err);
 5772			return err;
 5773		}
 5774		if (main_prog->line_info) {
 5775			/*
 5776			 * Some info has already been found but has problem
 5777			 * in the last btf_ext reloc. Must have to error out.
 5778			 */
 5779			pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
 5780			return err;
 5781		}
 5782		/* Have problem loading the very first info. Ignore the rest. */
 5783		pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
 5784			prog->name);
 5785	}
 5786	return 0;
 5787}
 5788
 5789static int cmp_relo_by_insn_idx(const void *key, const void *elem)
 5790{
 5791	size_t insn_idx = *(const size_t *)key;
 5792	const struct reloc_desc *relo = elem;
 5793
 5794	if (insn_idx == relo->insn_idx)
 5795		return 0;
 5796	return insn_idx < relo->insn_idx ? -1 : 1;
 5797}
 5798
 5799static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
 5800{
 5801	return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
 5802		       sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
 5803}
 5804
 5805static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
 5806{
 5807	int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
 5808	struct reloc_desc *relos;
 5809	int i;
 5810
 5811	if (main_prog == subprog)
 5812		return 0;
 5813	relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
 5814	if (!relos)
 5815		return -ENOMEM;
 5816	memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
 5817	       sizeof(*relos) * subprog->nr_reloc);
 5818
 5819	for (i = main_prog->nr_reloc; i < new_cnt; i++)
 5820		relos[i].insn_idx += subprog->sub_insn_off;
 5821	/* After insn_idx adjustment the 'relos' array is still sorted
 5822	 * by insn_idx and doesn't break bsearch.
 5823	 */
 5824	main_prog->reloc_desc = relos;
 5825	main_prog->nr_reloc = new_cnt;
 5826	return 0;
 5827}
 5828
/* Relocate sub-program calls and subprog address loads found in 'prog',
 * which is either 'main_prog' itself or a subprog whose instructions were
 * already appended to 'main_prog' at prog->sub_insn_off.
 *
 * For every such instruction the target subprog is determined (from the
 * matching relocation, or from insn->imm when no relocation was emitted).
 * If it hasn't been appended to main_prog's instruction array yet, it is
 * appended together with its relocations and processed recursively. Then
 * instruction's imm is rewritten to the relative offset of the subprog
 * within main_prog. Instructions with RELO_EXTERN_FUNC relocation are
 * skipped here.
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int
bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
		       struct bpf_program *prog)
{
	size_t sub_insn_idx, insn_idx, new_cnt;
	struct bpf_program *subprog;
	struct bpf_insn *insns, *insn;
	struct reloc_desc *relo;
	int err;

	err = reloc_prog_func_and_line_info(obj, main_prog, prog);
	if (err)
		return err;

	for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
		/* prog's instructions live inside main_prog's instruction
		 * array, starting at prog->sub_insn_off
		 */
		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
		if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
			continue;

		relo = find_prog_insn_relo(prog, insn_idx);
		if (relo && relo->type == RELO_EXTERN_FUNC)
			/* kfunc relocations will be handled later
			 * in bpf_object__relocate_data()
			 */
			continue;
		if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
			pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
				prog->name, insn_idx, relo->type);
			return -LIBBPF_ERRNO__RELOC;
		}
		if (relo) {
			/* sub-program instruction index is a combination of
			 * an offset of a symbol pointed to by relocation and
			 * call instruction's imm field; for global functions,
			 * call always has imm = -1, but for static functions
			 * relocation is against STT_SECTION and insn->imm
			 * points to a start of a static function
			 *
			 * for subprog addr relocation, the relo->sym_off + insn->imm is
			 * the byte offset in the corresponding section.
			 */
			if (relo->type == RELO_CALL)
				sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
			else
				sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
		} else if (insn_is_pseudo_func(insn)) {
			/*
			 * RELO_SUBPROG_ADDR relo is always emitted even if both
			 * functions are in the same section, so it shouldn't reach here.
			 */
			pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
				prog->name, insn_idx);
			return -LIBBPF_ERRNO__RELOC;
		} else {
			/* if subprogram call is to a static function within
			 * the same ELF section, there won't be any relocation
			 * emitted, but it also means there is no additional
			 * offset necessary, insns->imm is relative to
			 * instruction's original position within the section
			 */
			sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
		}

		/* we enforce that sub-programs should be in .text section */
		subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
		if (!subprog) {
			pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
				prog->name);
			return -LIBBPF_ERRNO__RELOC;
		}

		/* if it's the first call instruction calling into this
		 * subprogram (meaning this subprog hasn't been processed
		 * yet) within the context of current main program:
		 *   - append it at the end of main program's instructions blob;
		 *   - process it recursively, while current program is put on hold;
		 *   - if that subprogram calls some other not yet processed
		 *   subprogram, same thing will happen recursively until
		 *   there are no more unprocessed subprograms left to append
		 *   and relocate.
		 */
		if (subprog->sub_insn_off == 0) {
			subprog->sub_insn_off = main_prog->insns_cnt;

			new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
			insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
			if (!insns) {
				pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
				return -ENOMEM;
			}
			main_prog->insns = insns;
			main_prog->insns_cnt = new_cnt;

			memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
			       subprog->insns_cnt * sizeof(*insns));

			pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
				 main_prog->name, subprog->insns_cnt, subprog->name);

			/* The subprog insns are now appended. Append its relos too. */
			err = append_subprog_relos(main_prog, subprog);
			if (err)
				return err;
			err = bpf_object__reloc_code(obj, main_prog, subprog);
			if (err)
				return err;
		}

		/* main_prog->insns memory could have been re-allocated, so
		 * calculate pointer again
		 */
		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
		/* calculate correct instruction position within current main
		 * prog; each main prog can have a different set of
		 * subprograms appended (potentially in different order as
		 * well), so position of any subprog can be different for
		 * different main programs */
		insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;

		pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
			 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
	}

	return 0;
}
 5954
 5955/*
 5956 * Relocate sub-program calls.
 5957 *
 * Algorithm operates as follows. Each entry-point BPF program (referred to as
 * main prog) is processed separately. Each subprog (a non-entry function
 * that can be called from either entry progs or other subprogs) gets its
 * sub_insn_off reset to zero. This serves as an indicator that this subprogram
 * hasn't yet been appended and relocated within current main prog. Once it's
 * relocated, sub_insn_off will point at the position within current main prog
 * where given subprog was appended. This will further be used to relocate all
 * the call instructions jumping into this subprog.
 5966 *
 5967 * We start with main program and process all call instructions. If the call
 5968 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
 5969 * is zero), subprog instructions are appended at the end of main program's
 5970 * instruction array. Then main program is "put on hold" while we recursively
 5971 * process newly appended subprogram. If that subprogram calls into another
 5972 * subprogram that hasn't been appended, new subprogram is appended again to
 5973 * the *main* prog's instructions (subprog's instructions are always left
 5974 * untouched, as they need to be in unmodified state for subsequent main progs
 5975 * and subprog instructions are always sent only as part of a main prog) and
 5976 * the process continues recursively. Once all the subprogs called from a main
 5977 * prog or any of its subprogs are appended (and relocated), all their
 5978 * positions within finalized instructions array are known, so it's easy to
 5979 * rewrite call instructions with correct relative offsets, corresponding to
 5980 * desired target subprog.
 5981 *
 * It's important to realize that some subprogs might not be called from some
 5983 * main prog and any of its called/used subprogs. Those will keep their
 5984 * subprog->sub_insn_off as zero at all times and won't be appended to current
 5985 * main prog and won't be relocated within the context of current main prog.
 5986 * They might still be used from other main progs later.
 5987 *
 5988 * Visually this process can be shown as below. Suppose we have two main
 5989 * programs mainA and mainB and BPF object contains three subprogs: subA,
 5990 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
 5991 * subC both call subB:
 5992 *
 5993 *        +--------+ +-------+
 5994 *        |        v v       |
 5995 *     +--+---+ +--+-+-+ +---+--+
 5996 *     | subA | | subB | | subC |
 5997 *     +--+---+ +------+ +---+--+
 5998 *        ^                  ^
 5999 *        |                  |
 6000 *    +---+-------+   +------+----+
 6001 *    |   mainA   |   |   mainB   |
 6002 *    +-----------+   +-----------+
 6003 *
 6004 * We'll start relocating mainA, will find subA, append it and start
 * processing subA recursively:
 6006 *
 6007 *    +-----------+------+
 6008 *    |   mainA   | subA |
 6009 *    +-----------+------+
 6010 *
 6011 * At this point we notice that subB is used from subA, so we append it and
 6012 * relocate (there are no further subcalls from subB):
 6013 *
 6014 *    +-----------+------+------+
 6015 *    |   mainA   | subA | subB |
 6016 *    +-----------+------+------+
 6017 *
 6018 * At this point, we relocate subA calls, then go one level up and finish with
 * relocating mainA calls. mainA is done.
 6020 *
 6021 * For mainB process is similar but results in different order. We start with
 6022 * mainB and skip subA and subB, as mainB never calls them (at least
 6023 * directly), but we see subC is needed, so we append and start processing it:
 6024 *
 6025 *    +-----------+------+
 6026 *    |   mainB   | subC |
 6027 *    +-----------+------+
 6028 * Now we see subC needs subB, so we go back to it, append and relocate it:
 6029 *
 6030 *    +-----------+------+------+
 6031 *    |   mainB   | subC | subB |
 6032 *    +-----------+------+------+
 6033 *
 6034 * At this point we unwind recursion, relocate calls in subC, then in mainB.
 6035 */
 6036static int
 6037bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
 6038{
 6039	struct bpf_program *subprog;
 6040	int i, err;
 6041
 6042	/* mark all subprogs as not relocated (yet) within the context of
 6043	 * current main program
 6044	 */
 6045	for (i = 0; i < obj->nr_programs; i++) {
 6046		subprog = &obj->programs[i];
 6047		if (!prog_is_subprog(obj, subprog))
 6048			continue;
 6049
 6050		subprog->sub_insn_off = 0;
 6051	}
 6052
 6053	err = bpf_object__reloc_code(obj, prog, prog);
 6054	if (err)
 6055		return err;
 6056
 6057
 6058	return 0;
 6059}
 6060
 6061static void
 6062bpf_object__free_relocs(struct bpf_object *obj)
 6063{
 6064	struct bpf_program *prog;
 6065	int i;
 6066
 6067	/* free up relocation descriptors */
 6068	for (i = 0; i < obj->nr_programs; i++) {
 6069		prog = &obj->programs[i];
 6070		zfree(&prog->reloc_desc);
 6071		prog->nr_reloc = 0;
 6072	}
 6073}
 6074
 6075static int
 6076bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 6077{
 6078	struct bpf_program *prog;
 6079	size_t i, j;
 6080	int err;
 6081
 6082	if (obj->btf_ext) {
 6083		err = bpf_object__relocate_core(obj, targ_btf_path);
 6084		if (err) {
 6085			pr_warn("failed to perform CO-RE relocations: %d\n",
 6086				err);
 6087			return err;
 6088		}
 6089	}
 6090
 6091	/* Before relocating calls pre-process relocations and mark
 6092	 * few ld_imm64 instructions that points to subprogs.
 6093	 * Otherwise bpf_object__reloc_code() later would have to consider
 6094	 * all ld_imm64 insns as relocation candidates. That would
 6095	 * reduce relocation speed, since amount of find_prog_insn_relo()
 6096	 * would increase and most of them will fail to find a relo.
 6097	 */
 6098	for (i = 0; i < obj->nr_programs; i++) {
 6099		prog = &obj->programs[i];
 6100		for (j = 0; j < prog->nr_reloc; j++) {
 6101			struct reloc_desc *relo = &prog->reloc_desc[j];
 6102			struct bpf_insn *insn = &prog->insns[relo->insn_idx];
 6103
 6104			/* mark the insn, so it's recognized by insn_is_pseudo_func() */
 6105			if (relo->type == RELO_SUBPROG_ADDR)
 6106				insn[0].src_reg = BPF_PSEUDO_FUNC;
 6107		}
 6108	}
 6109
 6110	/* relocate subprogram calls and append used subprograms to main
 6111	 * programs; each copy of subprogram code needs to be relocated
 6112	 * differently for each main program, because its code location might
 6113	 * have changed.
 6114	 * Append subprog relos to main programs to allow data relos to be
 6115	 * processed after text is completely relocated.
 6116	 */
 6117	for (i = 0; i < obj->nr_programs; i++) {
 6118		prog = &obj->programs[i];
 6119		/* sub-program's sub-calls are relocated within the context of
 6120		 * its main program only
 6121		 */
 6122		if (prog_is_subprog(obj, prog))
 6123			continue;
 6124
 6125		err = bpf_object__relocate_calls(obj, prog);
 6126		if (err) {
 6127			pr_warn("prog '%s': failed to relocate calls: %d\n",
 6128				prog->name, err);
 6129			return err;
 6130		}
 6131	}
 6132	/* Process data relos for main programs */
 6133	for (i = 0; i < obj->nr_programs; i++) {
 6134		prog = &obj->programs[i];
 6135		if (prog_is_subprog(obj, prog))
 6136			continue;
 6137		err = bpf_object__relocate_data(obj, prog);
 6138		if (err) {
 6139			pr_warn("prog '%s': failed to relocate data references: %d\n",
 6140				prog->name, err);
 6141			return err;
 6142		}
 6143	}
 6144	if (!obj->gen_loader)
 6145		bpf_object__free_relocs(obj);
 6146	return 0;
 6147}
 6148
 6149static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
 6150					    Elf64_Shdr *shdr, Elf_Data *data);
 6151
 6152static int bpf_object__collect_map_relos(struct bpf_object *obj,
 6153					 Elf64_Shdr *shdr, Elf_Data *data)
 6154{
 6155	const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
 6156	int i, j, nrels, new_sz;
 6157	const struct btf_var_secinfo *vi = NULL;
 6158	const struct btf_type *sec, *var, *def;
 6159	struct bpf_map *map = NULL, *targ_map;
 6160	const struct btf_member *member;
 6161	const char *name, *mname;
 6162	unsigned int moff;
 6163	Elf64_Sym *sym;
 6164	Elf64_Rel *rel;
 6165	void *tmp;
 6166
 6167	if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
 6168		return -EINVAL;
 6169	sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
 6170	if (!sec)
 6171		return -EINVAL;
 6172
 6173	nrels = shdr->sh_size / shdr->sh_entsize;
 6174	for (i = 0; i < nrels; i++) {
 6175		rel = elf_rel_by_idx(data, i);
 6176		if (!rel) {
 6177			pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
 6178			return -LIBBPF_ERRNO__FORMAT;
 6179		}
 6180
 6181		sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
 6182		if (!sym) {
 6183			pr_warn(".maps relo #%d: symbol %zx not found\n",
 6184				i, (size_t)ELF64_R_SYM(rel->r_info));
 6185			return -LIBBPF_ERRNO__FORMAT;
 6186		}
 6187		name = elf_sym_str(obj, sym->st_name) ?: "<?>";
 6188		if (sym->st_shndx != obj->efile.btf_maps_shndx) {
 6189			pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
 6190				i, name);
 6191			return -LIBBPF_ERRNO__RELOC;
 6192		}
 6193
 6194		pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
 6195			 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
 6196			 (size_t)rel->r_offset, sym->st_name, name);
 6197
 6198		for (j = 0; j < obj->nr_maps; j++) {
 6199			map = &obj->maps[j];
 6200			if (map->sec_idx != obj->efile.btf_maps_shndx)
 6201				continue;
 6202
 6203			vi = btf_var_secinfos(sec) + map->btf_var_idx;
 6204			if (vi->offset <= rel->r_offset &&
 6205			    rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
 6206				break;
 6207		}
 6208		if (j == obj->nr_maps) {
 6209			pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
 6210				i, name, (size_t)rel->r_offset);
 6211			return -EINVAL;
 6212		}
 6213
 6214		if (!bpf_map_type__is_map_in_map(map->def.type))
 6215			return -EINVAL;
 6216		if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
 6217		    map->def.key_size != sizeof(int)) {
 6218			pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
 6219				i, map->name, sizeof(int));
 6220			return -EINVAL;
 6221		}
 6222
 6223		targ_map = bpf_object__find_map_by_name(obj, name);
 6224		if (!targ_map)
 6225			return -ESRCH;
 6226
 6227		var = btf__type_by_id(obj->btf, vi->type);
 6228		def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
 6229		if (btf_vlen(def) == 0)
 6230			return -EINVAL;
 6231		member = btf_members(def) + btf_vlen(def) - 1;
 6232		mname = btf__name_by_offset(obj->btf, member->name_off);
 6233		if (strcmp(mname, "values"))
 6234			return -EINVAL;
 6235
 6236		moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
 6237		if (rel->r_offset - vi->offset < moff)
 6238			return -EINVAL;
 6239
 6240		moff = rel->r_offset - vi->offset - moff;
 6241		/* here we use BPF pointer size, which is always 64 bit, as we
 6242		 * are parsing ELF that was built for BPF target
 6243		 */
 6244		if (moff % bpf_ptr_sz)
 6245			return -EINVAL;
 6246		moff /= bpf_ptr_sz;
 6247		if (moff >= map->init_slots_sz) {
 6248			new_sz = moff + 1;
 6249			tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
 6250			if (!tmp)
 6251				return -ENOMEM;
 6252			map->init_slots = tmp;
 6253			memset(map->init_slots + map->init_slots_sz, 0,
 6254			       (new_sz - map->init_slots_sz) * host_ptr_sz);
 6255			map->init_slots_sz = new_sz;
 6256		}
 6257		map->init_slots[moff] = targ_map;
 6258
 6259		pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
 6260			 i, map->name, moff, name);
 6261	}
 6262
 6263	return 0;
 6264}
 6265
 6266static int cmp_relocs(const void *_a, const void *_b)
 6267{
 6268	const struct reloc_desc *a = _a;
 6269	const struct reloc_desc *b = _b;
 6270
 6271	if (a->insn_idx != b->insn_idx)
 6272		return a->insn_idx < b->insn_idx ? -1 : 1;
 6273
 6274	/* no two relocations should have the same insn_idx, but ... */
 6275	if (a->type != b->type)
 6276		return a->type < b->type ? -1 : 1;
 6277
 6278	return 0;
 6279}
 6280
 6281static int bpf_object__collect_relos(struct bpf_object *obj)
 6282{
 6283	int i, err;
 6284
 6285	for (i = 0; i < obj->efile.sec_cnt; i++) {
 6286		struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
 6287		Elf64_Shdr *shdr;
 6288		Elf_Data *data;
 6289		int idx;
 6290
 6291		if (sec_desc->sec_type != SEC_RELO)
 6292			continue;
 6293
 6294		shdr = sec_desc->shdr;
 6295		data = sec_desc->data;
 6296		idx = shdr->sh_info;
 6297
 6298		if (shdr->sh_type != SHT_REL) {
 6299			pr_warn("internal error at %d\n", __LINE__);
 6300			return -LIBBPF_ERRNO__INTERNAL;
 6301		}
 6302
 6303		if (idx == obj->efile.st_ops_shndx)
 6304			err = bpf_object__collect_st_ops_relos(obj, shdr, data);
 6305		else if (idx == obj->efile.btf_maps_shndx)
 6306			err = bpf_object__collect_map_relos(obj, shdr, data);
 6307		else
 6308			err = bpf_object__collect_prog_relos(obj, shdr, data);
 6309		if (err)
 6310			return err;
 6311	}
 6312
 6313	for (i = 0; i < obj->nr_programs; i++) {
 6314		struct bpf_program *p = &obj->programs[i];
 6315
 6316		if (!p->nr_reloc)
 6317			continue;
 6318
 6319		qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
 6320	}
 6321	return 0;
 6322}
 6323
 6324static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
 6325{
 6326	if (BPF_CLASS(insn->code) == BPF_JMP &&
 6327	    BPF_OP(insn->code) == BPF_CALL &&
 6328	    BPF_SRC(insn->code) == BPF_K &&
 6329	    insn->src_reg == 0 &&
 6330	    insn->dst_reg == 0) {
 6331		    *func_id = insn->imm;
 6332		    return true;
 6333	}
 6334	return false;
 6335}
 6336
 6337static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
 6338{
 6339	struct bpf_insn *insn = prog->insns;
 6340	enum bpf_func_id func_id;
 6341	int i;
 6342
 6343	if (obj->gen_loader)
 6344		return 0;
 6345
 6346	for (i = 0; i < prog->insns_cnt; i++, insn++) {
 6347		if (!insn_is_helper_call(insn, &func_id))
 6348			continue;
 6349
 6350		/* on kernels that don't yet support
 6351		 * bpf_probe_read_{kernel,user}[_str] helpers, fall back
 6352		 * to bpf_probe_read() which works well for old kernels
 6353		 */
 6354		switch (func_id) {
 6355		case BPF_FUNC_probe_read_kernel:
 6356		case BPF_FUNC_probe_read_user:
 6357			if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
 6358				insn->imm = BPF_FUNC_probe_read;
 6359			break;
 6360		case BPF_FUNC_probe_read_kernel_str:
 6361		case BPF_FUNC_probe_read_user_str:
 6362			if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
 6363				insn->imm = BPF_FUNC_probe_read_str;
 6364			break;
 6365		default:
 6366			break;
 6367		}
 6368	}
 6369	return 0;
 6370}
 6371
 6372static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
 6373				     int *btf_obj_fd, int *btf_type_id);
 6374
 6375/* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */
 6376static int libbpf_preload_prog(struct bpf_program *prog,
 6377			       struct bpf_prog_load_params *attr, long cookie)
 6378{
 6379	enum sec_def_flags def = cookie;
 6380
 6381	/* old kernels might not support specifying expected_attach_type */
 6382	if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
 6383		attr->expected_attach_type = 0;
 6384
 6385	if (def & SEC_SLEEPABLE)
 6386		attr->prog_flags |= BPF_F_SLEEPABLE;
 6387
 6388	if ((prog->type == BPF_PROG_TYPE_TRACING ||
 6389	     prog->type == BPF_PROG_TYPE_LSM ||
 6390	     prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
 6391		int btf_obj_fd = 0, btf_type_id = 0, err;
 6392		const char *attach_name;
 6393
 6394		attach_name = strchr(prog->sec_name, '/') + 1;
 6395		err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
 6396		if (err)
 6397			return err;
 6398
 6399		/* cache resolved BTF FD and BTF type ID in the prog */
 6400		prog->attach_btf_obj_fd = btf_obj_fd;
 6401		prog->attach_btf_id = btf_type_id;
 6402
 6403		/* but by now libbpf common logic is not utilizing
 6404		 * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because
 6405		 * this callback is called after attrs were populated by
 6406		 * libbpf, so this callback has to update attr explicitly here
 6407		 */
 6408		attr->attach_btf_obj_fd = btf_obj_fd;
 6409		attr->attach_btf_id = btf_type_id;
 6410	}
 6411	return 0;
 6412}
 6413
 6414static int
 6415load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
 6416	     char *license, __u32 kern_version, int *pfd)
 6417{
 6418	struct bpf_prog_load_params load_attr = {};
 6419	struct bpf_object *obj = prog->obj;
 6420	char *cp, errmsg[STRERR_BUFSIZE];
 6421	size_t log_buf_size = 0;
 6422	char *log_buf = NULL;
 6423	int btf_fd, ret, err;
 6424
 6425	if (prog->type == BPF_PROG_TYPE_UNSPEC) {
 6426		/*
 6427		 * The program type must be set.  Most likely we couldn't find a proper
 6428		 * section definition at load time, and thus we didn't infer the type.
 6429		 */
 6430		pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
 6431			prog->name, prog->sec_name);
 6432		return -EINVAL;
 6433	}
 6434
 6435	if (!insns || !insns_cnt)
 6436		return -EINVAL;
 6437
 6438	load_attr.prog_type = prog->type;
 6439	load_attr.expected_attach_type = prog->expected_attach_type;
 6440	if (kernel_supports(obj, FEAT_PROG_NAME))
 6441		load_attr.name = prog->name;
 6442	load_attr.insns = insns;
 6443	load_attr.insn_cnt = insns_cnt;
 6444	load_attr.license = license;
 6445	load_attr.attach_btf_id = prog->attach_btf_id;
 6446	load_attr.attach_prog_fd = prog->attach_prog_fd;
 6447	load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
 6448	load_attr.attach_btf_id = prog->attach_btf_id;
 6449	load_attr.kern_version = kern_version;
 6450	load_attr.prog_ifindex = prog->prog_ifindex;
 6451
 6452	/* specify func_info/line_info only if kernel supports them */
 6453	btf_fd = bpf_object__btf_fd(obj);
 6454	if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
 6455		load_attr.prog_btf_fd = btf_fd;
 6456		load_attr.func_info = prog->func_info;
 6457		load_attr.func_info_rec_size = prog->func_info_rec_size;
 6458		load_attr.func_info_cnt = prog->func_info_cnt;
 6459		load_attr.line_info = prog->line_info;
 6460		load_attr.line_info_rec_size = prog->line_info_rec_size;
 6461		load_attr.line_info_cnt = prog->line_info_cnt;
 6462	}
 6463	load_attr.log_level = prog->log_level;
 6464	load_attr.prog_flags = prog->prog_flags;
 6465	load_attr.fd_array = obj->fd_array;
 6466
 6467	/* adjust load_attr if sec_def provides custom preload callback */
 6468	if (prog->sec_def && prog->sec_def->preload_fn) {
 6469		err = prog->sec_def->preload_fn(prog, &load_attr, prog->sec_def->cookie);
 6470		if (err < 0) {
 6471			pr_warn("prog '%s': failed to prepare load attributes: %d\n",
 6472				prog->name, err);
 6473			return err;
 6474		}
 6475	}
 6476
 6477	if (obj->gen_loader) {
 6478		bpf_gen__prog_load(obj->gen_loader, &load_attr,
 6479				   prog - obj->programs);
 6480		*pfd = -1;
 6481		return 0;
 6482	}
 6483retry_load:
 6484	if (log_buf_size) {
 6485		log_buf = malloc(log_buf_size);
 6486		if (!log_buf)
 6487			return -ENOMEM;
 6488
 6489		*log_buf = 0;
 6490	}
 6491
 6492	load_attr.log_buf = log_buf;
 6493	load_attr.log_buf_sz = log_buf_size;
 6494	ret = libbpf__bpf_prog_load(&load_attr);
 6495
 6496	if (ret >= 0) {
 6497		if (log_buf && load_attr.log_level)
 6498			pr_debug("verifier log:\n%s", log_buf);
 6499
 6500		if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
 6501			struct bpf_map *map;
 6502			int i;
 6503
 6504			for (i = 0; i < obj->nr_maps; i++) {
 6505				map = &prog->obj->maps[i];
 6506				if (map->libbpf_type != LIBBPF_MAP_RODATA)
 6507					continue;
 6508
 6509				if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) {
 6510					cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 6511					pr_warn("prog '%s': failed to bind .rodata map: %s\n",
 6512						prog->name, cp);
 6513					/* Don't fail hard if can't bind rodata. */
 6514				}
 6515			}
 6516		}
 6517
 6518		*pfd = ret;
 6519		ret = 0;
 6520		goto out;
 6521	}
 6522
 6523	if (!log_buf || errno == ENOSPC) {
 6524		log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
 6525				   log_buf_size << 1);
 6526
 6527		free(log_buf);
 6528		goto retry_load;
 6529	}
 6530	ret = errno ? -errno : -LIBBPF_ERRNO__LOAD;
 6531	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 6532	pr_warn("load bpf program failed: %s\n", cp);
 6533	pr_perm_msg(ret);
 6534
 6535	if (log_buf && log_buf[0] != '\0') {
 6536		ret = -LIBBPF_ERRNO__VERIFY;
 6537		pr_warn("-- BEGIN DUMP LOG ---\n");
 6538		pr_warn("\n%s\n", log_buf);
 6539		pr_warn("-- END LOG --\n");
 6540	} else if (load_attr.insn_cnt >= BPF_MAXINSNS) {
 6541		pr_warn("Program too large (%zu insns), at most %d insns\n",
 6542			load_attr.insn_cnt, BPF_MAXINSNS);
 6543		ret = -LIBBPF_ERRNO__PROG2BIG;
 6544	} else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
 6545		/* Wrong program type? */
 6546		int fd;
 6547
 6548		load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
 6549		load_attr.expected_attach_type = 0;
 6550		load_attr.log_buf = NULL;
 6551		load_attr.log_buf_sz = 0;
 6552		fd = libbpf__bpf_prog_load(&load_attr);
 6553		if (fd >= 0) {
 6554			close(fd);
 6555			ret = -LIBBPF_ERRNO__PROGTYPE;
 6556			goto out;
 6557		}
 6558	}
 6559
 6560out:
 6561	free(log_buf);
 6562	return ret;
 6563}
 6564
 6565static int bpf_program__record_externs(struct bpf_program *prog)
 6566{
 6567	struct bpf_object *obj = prog->obj;
 6568	int i;
 6569
 6570	for (i = 0; i < prog->nr_reloc; i++) {
 6571		struct reloc_desc *relo = &prog->reloc_desc[i];
 6572		struct extern_desc *ext = &obj->externs[relo->sym_off];
 6573
 6574		switch (relo->type) {
 6575		case RELO_EXTERN_VAR:
 6576			if (ext->type != EXT_KSYM)
 6577				continue;
 6578			bpf_gen__record_extern(obj->gen_loader, ext->name,
 6579					       ext->is_weak, !ext->ksym.type_id,
 6580					       BTF_KIND_VAR, relo->insn_idx);
 6581			break;
 6582		case RELO_EXTERN_FUNC:
 6583			bpf_gen__record_extern(obj->gen_loader, ext->name,
 6584					       ext->is_weak, false, BTF_KIND_FUNC,
 6585					       relo->insn_idx);
 6586			break;
 6587		default:
 6588			continue;
 6589		}
 6590	}
 6591	return 0;
 6592}
 6593
 6594int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 6595{
 6596	int err = 0, fd, i;
 6597
 6598	if (prog->obj->loaded) {
 6599		pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
 6600		return libbpf_err(-EINVAL);
 6601	}
 6602
 6603	if (prog->instances.nr < 0 || !prog->instances.fds) {
 6604		if (prog->preprocessor) {
 6605			pr_warn("Internal error: can't load program '%s'\n",
 6606				prog->name);
 6607			return libbpf_err(-LIBBPF_ERRNO__INTERNAL);
 6608		}
 6609
 6610		prog->instances.fds = malloc(sizeof(int));
 6611		if (!prog->instances.fds) {
 6612			pr_warn("Not enough memory for BPF fds\n");
 6613			return libbpf_err(-ENOMEM);
 6614		}
 6615		prog->instances.nr = 1;
 6616		prog->instances.fds[0] = -1;
 6617	}
 6618
 6619	if (!prog->preprocessor) {
 6620		if (prog->instances.nr != 1) {
 6621			pr_warn("prog '%s': inconsistent nr(%d) != 1\n",
 6622				prog->name, prog->instances.nr);
 6623		}
 6624		if (prog->obj->gen_loader)
 6625			bpf_program__record_externs(prog);
 6626		err = load_program(prog, prog->insns, prog->insns_cnt,
 6627				   license, kern_ver, &fd);
 6628		if (!err)
 6629			prog->instances.fds[0] = fd;
 6630		goto out;
 6631	}
 6632
 6633	for (i = 0; i < prog->instances.nr; i++) {
 6634		struct bpf_prog_prep_result result;
 6635		bpf_program_prep_t preprocessor = prog->preprocessor;
 6636
 6637		memset(&result, 0, sizeof(result));
 6638		err = preprocessor(prog, i, prog->insns,
 6639				   prog->insns_cnt, &result);
 6640		if (err) {
 6641			pr_warn("Preprocessing the %dth instance of program '%s' failed\n",
 6642				i, prog->name);
 6643			goto out;
 6644		}
 6645
 6646		if (!result.new_insn_ptr || !result.new_insn_cnt) {
 6647			pr_debug("Skip loading the %dth instance of program '%s'\n",
 6648				 i, prog->name);
 6649			prog->instances.fds[i] = -1;
 6650			if (result.pfd)
 6651				*result.pfd = -1;
 6652			continue;
 6653		}
 6654
 6655		err = load_program(prog, result.new_insn_ptr,
 6656				   result.new_insn_cnt, license, kern_ver, &fd);
 6657		if (err) {
 6658			pr_warn("Loading the %dth instance of program '%s' failed\n",
 6659				i, prog->name);
 6660			goto out;
 6661		}
 6662
 6663		if (result.pfd)
 6664			*result.pfd = fd;
 6665		prog->instances.fds[i] = fd;
 6666	}
 6667out:
 6668	if (err)
 6669		pr_warn("failed to load program '%s'\n", prog->name);
 6670	return libbpf_err(err);
 6671}
 6672
 6673static int
 6674bpf_object__load_progs(struct bpf_object *obj, int log_level)
 6675{
 6676	struct bpf_program *prog;
 6677	size_t i;
 6678	int err;
 6679
 6680	for (i = 0; i < obj->nr_programs; i++) {
 6681		prog = &obj->programs[i];
 6682		err = bpf_object__sanitize_prog(obj, prog);
 6683		if (err)
 6684			return err;
 6685	}
 6686
 6687	for (i = 0; i < obj->nr_programs; i++) {
 6688		prog = &obj->programs[i];
 6689		if (prog_is_subprog(obj, prog))
 6690			continue;
 6691		if (!prog->load) {
 6692			pr_debug("prog '%s': skipped loading\n", prog->name);
 6693			continue;
 6694		}
 6695		prog->log_level |= log_level;
 6696		err = bpf_program__load(prog, obj->license, obj->kern_version);
 6697		if (err)
 6698			return err;
 6699	}
 6700	if (obj->gen_loader)
 6701		bpf_object__free_relocs(obj);
 6702	return 0;
 6703}
 6704
 6705static const struct bpf_sec_def *find_sec_def(const char *sec_name);
 6706
 6707static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
 6708{
 6709	struct bpf_program *prog;
 6710	int err;
 6711
 6712	bpf_object__for_each_program(prog, obj) {
 6713		prog->sec_def = find_sec_def(prog->sec_name);
 6714		if (!prog->sec_def) {
 6715			/* couldn't guess, but user might manually specify */
 6716			pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
 6717				prog->name, prog->sec_name);
 6718			continue;
 6719		}
 6720
 6721		bpf_program__set_type(prog, prog->sec_def->prog_type);
 6722		bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type);
 6723
 6724#pragma GCC diagnostic push
 6725#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 6726		if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
 6727		    prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
 6728			prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
 6729#pragma GCC diagnostic pop
 6730
 6731		/* sec_def can have custom callback which should be called
 6732		 * after bpf_program is initialized to adjust its properties
 6733		 */
 6734		if (prog->sec_def->init_fn) {
 6735			err = prog->sec_def->init_fn(prog, prog->sec_def->cookie);
 6736			if (err < 0) {
 6737				pr_warn("prog '%s': failed to initialize: %d\n",
 6738					prog->name, err);
 6739				return err;
 6740			}
 6741		}
 6742	}
 6743
 6744	return 0;
 6745}
 6746
 6747static struct bpf_object *
 6748__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
 6749		   const struct bpf_object_open_opts *opts)
 6750{
 6751	const char *obj_name, *kconfig, *btf_tmp_path;
 6752	struct bpf_object *obj;
 6753	char tmp_name[64];
 6754	int err;
 6755
 6756	if (elf_version(EV_CURRENT) == EV_NONE) {
 6757		pr_warn("failed to init libelf for %s\n",
 6758			path ? : "(mem buf)");
 6759		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
 6760	}
 6761
 6762	if (!OPTS_VALID(opts, bpf_object_open_opts))
 6763		return ERR_PTR(-EINVAL);
 6764
 6765	obj_name = OPTS_GET(opts, object_name, NULL);
 6766	if (obj_buf) {
 6767		if (!obj_name) {
 6768			snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
 6769				 (unsigned long)obj_buf,
 6770				 (unsigned long)obj_buf_sz);
 6771			obj_name = tmp_name;
 6772		}
 6773		path = obj_name;
 6774		pr_debug("loading object '%s' from buffer\n", obj_name);
 6775	}
 6776
 6777	obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
 6778	if (IS_ERR(obj))
 6779		return obj;
 6780
 6781	btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
 6782	if (btf_tmp_path) {
 6783		if (strlen(btf_tmp_path) >= PATH_MAX) {
 6784			err = -ENAMETOOLONG;
 6785			goto out;
 6786		}
 6787		obj->btf_custom_path = strdup(btf_tmp_path);
 6788		if (!obj->btf_custom_path) {
 6789			err = -ENOMEM;
 6790			goto out;
 6791		}
 6792	}
 6793
 6794	kconfig = OPTS_GET(opts, kconfig, NULL);
 6795	if (kconfig) {
 6796		obj->kconfig = strdup(kconfig);
 6797		if (!obj->kconfig) {
 6798			err = -ENOMEM;
 6799			goto out;
 6800		}
 6801	}
 6802
 6803	err = bpf_object__elf_init(obj);
 6804	err = err ? : bpf_object__check_endianness(obj);
 6805	err = err ? : bpf_object__elf_collect(obj);
 6806	err = err ? : bpf_object__collect_externs(obj);
 6807	err = err ? : bpf_object__finalize_btf(obj);
 6808	err = err ? : bpf_object__init_maps(obj, opts);
 6809	err = err ? : bpf_object_init_progs(obj, opts);
 6810	err = err ? : bpf_object__collect_relos(obj);
 6811	if (err)
 6812		goto out;
 6813
 6814	bpf_object__elf_finish(obj);
 6815
 6816	return obj;
 6817out:
 6818	bpf_object__close(obj);
 6819	return ERR_PTR(err);
 6820}
 6821
 6822static struct bpf_object *
 6823__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
 6824{
 6825	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
 6826		.relaxed_maps = flags & MAPS_RELAX_COMPAT,
 6827	);
 6828
 6829	/* param validation */
 6830	if (!attr->file)
 6831		return NULL;
 6832
 6833	pr_debug("loading %s\n", attr->file);
 6834	return __bpf_object__open(attr->file, NULL, 0, &opts);
 6835}
 6836
 6837struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
 6838{
 6839	return libbpf_ptr(__bpf_object__open_xattr(attr, 0));
 6840}
 6841
 6842struct bpf_object *bpf_object__open(const char *path)
 6843{
 6844	struct bpf_object_open_attr attr = {
 6845		.file		= path,
 6846		.prog_type	= BPF_PROG_TYPE_UNSPEC,
 6847	};
 6848
 6849	return libbpf_ptr(__bpf_object__open_xattr(&attr, 0));
 6850}
 6851
 6852struct bpf_object *
 6853bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
 6854{
 6855	if (!path)
 6856		return libbpf_err_ptr(-EINVAL);
 6857
 6858	pr_debug("loading %s\n", path);
 6859
 6860	return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts));
 6861}
 6862
 6863struct bpf_object *
 6864bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
 6865		     const struct bpf_object_open_opts *opts)
 6866{
 6867	if (!obj_buf || obj_buf_sz == 0)
 6868		return libbpf_err_ptr(-EINVAL);
 6869
 6870	return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts));
 6871}
 6872
 6873struct bpf_object *
 6874bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
 6875			const char *name)
 6876{
 6877	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
 6878		.object_name = name,
 6879		/* wrong default, but backwards-compatible */
 6880		.relaxed_maps = true,
 6881	);
 6882
 6883	/* returning NULL is wrong, but backwards-compatible */
 6884	if (!obj_buf || obj_buf_sz == 0)
 6885		return errno = EINVAL, NULL;
 6886
 6887	return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts));
 6888}
 6889
 6890static int bpf_object_unload(struct bpf_object *obj)
 6891{
 6892	size_t i;
 6893
 6894	if (!obj)
 6895		return libbpf_err(-EINVAL);
 6896
 6897	for (i = 0; i < obj->nr_maps; i++) {
 6898		zclose(obj->maps[i].fd);
 6899		if (obj->maps[i].st_ops)
 6900			zfree(&obj->maps[i].st_ops->kern_vdata);
 6901	}
 6902
 6903	for (i = 0; i < obj->nr_programs; i++)
 6904		bpf_program__unload(&obj->programs[i]);
 6905
 6906	return 0;
 6907}
 6908
 6909int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload")));
 6910
 6911static int bpf_object__sanitize_maps(struct bpf_object *obj)
 6912{
 6913	struct bpf_map *m;
 6914
 6915	bpf_object__for_each_map(m, obj) {
 6916		if (!bpf_map__is_internal(m))
 6917			continue;
 6918		if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) {
 6919			pr_warn("kernel doesn't support global data\n");
 6920			return -ENOTSUP;
 6921		}
 6922		if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
 6923			m->def.map_flags ^= BPF_F_MMAPABLE;
 6924	}
 6925
 6926	return 0;
 6927}
 6928
 6929static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
 6930{
 6931	char sym_type, sym_name[500];
 6932	unsigned long long sym_addr;
 6933	const struct btf_type *t;
 6934	struct extern_desc *ext;
 6935	int ret, err = 0;
 6936	FILE *f;
 6937
 6938	f = fopen("/proc/kallsyms", "r");
 6939	if (!f) {
 6940		err = -errno;
 6941		pr_warn("failed to open /proc/kallsyms: %d\n", err);
 6942		return err;
 6943	}
 6944
 6945	while (true) {
 6946		ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
 6947			     &sym_addr, &sym_type, sym_name);
 6948		if (ret == EOF && feof(f))
 6949			break;
 6950		if (ret != 3) {
 6951			pr_warn("failed to read kallsyms entry: %d\n", ret);
 6952			err = -EINVAL;
 6953			goto out;
 6954		}
 6955
 6956		ext = find_extern_by_name(obj, sym_name);
 6957		if (!ext || ext->type != EXT_KSYM)
 6958			continue;
 6959
 6960		t = btf__type_by_id(obj->btf, ext->btf_id);
 6961		if (!btf_is_var(t))
 6962			continue;
 6963
 6964		if (ext->is_set && ext->ksym.addr != sym_addr) {
 6965			pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
 6966				sym_name, ext->ksym.addr, sym_addr);
 6967			err = -EINVAL;
 6968			goto out;
 6969		}
 6970		if (!ext->is_set) {
 6971			ext->is_set = true;
 6972			ext->ksym.addr = sym_addr;
 6973			pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
 6974		}
 6975	}
 6976
 6977out:
 6978	fclose(f);
 6979	return err;
 6980}
 6981
 6982static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
 6983			    __u16 kind, struct btf **res_btf,
 6984			    struct module_btf **res_mod_btf)
 6985{
 6986	struct module_btf *mod_btf;
 6987	struct btf *btf;
 6988	int i, id, err;
 6989
 6990	btf = obj->btf_vmlinux;
 6991	mod_btf = NULL;
 6992	id = btf__find_by_name_kind(btf, ksym_name, kind);
 6993
 6994	if (id == -ENOENT) {
 6995		err = load_module_btfs(obj);
 6996		if (err)
 6997			return err;
 6998
 6999		for (i = 0; i < obj->btf_module_cnt; i++) {
 7000			/* we assume module_btf's BTF FD is always >0 */
 7001			mod_btf = &obj->btf_modules[i];
 7002			btf = mod_btf->btf;
 7003			id = btf__find_by_name_kind_own(btf, ksym_name, kind);
 7004			if (id != -ENOENT)
 7005				break;
 7006		}
 7007	}
 7008	if (id <= 0)
 7009		return -ESRCH;
 7010
 7011	*res_btf = btf;
 7012	*res_mod_btf = mod_btf;
 7013	return id;
 7014}
 7015
 7016static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
 7017					       struct extern_desc *ext)
 7018{
 7019	const struct btf_type *targ_var, *targ_type;
 7020	__u32 targ_type_id, local_type_id;
 7021	struct module_btf *mod_btf = NULL;
 7022	const char *targ_var_name;
 7023	struct btf *btf = NULL;
 7024	int id, err;
 7025
 7026	id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
 7027	if (id < 0) {
 7028		if (id == -ESRCH && ext->is_weak)
 7029			return 0;
 7030		pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
 7031			ext->name);
 7032		return id;
 7033	}
 7034
 7035	/* find local type_id */
 7036	local_type_id = ext->ksym.type_id;
 7037
 7038	/* find target type_id */
 7039	targ_var = btf__type_by_id(btf, id);
 7040	targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
 7041	targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
 7042
 7043	err = bpf_core_types_are_compat(obj->btf, local_type_id,
 7044					btf, targ_type_id);
 7045	if (err <= 0) {
 7046		const struct btf_type *local_type;
 7047		const char *targ_name, *local_name;
 7048
 7049		local_type = btf__type_by_id(obj->btf, local_type_id);
 7050		local_name = btf__name_by_offset(obj->btf, local_type->name_off);
 7051		targ_name = btf__name_by_offset(btf, targ_type->name_off);
 7052
 7053		pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
 7054			ext->name, local_type_id,
 7055			btf_kind_str(local_type), local_name, targ_type_id,
 7056			btf_kind_str(targ_type), targ_name);
 7057		return -EINVAL;
 7058	}
 7059
 7060	ext->is_set = true;
 7061	ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
 7062	ext->ksym.kernel_btf_id = id;
 7063	pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
 7064		 ext->name, id, btf_kind_str(targ_var), targ_var_name);
 7065
 7066	return 0;
 7067}
 7068
 7069static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
 7070						struct extern_desc *ext)
 7071{
 7072	int local_func_proto_id, kfunc_proto_id, kfunc_id;
 7073	struct module_btf *mod_btf = NULL;
 7074	const struct btf_type *kern_func;
 7075	struct btf *kern_btf = NULL;
 7076	int ret;
 7077
 7078	local_func_proto_id = ext->ksym.type_id;
 7079
 7080	kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf);
 7081	if (kfunc_id < 0) {
 7082		if (kfunc_id == -ESRCH && ext->is_weak)
 7083			return 0;
 7084		pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
 7085			ext->name);
 7086		return kfunc_id;
 7087	}
 7088
 7089	kern_func = btf__type_by_id(kern_btf, kfunc_id);
 7090	kfunc_proto_id = kern_func->type;
 7091
 7092	ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
 7093					kern_btf, kfunc_proto_id);
 7094	if (ret <= 0) {
 7095		pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n",
 7096			ext->name, local_func_proto_id, kfunc_proto_id);
 7097		return -EINVAL;
 7098	}
 7099
 7100	/* set index for module BTF fd in fd_array, if unset */
 7101	if (mod_btf && !mod_btf->fd_array_idx) {
 7102		/* insn->off is s16 */
 7103		if (obj->fd_array_cnt == INT16_MAX) {
 7104			pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
 7105				ext->name, mod_btf->fd_array_idx);
 7106			return -E2BIG;
 7107		}
 7108		/* Cannot use index 0 for module BTF fd */
 7109		if (!obj->fd_array_cnt)
 7110			obj->fd_array_cnt = 1;
 7111
 7112		ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
 7113					obj->fd_array_cnt + 1);
 7114		if (ret)
 7115			return ret;
 7116		mod_btf->fd_array_idx = obj->fd_array_cnt;
 7117		/* we assume module BTF FD is always >0 */
 7118		obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
 7119	}
 7120
 7121	ext->is_set = true;
 7122	ext->ksym.kernel_btf_id = kfunc_id;
 7123	ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
 7124	pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n",
 7125		 ext->name, kfunc_id);
 7126
 7127	return 0;
 7128}
 7129
 7130static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
 7131{
 7132	const struct btf_type *t;
 7133	struct extern_desc *ext;
 7134	int i, err;
 7135
 7136	for (i = 0; i < obj->nr_extern; i++) {
 7137		ext = &obj->externs[i];
 7138		if (ext->type != EXT_KSYM || !ext->ksym.type_id)
 7139			continue;
 7140
 7141		if (obj->gen_loader) {
 7142			ext->is_set = true;
 7143			ext->ksym.kernel_btf_obj_fd = 0;
 7144			ext->ksym.kernel_btf_id = 0;
 7145			continue;
 7146		}
 7147		t = btf__type_by_id(obj->btf, ext->btf_id);
 7148		if (btf_is_var(t))
 7149			err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
 7150		else
 7151			err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
 7152		if (err)
 7153			return err;
 7154	}
 7155	return 0;
 7156}
 7157
/*
 * Resolve every extern recorded in obj->externs.
 *
 * A first pass classifies externs and decides which data sources are
 * needed: LINUX_KERNEL_VERSION is filled in immediately, CONFIG_* kcfg
 * externs require Kconfig data, typed ksyms require kernel BTF and typeless
 * ksyms require /proc/kallsyms. Each needed source is then consulted, and a
 * final pass rejects any strong extern that is still unset.
 *
 * @extra_kconfig: optional in-memory Kconfig text that is consulted before
 * the Kconfig file; may be NULL.
 *
 * Returns 0 on success, -ESRCH when a strong extern stays unresolved and
 * -EINVAL for an unrecognized extern, an unknown kernel version or a failure
 * of any data source (source errors are collapsed to -EINVAL). An error from
 * set_kcfg_value_num() is returned as-is.
 */
static int bpf_object__resolve_externs(struct bpf_object *obj,
				       const char *extra_kconfig)
{
	bool need_config = false, need_kallsyms = false;
	bool need_vmlinux_btf = false;
	struct extern_desc *ext;
	void *kcfg_data = NULL;
	int err, i;

	if (obj->nr_extern == 0)
		return 0;

	/* kcfg values are written straight into the kconfig map's mmaped area */
	if (obj->kconfig_map_idx >= 0)
		kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;

	for (i = 0; i < obj->nr_extern; i++) {
		ext = &obj->externs[i];

		if (ext->type == EXT_KCFG &&
		    strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
			void *ext_val = kcfg_data + ext->kcfg.data_off;
			__u32 kver = get_kernel_version();

			if (!kver) {
				pr_warn("failed to get kernel version\n");
				return -EINVAL;
			}
			err = set_kcfg_value_num(ext, ext_val, kver);
			if (err)
				return err;
			pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
		} else if (ext->type == EXT_KCFG && str_has_pfx(ext->name, "CONFIG_")) {
			need_config = true;
		} else if (ext->type == EXT_KSYM) {
			/* typed ksyms go through BTF, typeless ones through kallsyms */
			if (ext->ksym.type_id)
				need_vmlinux_btf = true;
			else
				need_kallsyms = true;
		} else {
			pr_warn("unrecognized extern '%s'\n", ext->name);
			return -EINVAL;
		}
	}
	if (need_config && extra_kconfig) {
		err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
		if (err)
			return -EINVAL;
		/* fall back to the Kconfig file only if some kcfg extern is still unset */
		need_config = false;
		for (i = 0; i < obj->nr_extern; i++) {
			ext = &obj->externs[i];
			if (ext->type == EXT_KCFG && !ext->is_set) {
				need_config = true;
				break;
			}
		}
	}
	if (need_config) {
		err = bpf_object__read_kconfig_file(obj, kcfg_data);
		if (err)
			return -EINVAL;
	}
	if (need_kallsyms) {
		err = bpf_object__read_kallsyms_file(obj);
		if (err)
			return -EINVAL;
	}
	if (need_vmlinux_btf) {
		err = bpf_object__resolve_ksyms_btf_id(obj);
		if (err)
			return -EINVAL;
	}
	/* strong externs must be resolved by now; weak ones may stay unset */
	for (i = 0; i < obj->nr_extern; i++) {
		ext = &obj->externs[i];

		if (!ext->is_set && !ext->is_weak) {
			pr_warn("extern %s (strong) not resolved\n", ext->name);
			return -ESRCH;
		} else if (!ext->is_set) {
			pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
				 ext->name);
		}
	}

	return 0;
}
 7243
/*
 * Load the BPF object described by @attr.
 *
 * Runs the load pipeline step by step; the first failing step stops the
 * chain (each later step is skipped once err is non-zero). The per-load
 * state (fd_array, module BTFs, vmlinux BTF) is released whether or not the
 * load succeeded, and the object is marked as loaded so that a second
 * attempt is rejected.
 *
 * Returns 0 on success. On failure auto-pinned maps are unpinned, the
 * object is unloaded and the error is returned through libbpf_err().
 * -EINVAL is returned for a NULL @attr, a NULL attr->obj or an object that
 * was already loaded.
 */
int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
{
	struct bpf_object *obj;
	int err, i;

	if (!attr)
		return libbpf_err(-EINVAL);
	obj = attr->obj;
	if (!obj)
		return libbpf_err(-EINVAL);

	if (obj->loaded) {
		pr_warn("object '%s': load can't be attempted twice\n", obj->name);
		return libbpf_err(-EINVAL);
	}

	if (obj->gen_loader)
		bpf_gen__init(obj->gen_loader, attr->log_level, obj->nr_programs, obj->nr_maps);

	/* "err ? : step()" runs step() only while no earlier step has failed */
	err = bpf_object__probe_loading(obj);
	err = err ? : bpf_object__load_vmlinux_btf(obj, false);
	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
	err = err ? : bpf_object__sanitize_and_load_btf(obj);
	err = err ? : bpf_object__sanitize_maps(obj);
	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
	err = err ? : bpf_object__create_maps(obj);
	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path);
	err = err ? : bpf_object__load_progs(obj, attr->log_level);

	if (obj->gen_loader) {
		/* reset FDs */
		if (obj->btf)
			btf__set_fd(obj->btf, -1);
		for (i = 0; i < obj->nr_maps; i++)
			obj->maps[i].fd = -1;
		if (!err)
			err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
	}

	/* clean up fd_array */
	zfree(&obj->fd_array);

	/* clean up module BTFs */
	for (i = 0; i < obj->btf_module_cnt; i++) {
		close(obj->btf_modules[i].fd);
		btf__free(obj->btf_modules[i].btf);
		free(obj->btf_modules[i].name);
	}
	free(obj->btf_modules);

	/* clean up vmlinux BTF */
	btf__free(obj->btf_vmlinux);
	obj->btf_vmlinux = NULL;

	obj->loaded = true; /* doesn't matter if successfully or not */

	if (err)
		goto out;

	return 0;
out:
	/* unpin any maps that were auto-pinned during load */
	for (i = 0; i < obj->nr_maps; i++)
		if (obj->maps[i].pinned && !obj->maps[i].reused)
			bpf_map__unpin(&obj->maps[i], NULL);

	bpf_object_unload(obj);
	pr_warn("failed to load object '%s'\n", obj->path);
	return libbpf_err(err);
}
 7314
 7315int bpf_object__load(struct bpf_object *obj)
 7316{
 7317	struct bpf_object_load_attr attr = {
 7318		.obj = obj,
 7319	};
 7320
 7321	return bpf_object__load_xattr(&attr);
 7322}
 7323
 7324static int make_parent_dir(const char *path)
 7325{
 7326	char *cp, errmsg[STRERR_BUFSIZE];
 7327	char *dname, *dir;
 7328	int err = 0;
 7329
 7330	dname = strdup(path);
 7331	if (dname == NULL)
 7332		return -ENOMEM;
 7333
 7334	dir = dirname(dname);
 7335	if (mkdir(dir, 0700) && errno != EEXIST)
 7336		err = -errno;
 7337
 7338	free(dname);
 7339	if (err) {
 7340		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
 7341		pr_warn("failed to mkdir %s: %s\n", path, cp);
 7342	}
 7343	return err;
 7344}
 7345
 7346static int check_path(const char *path)
 7347{
 7348	char *cp, errmsg[STRERR_BUFSIZE];
 7349	struct statfs st_fs;
 7350	char *dname, *dir;
 7351	int err = 0;
 7352
 7353	if (path == NULL)
 7354		return -EINVAL;
 7355
 7356	dname = strdup(path);
 7357	if (dname == NULL)
 7358		return -ENOMEM;
 7359
 7360	dir = dirname(dname);
 7361	if (statfs(dir, &st_fs)) {
 7362		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 7363		pr_warn("failed to statfs %s: %s\n", dir, cp);
 7364		err = -errno;
 7365	}
 7366	free(dname);
 7367
 7368	if (!err && st_fs.f_type != BPF_FS_MAGIC) {
 7369		pr_warn("specified path %s is not on BPF FS\n", path);
 7370		err = -EINVAL;
 7371	}
 7372
 7373	return err;
 7374}
 7375
 7376static int bpf_program_pin_instance(struct bpf_program *prog, const char *path, int instance)
 7377{
 7378	char *cp, errmsg[STRERR_BUFSIZE];
 7379	int err;
 7380
 7381	err = make_parent_dir(path);
 7382	if (err)
 7383		return libbpf_err(err);
 7384
 7385	err = check_path(path);
 7386	if (err)
 7387		return libbpf_err(err);
 7388
 7389	if (prog == NULL) {
 7390		pr_warn("invalid program pointer\n");
 7391		return libbpf_err(-EINVAL);
 7392	}
 7393
 7394	if (instance < 0 || instance >= prog->instances.nr) {
 7395		pr_warn("invalid prog instance %d of prog %s (max %d)\n",
 7396			instance, prog->name, prog->instances.nr);
 7397		return libbpf_err(-EINVAL);
 7398	}
 7399
 7400	if (bpf_obj_pin(prog->instances.fds[instance], path)) {
 7401		err = -errno;
 7402		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 7403		pr_warn("failed to pin program: %s\n", cp);
 7404		return libbpf_err(err);
 7405	}
 7406	pr_debug("pinned program '%s'\n", path);
 7407
 7408	return 0;
 7409}
 7410
 7411static int bpf_program_unpin_instance(struct bpf_program *prog, const char *path, int instance)
 7412{
 7413	int err;
 7414
 7415	err = check_path(path);
 7416	if (err)
 7417		return libbpf_err(err);
 7418
 7419	if (prog == NULL) {
 7420		pr_warn("invalid program pointer\n");
 7421		return libbpf_err(-EINVAL);
 7422	}
 7423
 7424	if (instance < 0 || instance >= prog->instances.nr) {
 7425		pr_warn("invalid prog instance %d of prog %s (max %d)\n",
 7426			instance, prog->name, prog->instances.nr);
 7427		return libbpf_err(-EINVAL);
 7428	}
 7429
 7430	err = unlink(path);
 7431	if (err != 0)
 7432		return libbpf_err(-errno);
 7433
 7434	pr_debug("unpinned program '%s'\n", path);
 7435
 7436	return 0;
 7437}
 7438
 7439__attribute__((alias("bpf_program_pin_instance")))
 7440int bpf_object__pin_instance(struct bpf_program *prog, const char *path, int instance);
 7441
 7442__attribute__((alias("bpf_program_unpin_instance")))
 7443int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, int instance);
 7444
 7445int bpf_program__pin(struct bpf_program *prog, const char *path)
 7446{
 7447	int i, err;
 7448
 7449	err = make_parent_dir(path);
 7450	if (err)
 7451		return libbpf_err(err);
 7452
 7453	err = check_path(path);
 7454	if (err)
 7455		return libbpf_err(err);
 7456
 7457	if (prog == NULL) {
 7458		pr_warn("invalid program pointer\n");
 7459		return libbpf_err(-EINVAL);
 7460	}
 7461
 7462	if (prog->instances.nr <= 0) {
 7463		pr_warn("no instances of prog %s to pin\n", prog->name);
 7464		return libbpf_err(-EINVAL);
 7465	}
 7466
 7467	if (prog->instances.nr == 1) {
 7468		/* don't create subdirs when pinning single instance */
 7469		return bpf_program_pin_instance(prog, path, 0);
 7470	}
 7471
 7472	for (i = 0; i < prog->instances.nr; i++) {
 7473		char buf[PATH_MAX];
 7474		int len;
 7475
 7476		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
 7477		if (len < 0) {
 7478			err = -EINVAL;
 7479			goto err_unpin;
 7480		} else if (len >= PATH_MAX) {
 7481			err = -ENAMETOOLONG;
 7482			goto err_unpin;
 7483		}
 7484
 7485		err = bpf_program_pin_instance(prog, buf, i);
 7486		if (err)
 7487			goto err_unpin;
 7488	}
 7489
 7490	return 0;
 7491
 7492err_unpin:
 7493	for (i = i - 1; i >= 0; i--) {
 7494		char buf[PATH_MAX];
 7495		int len;
 7496
 7497		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
 7498		if (len < 0)
 7499			continue;
 7500		else if (len >= PATH_MAX)
 7501			continue;
 7502
 7503		bpf_program_unpin_instance(prog, buf, i);
 7504	}
 7505
 7506	rmdir(path);
 7507
 7508	return libbpf_err(err);
 7509}
 7510
 7511int bpf_program__unpin(struct bpf_program *prog, const char *path)
 7512{
 7513	int i, err;
 7514
 7515	err = check_path(path);
 7516	if (err)
 7517		return libbpf_err(err);
 7518
 7519	if (prog == NULL) {
 7520		pr_warn("invalid program pointer\n");
 7521		return libbpf_err(-EINVAL);
 7522	}
 7523
 7524	if (prog->instances.nr <= 0) {
 7525		pr_warn("no instances of prog %s to pin\n", prog->name);
 7526		return libbpf_err(-EINVAL);
 7527	}
 7528
 7529	if (prog->instances.nr == 1) {
 7530		/* don't create subdirs when pinning single instance */
 7531		return bpf_program_unpin_instance(prog, path, 0);
 7532	}
 7533
 7534	for (i = 0; i < prog->instances.nr; i++) {
 7535		char buf[PATH_MAX];
 7536		int len;
 7537
 7538		len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
 7539		if (len < 0)
 7540			return libbpf_err(-EINVAL);
 7541		else if (len >= PATH_MAX)
 7542			return libbpf_err(-ENAMETOOLONG);
 7543
 7544		err = bpf_program_unpin_instance(prog, buf, i);
 7545		if (err)
 7546			return err;
 7547	}
 7548
 7549	err = rmdir(path);
 7550	if (err)
 7551		return libbpf_err(-errno);
 7552
 7553	return 0;
 7554}
 7555
 7556int bpf_map__pin(struct bpf_map *map, const char *path)
 7557{
 7558	char *cp, errmsg[STRERR_BUFSIZE];
 7559	int err;
 7560
 7561	if (map == NULL) {
 7562		pr_warn("invalid map pointer\n");
 7563		return libbpf_err(-EINVAL);
 7564	}
 7565
 7566	if (map->pin_path) {
 7567		if (path && strcmp(path, map->pin_path)) {
 7568			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
 7569				bpf_map__name(map), map->pin_path, path);
 7570			return libbpf_err(-EINVAL);
 7571		} else if (map->pinned) {
 7572			pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
 7573				 bpf_map__name(map), map->pin_path);
 7574			return 0;
 7575		}
 7576	} else {
 7577		if (!path) {
 7578			pr_warn("missing a path to pin map '%s' at\n",
 7579				bpf_map__name(map));
 7580			return libbpf_err(-EINVAL);
 7581		} else if (map->pinned) {
 7582			pr_warn("map '%s' already pinned\n", bpf_map__name(map));
 7583			return libbpf_err(-EEXIST);
 7584		}
 7585
 7586		map->pin_path = strdup(path);
 7587		if (!map->pin_path) {
 7588			err = -errno;
 7589			goto out_err;
 7590		}
 7591	}
 7592
 7593	err = make_parent_dir(map->pin_path);
 7594	if (err)
 7595		return libbpf_err(err);
 7596
 7597	err = check_path(map->pin_path);
 7598	if (err)
 7599		return libbpf_err(err);
 7600
 7601	if (bpf_obj_pin(map->fd, map->pin_path)) {
 7602		err = -errno;
 7603		goto out_err;
 7604	}
 7605
 7606	map->pinned = true;
 7607	pr_debug("pinned map '%s'\n", map->pin_path);
 7608
 7609	return 0;
 7610
 7611out_err:
 7612	cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
 7613	pr_warn("failed to pin map: %s\n", cp);
 7614	return libbpf_err(err);
 7615}
 7616
 7617int bpf_map__unpin(struct bpf_map *map, const char *path)
 7618{
 7619	int err;
 7620
 7621	if (map == NULL) {
 7622		pr_warn("invalid map pointer\n");
 7623		return libbpf_err(-EINVAL);
 7624	}
 7625
 7626	if (map->pin_path) {
 7627		if (path && strcmp(path, map->pin_path)) {
 7628			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
 7629				bpf_map__name(map), map->pin_path, path);
 7630			return libbpf_err(-EINVAL);
 7631		}
 7632		path = map->pin_path;
 7633	} else if (!path) {
 7634		pr_warn("no path to unpin map '%s' from\n",
 7635			bpf_map__name(map));
 7636		return libbpf_err(-EINVAL);
 7637	}
 7638
 7639	err = check_path(path);
 7640	if (err)
 7641		return libbpf_err(err);
 7642
 7643	err = unlink(path);
 7644	if (err != 0)
 7645		return libbpf_err(-errno);
 7646
 7647	map->pinned = false;
 7648	pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
 7649
 7650	return 0;
 7651}
 7652
 7653int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
 7654{
 7655	char *new = NULL;
 7656
 7657	if (path) {
 7658		new = strdup(path);
 7659		if (!new)
 7660			return libbpf_err(-errno);
 7661	}
 7662
 7663	free(map->pin_path);
 7664	map->pin_path = new;
 7665	return 0;
 7666}
 7667
 7668const char *bpf_map__get_pin_path(const struct bpf_map *map)
 7669{
 7670	return map->pin_path;
 7671}
 7672
 7673const char *bpf_map__pin_path(const struct bpf_map *map)
 7674{
 7675	return map->pin_path;
 7676}
 7677
 7678bool bpf_map__is_pinned(const struct bpf_map *map)
 7679{
 7680	return map->pinned;
 7681}
 7682
 7683static void sanitize_pin_path(char *s)
 7684{
 7685	/* bpffs disallows periods in path names */
 7686	while (*s) {
 7687		if (*s == '.')
 7688			*s = '_';
 7689		s++;
 7690	}
 7691}
 7692
 7693int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 7694{
 7695	struct bpf_map *map;
 7696	int err;
 7697
 7698	if (!obj)
 7699		return libbpf_err(-ENOENT);
 7700
 7701	if (!obj->loaded) {
 7702		pr_warn("object not yet loaded; load it first\n");
 7703		return libbpf_err(-ENOENT);
 7704	}
 7705
 7706	bpf_object__for_each_map(map, obj) {
 7707		char *pin_path = NULL;
 7708		char buf[PATH_MAX];
 7709
 7710		if (path) {
 7711			int len;
 7712
 7713			len = snprintf(buf, PATH_MAX, "%s/%s", path,
 7714				       bpf_map__name(map));
 7715			if (len < 0) {
 7716				err = -EINVAL;
 7717				goto err_unpin_maps;
 7718			} else if (len >= PATH_MAX) {
 7719				err = -ENAMETOOLONG;
 7720				goto err_unpin_maps;
 7721			}
 7722			sanitize_pin_path(buf);
 7723			pin_path = buf;
 7724		} else if (!map->pin_path) {
 7725			continue;
 7726		}
 7727
 7728		err = bpf_map__pin(map, pin_path);
 7729		if (err)
 7730			goto err_unpin_maps;
 7731	}
 7732
 7733	return 0;
 7734
 7735err_unpin_maps:
 7736	while ((map = bpf_map__prev(map, obj))) {
 7737		if (!map->pin_path)
 7738			continue;
 7739
 7740		bpf_map__unpin(map, NULL);
 7741	}
 7742
 7743	return libbpf_err(err);
 7744}
 7745
 7746int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 7747{
 7748	struct bpf_map *map;
 7749	int err;
 7750
 7751	if (!obj)
 7752		return libbpf_err(-ENOENT);
 7753
 7754	bpf_object__for_each_map(map, obj) {
 7755		char *pin_path = NULL;
 7756		char buf[PATH_MAX];
 7757
 7758		if (path) {
 7759			int len;
 7760
 7761			len = snprintf(buf, PATH_MAX, "%s/%s", path,
 7762				       bpf_map__name(map));
 7763			if (len < 0)
 7764				return libbpf_err(-EINVAL);
 7765			else if (len >= PATH_MAX)
 7766				return libbpf_err(-ENAMETOOLONG);
 7767			sanitize_pin_path(buf);
 7768			pin_path = buf;
 7769		} else if (!map->pin_path) {
 7770			continue;
 7771		}
 7772
 7773		err = bpf_map__unpin(map, pin_path);
 7774		if (err)
 7775			return libbpf_err(err);
 7776	}
 7777
 7778	return 0;
 7779}
 7780
 7781int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
 7782{
 7783	struct bpf_program *prog;
 7784	int err;
 7785
 7786	if (!obj)
 7787		return libbpf_err(-ENOENT);
 7788
 7789	if (!obj->loaded) {
 7790		pr_warn("object not yet loaded; load it first\n");
 7791		return libbpf_err(-ENOENT);
 7792	}
 7793
 7794	bpf_object__for_each_program(prog, obj) {
 7795		char buf[PATH_MAX];
 7796		int len;
 7797
 7798		len = snprintf(buf, PATH_MAX, "%s/%s", path,
 7799			       prog->pin_name);
 7800		if (len < 0) {
 7801			err = -EINVAL;
 7802			goto err_unpin_programs;
 7803		} else if (len >= PATH_MAX) {
 7804			err = -ENAMETOOLONG;
 7805			goto err_unpin_programs;
 7806		}
 7807
 7808		err = bpf_program__pin(prog, buf);
 7809		if (err)
 7810			goto err_unpin_programs;
 7811	}
 7812
 7813	return 0;
 7814
 7815err_unpin_programs:
 7816	while ((prog = bpf_program__prev(prog, obj))) {
 7817		char buf[PATH_MAX];
 7818		int len;
 7819
 7820		len = snprintf(buf, PATH_MAX, "%s/%s", path,
 7821			       prog->pin_name);
 7822		if (len < 0)
 7823			continue;
 7824		else if (len >= PATH_MAX)
 7825			continue;
 7826
 7827		bpf_program__unpin(prog, buf);
 7828	}
 7829
 7830	return libbpf_err(err);
 7831}
 7832
 7833int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
 7834{
 7835	struct bpf_program *prog;
 7836	int err;
 7837
 7838	if (!obj)
 7839		return libbpf_err(-ENOENT);
 7840
 7841	bpf_object__for_each_program(prog, obj) {
 7842		char buf[PATH_MAX];
 7843		int len;
 7844
 7845		len = snprintf(buf, PATH_MAX, "%s/%s", path,
 7846			       prog->pin_name);
 7847		if (len < 0)
 7848			return libbpf_err(-EINVAL);
 7849		else if (len >= PATH_MAX)
 7850			return libbpf_err(-ENAMETOOLONG);
 7851
 7852		err = bpf_program__unpin(prog, buf);
 7853		if (err)
 7854			return libbpf_err(err);
 7855	}
 7856
 7857	return 0;
 7858}
 7859
 7860int bpf_object__pin(struct bpf_object *obj, const char *path)
 7861{
 7862	int err;
 7863
 7864	err = bpf_object__pin_maps(obj, path);
 7865	if (err)
 7866		return libbpf_err(err);
 7867
 7868	err = bpf_object__pin_programs(obj, path);
 7869	if (err) {
 7870		bpf_object__unpin_maps(obj, path);
 7871		return libbpf_err(err);
 7872	}
 7873
 7874	return 0;
 7875}
 7876
/*
 * Release everything owned by @map: private data (through the clear_priv
 * callback), the inner map, init slots, the mmaped region, struct_ops
 * bookkeeping, name strings, the pin path and finally the map FD.
 * The struct bpf_map itself is not freed here.
 */
static void bpf_map__destroy(struct bpf_map *map)
{
	if (map->clear_priv)
		map->clear_priv(map, map->priv);
	map->priv = NULL;
	map->clear_priv = NULL;

	/* the inner map is destroyed recursively, then its storage is freed */
	if (map->inner_map) {
		bpf_map__destroy(map->inner_map);
		zfree(&map->inner_map);
	}

	zfree(&map->init_slots);
	map->init_slots_sz = 0;

	if (map->mmaped) {
		munmap(map->mmaped, bpf_map_mmap_sz(map));
		map->mmaped = NULL;
	}

	/* members of st_ops must go before st_ops itself */
	if (map->st_ops) {
		zfree(&map->st_ops->data);
		zfree(&map->st_ops->progs);
		zfree(&map->st_ops->kern_func_off);
		zfree(&map->st_ops);
	}

	zfree(&map->name);
	zfree(&map->real_name);
	zfree(&map->pin_path);

	if (map->fd >= 0)
		zclose(map->fd);
}
 7911
/*
 * Tear down @obj and free it. A NULL or error-encoded pointer is ignored.
 *
 * The object's private data is cleared through its callback, the
 * gen_loader, ELF state, loaded programs and BTF data are released, every
 * map and program is destroyed, and the object is unlinked from the list it
 * is on before being freed.
 */
void bpf_object__close(struct bpf_object *obj)
{
	size_t i;

	if (IS_ERR_OR_NULL(obj))
		return;

	if (obj->clear_priv)
		obj->clear_priv(obj, obj->priv);

	bpf_gen__free(obj->gen_loader);
	bpf_object__elf_finish(obj);
	bpf_object_unload(obj);
	btf__free(obj->btf);
	btf_ext__free(obj->btf_ext);

	/* destroy map contents first, the maps array is freed further down */
	for (i = 0; i < obj->nr_maps; i++)
		bpf_map__destroy(&obj->maps[i]);

	zfree(&obj->btf_custom_path);
	zfree(&obj->kconfig);
	zfree(&obj->externs);
	obj->nr_extern = 0;

	zfree(&obj->maps);
	obj->nr_maps = 0;

	if (obj->programs && obj->nr_programs) {
		for (i = 0; i < obj->nr_programs; i++)
			bpf_program__exit(&obj->programs[i]);
	}
	zfree(&obj->programs);

	list_del(&obj->list);
	free(obj);
}
 7948
 7949struct bpf_object *
 7950bpf_object__next(struct bpf_object *prev)
 7951{
 7952	struct bpf_object *next;
 7953	bool strict = (libbpf_mode & LIBBPF_STRICT_NO_OBJECT_LIST);
 7954
 7955	if (strict)
 7956		return NULL;
 7957
 7958	if (!prev)
 7959		next = list_first_entry(&bpf_objects_list,
 7960					struct bpf_object,
 7961					list);
 7962	else
 7963		next = list_next_entry(prev, list);
 7964
 7965	/* Empty list is noticed here so don't need checking on entry. */
 7966	if (&next->list == &bpf_objects_list)
 7967		return NULL;
 7968
 7969	return next;
 7970}
 7971
 7972const char *bpf_object__name(const struct bpf_object *obj)
 7973{
 7974	return obj ? obj->name : libbpf_err_ptr(-EINVAL);
 7975}
 7976
 7977unsigned int bpf_object__kversion(const struct bpf_object *obj)
 7978{
 7979	return obj ? obj->kern_version : 0;
 7980}
 7981
 7982struct btf *bpf_object__btf(const struct bpf_object *obj)
 7983{
 7984	return obj ? obj->btf : NULL;
 7985}
 7986
 7987int bpf_object__btf_fd(const struct bpf_object *obj)
 7988{
 7989	return obj->btf ? btf__fd(obj->btf) : -1;
 7990}
 7991
 7992int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
 7993{
 7994	if (obj->loaded)
 7995		return libbpf_err(-EINVAL);
 7996
 7997	obj->kern_version = kern_version;
 7998
 7999	return 0;
 8000}
 8001
 8002int bpf_object__set_priv(struct bpf_object *obj, void *priv,
 8003			 bpf_object_clear_priv_t clear_priv)
 8004{
 8005	if (obj->priv && obj->clear_priv)
 8006		obj->clear_priv(obj, obj->priv);
 8007
 8008	obj->priv = priv;
 8009	obj->clear_priv = clear_priv;
 8010	return 0;
 8011}
 8012
 8013void *bpf_object__priv(const struct bpf_object *obj)
 8014{
 8015	return obj ? obj->priv : libbpf_err_ptr(-EINVAL);
 8016}
 8017
 8018int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
 8019{
 8020	struct bpf_gen *gen;
 8021
 8022	if (!opts)
 8023		return -EFAULT;
 8024	if (!OPTS_VALID(opts, gen_loader_opts))
 8025		return -EINVAL;
 8026	gen = calloc(sizeof(*gen), 1);
 8027	if (!gen)
 8028		return -ENOMEM;
 8029	gen->opts = opts;
 8030	obj->gen_loader = gen;
 8031	return 0;
 8032}
 8033
 8034static struct bpf_program *
 8035__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
 8036		    bool forward)
 8037{
 8038	size_t nr_programs = obj->nr_programs;
 8039	ssize_t idx;
 8040
 8041	if (!nr_programs)
 8042		return NULL;
 8043
 8044	if (!p)
 8045		/* Iter from the beginning */
 8046		return forward ? &obj->programs[0] :
 8047			&obj->programs[nr_programs - 1];
 8048
 8049	if (p->obj != obj) {
 8050		pr_warn("error: program handler doesn't match object\n");
 8051		return errno = EINVAL, NULL;
 8052	}
 8053
 8054	idx = (p - obj->programs) + (forward ? 1 : -1);
 8055	if (idx >= obj->nr_programs || idx < 0)
 8056		return NULL;
 8057	return &obj->programs[idx];
 8058}
 8059
 8060struct bpf_program *
 8061bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
 8062{
 8063	return bpf_object__next_program(obj, prev);
 8064}
 8065
 8066struct bpf_program *
 8067bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
 8068{
 8069	struct bpf_program *prog = prev;
 8070
 8071	do {
 8072		prog = __bpf_program__iter(prog, obj, true);
 8073	} while (prog && prog_is_subprog(obj, prog));
 8074
 8075	return prog;
 8076}
 8077
 8078struct bpf_program *
 8079bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
 8080{
 8081	return bpf_object__prev_program(obj, next);
 8082}
 8083
 8084struct bpf_program *
 8085bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
 8086{
 8087	struct bpf_program *prog = next;
 8088
 8089	do {
 8090		prog = __bpf_program__iter(prog, obj, false);
 8091	} while (prog && prog_is_subprog(obj, prog));
 8092
 8093	return prog;
 8094}
 8095
 8096int bpf_program__set_priv(struct bpf_program *prog, void *priv,
 8097			  bpf_program_clear_priv_t clear_priv)
 8098{
 8099	if (prog->priv && prog->clear_priv)
 8100		prog->clear_priv(prog, prog->priv);
 8101
 8102	prog->priv = priv;
 8103	prog->clear_priv = clear_priv;
 8104	return 0;
 8105}
 8106
 8107void *bpf_program__priv(const struct bpf_program *prog)
 8108{
 8109	return prog ? prog->priv : libbpf_err_ptr(-EINVAL);
 8110}
 8111
 8112void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
 8113{
 8114	prog->prog_ifindex = ifindex;
 8115}
 8116
 8117const char *bpf_program__name(const struct bpf_program *prog)
 8118{
 8119	return prog->name;
 8120}
 8121
 8122const char *bpf_program__section_name(const struct bpf_program *prog)
 8123{
 8124	return prog->sec_name;
 8125}
 8126
 8127const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
 8128{
 8129	const char *title;
 8130
 8131	title = prog->sec_name;
 8132	if (needs_copy) {
 8133		title = strdup(title);
 8134		if (!title) {
 8135			pr_warn("failed to strdup program title\n");
 8136			return libbpf_err_ptr(-ENOMEM);
 8137		}
 8138	}
 8139
 8140	return title;
 8141}
 8142
 8143bool bpf_program__autoload(const struct bpf_program *prog)
 8144{
 8145	return prog->load;
 8146}
 8147
 8148int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
 8149{
 8150	if (prog->obj->loaded)
 8151		return libbpf_err(-EINVAL);
 8152
 8153	prog->load = autoload;
 8154	return 0;
 8155}
 8156
 8157int bpf_program__fd(const struct bpf_program *prog)
 8158{
 8159	return bpf_program__nth_fd(prog, 0);
 8160}
 8161
 8162size_t bpf_program__size(const struct bpf_program *prog)
 8163{
 8164	return prog->insns_cnt * BPF_INSN_SZ;
 8165}
 8166
 8167const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
 8168{
 8169	return prog->insns;
 8170}
 8171
 8172size_t bpf_program__insn_cnt(const struct bpf_program *prog)
 8173{
 8174	return prog->insns_cnt;
 8175}
 8176
 8177int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
 8178			  bpf_program_prep_t prep)
 8179{
 8180	int *instances_fds;
 8181
 8182	if (nr_instances <= 0 || !prep)
 8183		return libbpf_err(-EINVAL);
 8184
 8185	if (prog->instances.nr > 0 || prog->instances.fds) {
 8186		pr_warn("Can't set pre-processor after loading\n");
 8187		return libbpf_err(-EINVAL);
 8188	}
 8189
 8190	instances_fds = malloc(sizeof(int) * nr_instances);
 8191	if (!instances_fds) {
 8192		pr_warn("alloc memory failed for fds\n");
 8193		return libbpf_err(-ENOMEM);
 8194	}
 8195
 8196	/* fill all fd with -1 */
 8197	memset(instances_fds, -1, sizeof(int) * nr_instances);
 8198
 8199	prog->instances.nr = nr_instances;
 8200	prog->instances.fds = instances_fds;
 8201	prog->preprocessor = prep;
 8202	return 0;
 8203}
 8204
 8205int bpf_program__nth_fd(const struct bpf_program *prog, int n)
 8206{
 8207	int fd;
 8208
 8209	if (!prog)
 8210		return libbpf_err(-EINVAL);
 8211
 8212	if (n >= prog->instances.nr || n < 0) {
 8213		pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
 8214			n, prog->name, prog->instances.nr);
 8215		return libbpf_err(-EINVAL);
 8216	}
 8217
 8218	fd = prog->instances.fds[n];
 8219	if (fd < 0) {
 8220		pr_warn("%dth instance of program '%s' is invalid\n",
 8221			n, prog->name);
 8222		return libbpf_err(-ENOENT);
 8223	}
 8224
 8225	return fd;
 8226}
 8227
 8228enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog)
 8229{
 8230	return prog->type;
 8231}
 8232
 8233void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
 8234{
 8235	prog->type = type;
 8236}
 8237
 8238static bool bpf_program__is_type(const struct bpf_program *prog,
 8239				 enum bpf_prog_type type)
 8240{
 8241	return prog ? (prog->type == type) : false;
 8242}
 8243
/* Generate a pair of per-type convenience functions for program type TYPE:
 *   - bpf_program__set_NAME(): sets the program's type to TYPE; returns
 *     libbpf_err(-EINVAL) for a NULL program, 0 otherwise;
 *   - bpf_program__is_NAME(): true if the program's type equals TYPE
 *     (false for a NULL program, see bpf_program__is_type()).
 */
#define BPF_PROG_TYPE_FNS(NAME, TYPE)				\
int bpf_program__set_##NAME(struct bpf_program *prog)		\
{								\
	if (!prog)						\
		return libbpf_err(-EINVAL);			\
	bpf_program__set_type(prog, TYPE);			\
	return 0;						\
}								\
								\
bool bpf_program__is_##NAME(const struct bpf_program *prog)	\
{								\
	return bpf_program__is_type(prog, TYPE);		\
}								\

/* Instantiate the setter/predicate pair for each supported shorthand name. */
BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING);
BPF_PROG_TYPE_FNS(struct_ops, BPF_PROG_TYPE_STRUCT_OPS);
BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
 8271
 8272enum bpf_attach_type
 8273bpf_program__get_expected_attach_type(const struct bpf_program *prog)
 8274{
 8275	return prog->expected_attach_type;
 8276}
 8277
 8278void bpf_program__set_expected_attach_type(struct bpf_program *prog,
 8279					   enum bpf_attach_type type)
 8280{
 8281	prog->expected_attach_type = type;
 8282}
 8283
/* Build one struct bpf_sec_def initializer.
 *
 * @sec_pfx is the section name pattern, @ptype is the BPF_PROG_TYPE_ suffix,
 * @atype the expected attach type and @flags the SEC_* flags, which are stored
 * in .cookie. .preload_fn is always libbpf_preload_prog. Any trailing
 * arguments are appended as further initializers after .preload_fn; being
 * positional, they initialize whichever member follows .preload_fn in
 * struct bpf_sec_def (presumably the attach callback -- the struct is not
 * defined in this part of the file, confirm there).
 */
#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {			    \
	.sec = sec_pfx,							    \
	.prog_type = BPF_PROG_TYPE_##ptype,				    \
	.expected_attach_type = atype,					    \
	.cookie = (long)(flags),					    \
	.preload_fn = libbpf_preload_prog,				    \
	__VA_ARGS__							    \
}
 8292
/* Forward declarations of the per-section attach handlers referenced by the
 * section_defs[] table below. Each takes the program and the section
 * definition's cookie and returns a struct bpf_link pointer.
 */
static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie);
static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie);
static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie);
static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie);
static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie);
static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie);
 8299
/* Table mapping ELF section name patterns to program type, expected attach
 * type, SEC_* flags and (optionally) an attach handler.
 *
 * Pattern conventions, as interpreted by find_sec_def():
 *   - a trailing '/' requires the SEC("type/extras") form (prefix match);
 *   - a trailing '+' accepts either exact SEC("type") or SEC("type/extras");
 *   - SEC_SLOPPY_PFX entries match as a plain prefix unless strict section
 *     name mode is enabled, in which case they must match exactly;
 *   - everything else must match exactly.
 *
 * find_sec_def() scans this table in order and returns the first match, so
 * with prefix matching a longer pattern has to precede any shorter pattern
 * that is its prefix (e.g. "cgroup/sock_create" before "cgroup/sock").
 */
static const struct bpf_sec_def section_defs[] = {
	SEC_DEF("socket",		SOCKET_FILTER, 0, SEC_NONE | SEC_SLOPPY_PFX),
	SEC_DEF("sk_reuseport/migrate",	SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("sk_reuseport",		SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("kprobe/",		KPROBE,	0, SEC_NONE, attach_kprobe),
	SEC_DEF("uprobe/",		KPROBE,	0, SEC_NONE),
	SEC_DEF("kretprobe/",		KPROBE, 0, SEC_NONE, attach_kprobe),
	SEC_DEF("uretprobe/",		KPROBE, 0, SEC_NONE),
	SEC_DEF("tc",			SCHED_CLS, 0, SEC_NONE),
	SEC_DEF("classifier",		SCHED_CLS, 0, SEC_NONE | SEC_SLOPPY_PFX),
	SEC_DEF("action",		SCHED_ACT, 0, SEC_NONE | SEC_SLOPPY_PFX),
	SEC_DEF("tracepoint/",		TRACEPOINT, 0, SEC_NONE, attach_tp),
	SEC_DEF("tp/",			TRACEPOINT, 0, SEC_NONE, attach_tp),
	SEC_DEF("raw_tracepoint/",	RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
	SEC_DEF("raw_tp/",		RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
	SEC_DEF("raw_tracepoint.w/",	RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
	SEC_DEF("raw_tp.w/",		RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
	SEC_DEF("tp_btf/",		TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("fentry/",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("fmod_ret/",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("fexit/",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("fentry.s/",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
	SEC_DEF("fmod_ret.s/",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
	SEC_DEF("fexit.s/",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
	SEC_DEF("freplace/",		EXT, 0, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("lsm/",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
	SEC_DEF("lsm.s/",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
	SEC_DEF("iter/",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
	SEC_DEF("xdp_devmap/",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
	SEC_DEF("xdp_cpumap/",		XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
	SEC_DEF("xdp",			XDP, BPF_XDP, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
	SEC_DEF("perf_event",		PERF_EVENT, 0, SEC_NONE | SEC_SLOPPY_PFX),
	SEC_DEF("lwt_in",		LWT_IN, 0, SEC_NONE | SEC_SLOPPY_PFX),
	SEC_DEF("lwt_out",		LWT_OUT, 0, SEC_NONE | SEC_SLOPPY_PFX),
	SEC_DEF("lwt_xmit",		LWT_XMIT, 0, SEC_NONE | SEC_SLOPPY_PFX),
	SEC_DEF("lwt_seg6local",	LWT_SEG6LOCAL, 0, SEC_NONE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup_skb/ingress",	CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup_skb/egress",	CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/skb",		CGROUP_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/sock_create",	CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/sock_release",	CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/sock",		CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/post_bind4",	CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/post_bind6",	CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/dev",		CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
	SEC_DEF("sockops",		SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
	SEC_DEF("sk_skb/stream_parser",	SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
	SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
	SEC_DEF("sk_skb",		SK_SKB, 0, SEC_NONE | SEC_SLOPPY_PFX),
	SEC_DEF("sk_msg",		SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
	SEC_DEF("lirc_mode2",		LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
	SEC_DEF("flow_dissector",	FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/bind4",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/bind6",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/connect4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/connect6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/sendmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/sendmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/recvmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/recvmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/getpeername4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/getpeername6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/getsockname4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/getsockname6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/sysctl",	CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/getsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("cgroup/setsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
	SEC_DEF("struct_ops+",		STRUCT_OPS, 0, SEC_NONE),
	SEC_DEF("sk_lookup",		SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE | SEC_SLOPPY_PFX),
};
 8371
/* Per-entry byte budget libbpf_get_type_names() uses to size its output
 * buffer (number of section_defs[] entries times this value).
 */
#define MAX_TYPE_NAME_SIZE 32
 8373
 8374static const struct bpf_sec_def *find_sec_def(const char *sec_name)
 8375{
 8376	const struct bpf_sec_def *sec_def;
 8377	enum sec_def_flags sec_flags;
 8378	int i, n = ARRAY_SIZE(section_defs), len;
 8379	bool strict = libbpf_mode & LIBBPF_STRICT_SEC_NAME;
 8380
 8381	for (i = 0; i < n; i++) {
 8382		sec_def = &section_defs[i];
 8383		sec_flags = sec_def->cookie;
 8384		len = strlen(sec_def->sec);
 8385
 8386		/* "type/" always has to have proper SEC("type/extras") form */
 8387		if (sec_def->sec[len - 1] == '/') {
 8388			if (str_has_pfx(sec_name, sec_def->sec))
 8389				return sec_def;
 8390			continue;
 8391		}
 8392
 8393		/* "type+" means it can be either exact SEC("type") or
 8394		 * well-formed SEC("type/extras") with proper '/' separator
 8395		 */
 8396		if (sec_def->sec[len - 1] == '+') {
 8397			len--;
 8398			/* not even a prefix */
 8399			if (strncmp(sec_name, sec_def->sec, len) != 0)
 8400				continue;
 8401			/* exact match or has '/' separator */
 8402			if (sec_name[len] == '\0' || sec_name[len] == '/')
 8403				return sec_def;
 8404			continue;
 8405		}
 8406
 8407		/* SEC_SLOPPY_PFX definitions are allowed to be just prefix
 8408		 * matches, unless strict section name mode
 8409		 * (LIBBPF_STRICT_SEC_NAME) is enabled, in which case the
 8410		 * match has to be exact.
 8411		 */
 8412		if ((sec_flags & SEC_SLOPPY_PFX) && !strict)  {
 8413			if (str_has_pfx(sec_name, sec_def->sec))
 8414				return sec_def;
 8415			continue;
 8416		}
 8417
 8418		/* Definitions not marked SEC_SLOPPY_PFX (e.g.,
 8419		 * SEC("syscall")) are exact matches in both modes.
 8420		 */
 8421		if (strcmp(sec_name, sec_def->sec) == 0)
 8422			return sec_def;
 8423	}
 8424	return NULL;
 8425}
 8426
 8427static char *libbpf_get_type_names(bool attach_type)
 8428{
 8429	int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
 8430	char *buf;
 8431
 8432	buf = malloc(len);
 8433	if (!buf)
 8434		return NULL;
 8435
 8436	buf[0] = '\0';
 8437	/* Forge string buf with all available names */
 8438	for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
 8439		const struct bpf_sec_def *sec_def = &section_defs[i];
 8440
 8441		if (attach_type) {
 8442			if (sec_def->preload_fn != libbpf_preload_prog)
 8443				continue;
 8444
 8445			if (!(sec_def->cookie & SEC_ATTACHABLE))
 8446				continue;
 8447		}
 8448
 8449		if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
 8450			free(buf);
 8451			return NULL;
 8452		}
 8453		strcat(buf, " ");
 8454		strcat(buf, section_defs[i].sec);
 8455	}
 8456
 8457	return buf;
 8458}
 8459
 8460int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
 8461			     enum bpf_attach_type *expected_attach_type)
 8462{
 8463	const struct bpf_sec_def *sec_def;
 8464	char *type_names;
 8465
 8466	if (!name)
 8467		return libbpf_err(-EINVAL);
 8468
 8469	sec_def = find_sec_def(name);
 8470	if (sec_def) {
 8471		*prog_type = sec_def->prog_type;
 8472		*expected_attach_type = sec_def->expected_attach_type;
 8473		return 0;
 8474	}
 8475
 8476	pr_debug("failed to guess program type from ELF section '%s'\n", name);
 8477	type_names = libbpf_get_type_names(false);
 8478	if (type_names != NULL) {
 8479		pr_debug("supported section(type) names are:%s\n", type_names);
 8480		free(type_names);
 8481	}
 8482
 8483	return libbpf_err(-ESRCH);
 8484}
 8485
 8486static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
 8487						     size_t offset)
 8488{
 8489	struct bpf_map *map;
 8490	size_t i;
 8491
 8492	for (i = 0; i < obj->nr_maps; i++) {
 8493		map = &obj->maps[i];
 8494		if (!bpf_map__is_struct_ops(map))
 8495			continue;
 8496		if (map->sec_offset <= offset &&
 8497		    offset - map->sec_offset < map->def.value_size)
 8498			return map;
 8499	}
 8500
 8501	return NULL;
 8502}
 8503
 8504/* Collect the reloc from ELF and populate the st_ops->progs[] */
 8505static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
 8506					    Elf64_Shdr *shdr, Elf_Data *data)
 8507{
 8508	const struct btf_member *member;
 8509	struct bpf_struct_ops *st_ops;
 8510	struct bpf_program *prog;
 8511	unsigned int shdr_idx;
 8512	const struct btf *btf;
 8513	struct bpf_map *map;
 8514	unsigned int moff, insn_idx;
 8515	const char *name;
 8516	__u32 member_idx;
 8517	Elf64_Sym *sym;
 8518	Elf64_Rel *rel;
 8519	int i, nrels;
 8520
 8521	btf = obj->btf;
 8522	nrels = shdr->sh_size / shdr->sh_entsize;
 8523	for (i = 0; i < nrels; i++) {
 8524		rel = elf_rel_by_idx(data, i);
 8525		if (!rel) {
 8526			pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
 8527			return -LIBBPF_ERRNO__FORMAT;
 8528		}
 8529
 8530		sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
 8531		if (!sym) {
 8532			pr_warn("struct_ops reloc: symbol %zx not found\n",
 8533				(size_t)ELF64_R_SYM(rel->r_info));
 8534			return -LIBBPF_ERRNO__FORMAT;
 8535		}
 8536
 8537		name = elf_sym_str(obj, sym->st_name) ?: "<?>";
 8538		map = find_struct_ops_map_by_offset(obj, rel->r_offset);
 8539		if (!map) {
 8540			pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
 8541				(size_t)rel->r_offset);
 8542			return -EINVAL;
 8543		}
 8544
 8545		moff = rel->r_offset - map->sec_offset;
 8546		shdr_idx = sym->st_shndx;
 8547		st_ops = map->st_ops;
 8548		pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
 8549			 map->name,
 8550			 (long long)(rel->r_info >> 32),
 8551			 (long long)sym->st_value,
 8552			 shdr_idx, (size_t)rel->r_offset,
 8553			 map->sec_offset, sym->st_name, name);
 8554
 8555		if (shdr_idx >= SHN_LORESERVE) {
 8556			pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
 8557				map->name, (size_t)rel->r_offset, shdr_idx);
 8558			return -LIBBPF_ERRNO__RELOC;
 8559		}
 8560		if (sym->st_value % BPF_INSN_SZ) {
 8561			pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
 8562				map->name, (unsigned long long)sym->st_value);
 8563			return -LIBBPF_ERRNO__FORMAT;
 8564		}
 8565		insn_idx = sym->st_value / BPF_INSN_SZ;
 8566
 8567		member = find_member_by_offset(st_ops->type, moff * 8);
 8568		if (!member) {
 8569			pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
 8570				map->name, moff);
 8571			return -EINVAL;
 8572		}
 8573		member_idx = member - btf_members(st_ops->type);
 8574		name = btf__name_by_offset(btf, member->name_off);
 8575
 8576		if (!resolve_func_ptr(btf, member->type, NULL)) {
 8577			pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
 8578				map->name, name);
 8579			return -EINVAL;
 8580		}
 8581
 8582		prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
 8583		if (!prog) {
 8584			pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
 8585				map->name, shdr_idx, name);
 8586			return -EINVAL;
 8587		}
 8588
 8589		/* prevent the use of BPF prog with invalid type */
 8590		if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
 8591			pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
 8592				map->name, prog->name);
 8593			return -EINVAL;
 8594		}
 8595
 8596		/* if we haven't yet processed this BPF program, record proper
 8597		 * attach_btf_id and member_idx
 8598		 */
 8599		if (!prog->attach_btf_id) {
 8600			prog->attach_btf_id = st_ops->type_id;
 8601			prog->expected_attach_type = member_idx;
 8602		}
 8603
 8604		/* struct_ops BPF prog can be re-used between multiple
 8605		 * .struct_ops as long as it's the same struct_ops struct
 8606		 * definition and the same function pointer field
 8607		 */
 8608		if (prog->attach_btf_id != st_ops->type_id ||
 8609		    prog->expected_attach_type != member_idx) {
 8610			pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
 8611				map->name, prog->name, prog->sec_name, prog->type,
 8612				prog->attach_btf_id, prog->expected_attach_type, name);
 8613			return -EINVAL;
 8614		}
 8615
 8616		st_ops->progs[member_idx] = prog;
 8617	}
 8618
 8619	return 0;
 8620}
 8621
/* Name prefixes prepended to the attach target name before it is looked up
 * in BTF; btf_get_kernel_prefix_kind() selects one per attach type.
 */
#define BTF_TRACE_PREFIX "btf_trace_"
#define BTF_LSM_PREFIX "bpf_lsm_"
#define BTF_ITER_PREFIX "bpf_iter_"
/* Size of the buffer holding prefix + name in find_btf_by_prefix_kind() */
#define BTF_MAX_NAME_SIZE 128
 8626
 8627void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
 8628				const char **prefix, int *kind)
 8629{
 8630	switch (attach_type) {
 8631	case BPF_TRACE_RAW_TP:
 8632		*prefix = BTF_TRACE_PREFIX;
 8633		*kind = BTF_KIND_TYPEDEF;
 8634		break;
 8635	case BPF_LSM_MAC:
 8636		*prefix = BTF_LSM_PREFIX;
 8637		*kind = BTF_KIND_FUNC;
 8638		break;
 8639	case BPF_TRACE_ITER:
 8640		*prefix = BTF_ITER_PREFIX;
 8641		*kind = BTF_KIND_FUNC;
 8642		break;
 8643	default:
 8644		*prefix = "";
 8645		*kind = BTF_KIND_FUNC;
 8646	}
 8647}
 8648
 8649static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
 8650				   const char *name, __u32 kind)
 8651{
 8652	char btf_type_name[BTF_MAX_NAME_SIZE];
 8653	int ret;
 8654
 8655	ret = snprintf(btf_type_name, sizeof(btf_type_name),
 8656		       "%s%s", prefix, name);
 8657	/* snprintf returns the number of characters written excluding the
 8658	 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
 8659	 * indicates truncation.
 8660	 */
 8661	if (ret < 0 || ret >= sizeof(btf_type_name))
 8662		return -ENAMETOOLONG;
 8663	return btf__find_by_name_kind(btf, btf_type_name, kind);
 8664}
 8665
 8666static inline int find_attach_btf_id(struct btf *btf, const char *name,
 8667				     enum bpf_attach_type attach_type)
 8668{
 8669	const char *prefix;
 8670	int kind;
 8671
 8672	btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
 8673	return find_btf_by_prefix_kind(btf, prefix, name, kind);
 8674}
 8675
 8676int libbpf_find_vmlinux_btf_id(const char *name,
 8677			       enum bpf_attach_type attach_type)
 8678{
 8679	struct btf *btf;
 8680	int err;
 8681
 8682	btf = btf__load_vmlinux_btf();
 8683	err = libbpf_get_error(btf);
 8684	if (err) {
 8685		pr_warn("vmlinux BTF is not found\n");
 8686		return libbpf_err(err);
 8687	}
 8688
 8689	err = find_attach_btf_id(btf, name, attach_type);
 8690	if (err <= 0)
 8691		pr_warn("%s is not found in vmlinux BTF\n", name);
 8692
 8693	btf__free(btf);
 8694	return libbpf_err(err);
 8695}
 8696
 8697static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 8698{
 8699	struct bpf_prog_info info = {};
 8700	__u32 info_len = sizeof(info);
 8701	struct btf *btf;
 8702	int err;
 8703
 8704	err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len);
 8705	if (err) {
 8706		pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n",
 8707			attach_prog_fd, err);
 8708		return err;
 8709	}
 8710
 8711	err = -EINVAL;
 8712	if (!info.btf_id) {
 8713		pr_warn("The target program doesn't have BTF\n");
 8714		goto out;
 8715	}
 8716	btf = btf__load_from_kernel_by_id(info.btf_id);
 8717	err = libbpf_get_error(btf);
 8718	if (err) {
 8719		pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
 8720		goto out;
 8721	}
 8722	err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
 8723	btf__free(btf);
 8724	if (err <= 0) {
 8725		pr_warn("%s is not found in prog's BTF\n", name);
 8726		goto out;
 8727	}
 8728out:
 8729	return err;
 8730}
 8731
 8732static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
 8733			      enum bpf_attach_type attach_type,
 8734			      int *btf_obj_fd, int *btf_type_id)
 8735{
 8736	int ret, i;
 8737
 8738	ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
 8739	if (ret > 0) {
 8740		*btf_obj_fd = 0; /* vmlinux BTF */
 8741		*btf_type_id = ret;
 8742		return 0;
 8743	}
 8744	if (ret != -ENOENT)
 8745		return ret;
 8746
 8747	ret = load_module_btfs(obj);
 8748	if (ret)
 8749		return ret;
 8750
 8751	for (i = 0; i < obj->btf_module_cnt; i++) {
 8752		const struct module_btf *mod = &obj->btf_modules[i];
 8753
 8754		ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
 8755		if (ret > 0) {
 8756			*btf_obj_fd = mod->fd;
 8757			*btf_type_id = ret;
 8758			return 0;
 8759		}
 8760		if (ret == -ENOENT)
 8761			continue;
 8762
 8763		return ret;
 8764	}
 8765
 8766	return -ESRCH;
 8767}
 8768
 8769static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
 8770				     int *btf_obj_fd, int *btf_type_id)
 8771{
 8772	enum bpf_attach_type attach_type = prog->expected_attach_type;
 8773	__u32 attach_prog_fd = prog->attach_prog_fd;
 8774	int err = 0;
 8775
 8776	/* BPF program's BTF ID */
 8777	if (attach_prog_fd) {
 8778		err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
 8779		if (err < 0) {
 8780			pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
 8781				 attach_prog_fd, attach_name, err);
 8782			return err;
 8783		}
 8784		*btf_obj_fd = 0;
 8785		*btf_type_id = err;
 8786		return 0;
 8787	}
 8788
 8789	/* kernel/module BTF ID */
 8790	if (prog->obj->gen_loader) {
 8791		bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
 8792		*btf_obj_fd = 0;
 8793		*btf_type_id = 1;
 8794	} else {
 8795		err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
 8796	}
 8797	if (err) {
 8798		pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
 8799		return err;
 8800	}
 8801	return 0;
 8802}
 8803
 8804int libbpf_attach_type_by_name(const char *name,
 8805			       enum bpf_attach_type *attach_type)
 8806{
 8807	char *type_names;
 8808	const struct bpf_sec_def *sec_def;
 8809
 8810	if (!name)
 8811		return libbpf_err(-EINVAL);
 8812
 8813	sec_def = find_sec_def(name);
 8814	if (!sec_def) {
 8815		pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
 8816		type_names = libbpf_get_type_names(true);
 8817		if (type_names != NULL) {
 8818			pr_debug("attachable section(type) names are:%s\n", type_names);
 8819			free(type_names);
 8820		}
 8821
 8822		return libbpf_err(-EINVAL);
 8823	}
 8824
 8825	if (sec_def->preload_fn != libbpf_preload_prog)
 8826		return libbpf_err(-EINVAL);
 8827	if (!(sec_def->cookie & SEC_ATTACHABLE))
 8828		return libbpf_err(-EINVAL);
 8829
 8830	*attach_type = sec_def->expected_attach_type;
 8831	return 0;
 8832}
 8833
 8834int bpf_map__fd(const struct bpf_map *map)
 8835{
 8836	return map ? map->fd : libbpf_err(-EINVAL);
 8837}
 8838
 8839const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
 8840{
 8841	return map ? &map->def : libbpf_err_ptr(-EINVAL);
 8842}
 8843
 8844static bool map_uses_real_name(const struct bpf_map *map)
 8845{
 8846	/* Since libbpf started to support custom .data.* and .rodata.* maps,
 8847	 * their user-visible name differs from kernel-visible name. Users see
 8848	 * such map's corresponding ELF section name as a map name.
 8849	 * This check distinguishes .data/.rodata from .data.* and .rodata.*
 8850	 * maps to know which name has to be returned to the user.
 8851	 */
 8852	if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
 8853		return true;
 8854	if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
 8855		return true;
 8856	return false;
 8857}
 8858
 8859const char *bpf_map__name(const struct bpf_map *map)
 8860{
 8861	if (!map)
 8862		return NULL;
 8863
 8864	if (map_uses_real_name(map))
 8865		return map->real_name;
 8866
 8867	return map->name;
 8868}
 8869
 8870enum bpf_map_type bpf_map__type(const struct bpf_map *map)
 8871{
 8872	return map->def.type;
 8873}
 8874
 8875int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
 8876{
 8877	if (map->fd >= 0)
 8878		return libbpf_err(-EBUSY);
 8879	map->def.type = type;
 8880	return 0;
 8881}
 8882
 8883__u32 bpf_map__map_flags(const struct bpf_map *map)
 8884{
 8885	return map->def.map_flags;
 8886}
 8887
 8888int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
 8889{
 8890	if (map->fd >= 0)
 8891		return libbpf_err(-EBUSY);
 8892	map->def.map_flags = flags;
 8893	return 0;
 8894}
 8895
 8896__u64 bpf_map__map_extra(const struct bpf_map *map)
 8897{
 8898	return map->map_extra;
 8899}
 8900
 8901int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
 8902{
 8903	if (map->fd >= 0)
 8904		return libbpf_err(-EBUSY);
 8905	map->map_extra = map_extra;
 8906	return 0;
 8907}
 8908
 8909__u32 bpf_map__numa_node(const struct bpf_map *map)
 8910{
 8911	return map->numa_node;
 8912}
 8913
 8914int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
 8915{
 8916	if (map->fd >= 0)
 8917		return libbpf_err(-EBUSY);
 8918	map->numa_node = numa_node;
 8919	return 0;
 8920}
 8921
 8922__u32 bpf_map__key_size(const struct bpf_map *map)
 8923{
 8924	return map->def.key_size;
 8925}
 8926
 8927int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
 8928{
 8929	if (map->fd >= 0)
 8930		return libbpf_err(-EBUSY);
 8931	map->def.key_size = size;
 8932	return 0;
 8933}
 8934
 8935__u32 bpf_map__value_size(const struct bpf_map *map)
 8936{
 8937	return map->def.value_size;
 8938}
 8939
 8940int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
 8941{
 8942	if (map->fd >= 0)
 8943		return libbpf_err(-EBUSY);
 8944	map->def.value_size = size;
 8945	return 0;
 8946}
 8947
 8948__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
 8949{
 8950	return map ? map->btf_key_type_id : 0;
 8951}
 8952
 8953__u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
 8954{
 8955	return map ? map->btf_value_type_id : 0;
 8956}
 8957
 8958int bpf_map__set_priv(struct bpf_map *map, void *priv,
 8959		     bpf_map_clear_priv_t clear_priv)
 8960{
 8961	if (!map)
 8962		return libbpf_err(-EINVAL);
 8963
 8964	if (map->priv) {
 8965		if (map->clear_priv)
 8966			map->clear_priv(map, map->priv);
 8967	}
 8968
 8969	map->priv = priv;
 8970	map->clear_priv = clear_priv;
 8971	return 0;
 8972}
 8973
 8974void *bpf_map__priv(const struct bpf_map *map)
 8975{
 8976	return map ? map->priv : libbpf_err_ptr(-EINVAL);
 8977}
 8978
 8979int bpf_map__set_initial_value(struct bpf_map *map,
 8980			       const void *data, size_t size)
 8981{
 8982	if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
 8983	    size != map->def.value_size || map->fd >= 0)
 8984		return libbpf_err(-EINVAL);
 8985
 8986	memcpy(map->mmaped, data, size);
 8987	return 0;
 8988}
 8989
 8990const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
 8991{
 8992	if (!map->mmaped)
 8993		return NULL;
 8994	*psize = map->def.value_size;
 8995	return map->mmaped;
 8996}
 8997
 8998bool bpf_map__is_offload_neutral(const struct bpf_map *map)
 8999{
 9000	return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
 9001}
 9002
 9003bool bpf_map__is_internal(const struct bpf_map *map)
 9004{
 9005	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
 9006}
 9007
 9008__u32 bpf_map__ifindex(const struct bpf_map *map)
 9009{
 9010	return map->map_ifindex;
 9011}
 9012
 9013int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
 9014{
 9015	if (map->fd >= 0)
 9016		return libbpf_err(-EBUSY);
 9017	map->map_ifindex = ifindex;
 9018	return 0;
 9019}
 9020
 9021int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
 9022{
 9023	if (!bpf_map_type__is_map_in_map(map->def.type)) {
 9024		pr_warn("error: unsupported map type\n");
 9025		return libbpf_err(-EINVAL);
 9026	}
 9027	if (map->inner_map_fd != -1) {
 9028		pr_warn("error: inner_map_fd already specified\n");
 9029		return libbpf_err(-EINVAL);
 9030	}
 9031	zfree(&map->inner_map);
 9032	map->inner_map_fd = fd;
 9033	return 0;
 9034}
 9035
 9036static struct bpf_map *
 9037__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
 9038{
 9039	ssize_t idx;
 9040	struct bpf_map *s, *e;
 9041
 9042	if (!obj || !obj->maps)
 9043		return errno = EINVAL, NULL;
 9044
 9045	s = obj->maps;
 9046	e = obj->maps + obj->nr_maps;
 9047
 9048	if ((m < s) || (m >= e)) {
 9049		pr_warn("error in %s: map handler doesn't belong to object\n",
 9050			 __func__);
 9051		return errno = EINVAL, NULL;
 9052	}
 9053
 9054	idx = (m - obj->maps) + i;
 9055	if (idx >= obj->nr_maps || idx < 0)
 9056		return NULL;
 9057	return &obj->maps[idx];
 9058}
 9059
 9060struct bpf_map *
 9061bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
 9062{
 9063	return bpf_object__next_map(obj, prev);
 9064}
 9065
 9066struct bpf_map *
 9067bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
 9068{
 9069	if (prev == NULL)
 9070		return obj->maps;
 9071
 9072	return __bpf_map__iter(prev, obj, 1);
 9073}
 9074
 9075struct bpf_map *
 9076bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
 9077{
 9078	return bpf_object__prev_map(obj, next);
 9079}
 9080
 9081struct bpf_map *
 9082bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
 9083{
 9084	if (next == NULL) {
 9085		if (!obj->nr_maps)
 9086			return NULL;
 9087		return obj->maps + obj->nr_maps - 1;
 9088	}
 9089
 9090	return __bpf_map__iter(next, obj, -1);
 9091}
 9092
 9093struct bpf_map *
 9094bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
 9095{
 9096	struct bpf_map *pos;
 9097
 9098	bpf_object__for_each_map(pos, obj) {
 9099		/* if it's a special internal map name (which always starts
 9100		 * with dot) then check if that special name matches the
 9101		 * real map name (ELF section name)
 9102		 */
 9103		if (name[0] == '.') {
 9104			if (pos->real_name && strcmp(pos->real_name, name) == 0)
 9105				return pos;
 9106			continue;
 9107		}
 9108		/* otherwise map name has to be an exact match */
 9109		if (map_uses_real_name(pos)) {
 9110			if (strcmp(pos->real_name, name) == 0)
 9111				return pos;
 9112			continue;
 9113		}
 9114		if (strcmp(pos->name, name) == 0)
 9115			return pos;
 9116	}
 9117	return errno = ENOENT, NULL;
 9118}
 9119
 9120int
 9121bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
 9122{
 9123	return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
 9124}
 9125
 9126struct bpf_map *
 9127bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
 9128{
 9129	return libbpf_err_ptr(-ENOTSUP);
 9130}
 9131
 9132long libbpf_get_error(const void *ptr)
 9133{
 9134	if (!IS_ERR_OR_NULL(ptr))
 9135		return 0;
 9136
 9137	if (IS_ERR(ptr))
 9138		errno = -PTR_ERR(ptr);
 9139
 9140	/* If ptr == NULL, then errno should be already set by the failing
 9141	 * API, because libbpf never returns NULL on success and it now always
 9142	 * sets errno on error. So no extra errno handling for ptr == NULL
 9143	 * case.
 9144	 */
 9145	return -errno;
 9146}
 9147
 9148int bpf_prog_load(const char *file, enum bpf_prog_type type,
 9149		  struct bpf_object **pobj, int *prog_fd)
 9150{
 9151	struct bpf_prog_load_attr attr;
 9152
 9153	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
 9154	attr.file = file;
 9155	attr.prog_type = type;
 9156	attr.expected_attach_type = 0;
 9157
 9158	return bpf_prog_load_xattr(&attr, pobj, prog_fd);
 9159}
 9160
 9161int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 9162			struct bpf_object **pobj, int *prog_fd)
 9163{
 9164	struct bpf_object_open_attr open_attr = {};
 9165	struct bpf_program *prog, *first_prog = NULL;
 9166	struct bpf_object *obj;
 9167	struct bpf_map *map;
 9168	int err;
 9169
 9170	if (!attr)
 9171		return libbpf_err(-EINVAL);
 9172	if (!attr->file)
 9173		return libbpf_err(-EINVAL);
 9174
 9175	open_attr.file = attr->file;
 9176	open_attr.prog_type = attr->prog_type;
 9177
 9178	obj = bpf_object__open_xattr(&open_attr);
 9179	err = libbpf_get_error(obj);
 9180	if (err)
 9181		return libbpf_err(-ENOENT);
 9182
 9183	bpf_object__for_each_program(prog, obj) {
 9184		enum bpf_attach_type attach_type = attr->expected_attach_type;
 9185		/*
 9186		 * to preserve backwards compatibility, bpf_prog_load treats
 9187		 * attr->prog_type, if specified, as an override to whatever
 9188		 * bpf_object__open guessed
 9189		 */
 9190		if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) {
 9191			bpf_program__set_type(prog, attr->prog_type);
 9192			bpf_program__set_expected_attach_type(prog,
 9193							      attach_type);
 9194		}
 9195		if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) {
 9196			/*
 9197			 * we haven't guessed from section name and user
 9198			 * didn't provide a fallback type, too bad...
 9199			 */
 9200			bpf_object__close(obj);
 9201			return libbpf_err(-EINVAL);
 9202		}
 9203
 9204		prog->prog_ifindex = attr->ifindex;
 9205		prog->log_level = attr->log_level;
 9206		prog->prog_flags |= attr->prog_flags;
 9207		if (!first_prog)
 9208			first_prog = prog;
 9209	}
 9210
 9211	bpf_object__for_each_map(map, obj) {
 9212		if (!bpf_map__is_offload_neutral(map))
 9213			map->map_ifindex = attr->ifindex;
 9214	}
 9215
 9216	if (!first_prog) {
 9217		pr_warn("object file doesn't contain bpf program\n");
 9218		bpf_object__close(obj);
 9219		return libbpf_err(-ENOENT);
 9220	}
 9221
 9222	err = bpf_object__load(obj);
 9223	if (err) {
 9224		bpf_object__close(obj);
 9225		return libbpf_err(err);
 9226	}
 9227
 9228	*pobj = obj;
 9229	*prog_fd = bpf_program__fd(first_prog);
 9230	return 0;
 9231}
 9232
/*
 * Generic handle for an attachment; used directly or embedded in a larger
 * struct such as struct bpf_link_perf.
 */
struct bpf_link {
	/* called by bpf_link__destroy() unless the link is disconnected; may be NULL */
	int (*detach)(struct bpf_link *link);
	/* custom destructor; when NULL, bpf_link__destroy() calls free() on the link */
	void (*dealloc)(struct bpf_link *link);
	char *pin_path;		/* NULL, if not pinned */
	int fd;			/* hook FD, -1 if not applicable */
	/* set by bpf_link__disconnect(); makes bpf_link__destroy() skip detach */
	bool disconnected;
};
 9240
 9241/* Replace link's underlying BPF program with the new one */
 9242int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
 9243{
 9244	int ret;
 9245
 9246	ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
 9247	return libbpf_err_errno(ret);
 9248}
 9249
 9250/* Release "ownership" of underlying BPF resource (typically, BPF program
 9251 * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
 9252 * link, when destructed through bpf_link__destroy() call won't attempt to
 9253 * detach/unregisted that BPF resource. This is useful in situations where,
 9254 * say, attached BPF program has to outlive userspace program that attached it
 9255 * in the system. Depending on type of BPF program, though, there might be
 9256 * additional steps (like pinning BPF program in BPF FS) necessary to ensure
 9257 * exit of userspace program doesn't trigger automatic detachment and clean up
 9258 * inside the kernel.
 9259 */
 9260void bpf_link__disconnect(struct bpf_link *link)
 9261{
 9262	link->disconnected = true;
 9263}
 9264
 9265int bpf_link__destroy(struct bpf_link *link)
 9266{
 9267	int err = 0;
 9268
 9269	if (IS_ERR_OR_NULL(link))
 9270		return 0;
 9271
 9272	if (!link->disconnected && link->detach)
 9273		err = link->detach(link);
 9274	if (link->pin_path)
 9275		free(link->pin_path);
 9276	if (link->dealloc)
 9277		link->dealloc(link);
 9278	else
 9279		free(link);
 9280
 9281	return libbpf_err(err);
 9282}
 9283
 9284int bpf_link__fd(const struct bpf_link *link)
 9285{
 9286	return link->fd;
 9287}
 9288
 9289const char *bpf_link__pin_path(const struct bpf_link *link)
 9290{
 9291	return link->pin_path;
 9292}
 9293
 9294static int bpf_link__detach_fd(struct bpf_link *link)
 9295{
 9296	return libbpf_err_errno(close(link->fd));
 9297}
 9298
 9299struct bpf_link *bpf_link__open(const char *path)
 9300{
 9301	struct bpf_link *link;
 9302	int fd;
 9303
 9304	fd = bpf_obj_get(path);
 9305	if (fd < 0) {
 9306		fd = -errno;
 9307		pr_warn("failed to open link at %s: %d\n", path, fd);
 9308		return libbpf_err_ptr(fd);
 9309	}
 9310
 9311	link = calloc(1, sizeof(*link));
 9312	if (!link) {
 9313		close(fd);
 9314		return libbpf_err_ptr(-ENOMEM);
 9315	}
 9316	link->detach = &bpf_link__detach_fd;
 9317	link->fd = fd;
 9318
 9319	link->pin_path = strdup(path);
 9320	if (!link->pin_path) {
 9321		bpf_link__destroy(link);
 9322		return libbpf_err_ptr(-ENOMEM);
 9323	}
 9324
 9325	return link;
 9326}
 9327
 9328int bpf_link__detach(struct bpf_link *link)
 9329{
 9330	return bpf_link_detach(link->fd) ? -errno : 0;
 9331}
 9332
 9333int bpf_link__pin(struct bpf_link *link, const char *path)
 9334{
 9335	int err;
 9336
 9337	if (link->pin_path)
 9338		return libbpf_err(-EBUSY);
 9339	err = make_parent_dir(path);
 9340	if (err)
 9341		return libbpf_err(err);
 9342	err = check_path(path);
 9343	if (err)
 9344		return libbpf_err(err);
 9345
 9346	link->pin_path = strdup(path);
 9347	if (!link->pin_path)
 9348		return libbpf_err(-ENOMEM);
 9349
 9350	if (bpf_obj_pin(link->fd, link->pin_path)) {
 9351		err = -errno;
 9352		zfree(&link->pin_path);
 9353		return libbpf_err(err);
 9354	}
 9355
 9356	pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
 9357	return 0;
 9358}
 9359
 9360int bpf_link__unpin(struct bpf_link *link)
 9361{
 9362	int err;
 9363
 9364	if (!link->pin_path)
 9365		return libbpf_err(-EINVAL);
 9366
 9367	err = unlink(link->pin_path);
 9368	if (err != 0)
 9369		return -errno;
 9370
 9371	pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
 9372	zfree(&link->pin_path);
 9373	return 0;
 9374}
 9375
/*
 * Link backed by a perf event. The generic struct bpf_link is embedded so
 * that container_of() can recover this struct from a struct bpf_link
 * pointer.
 */
struct bpf_link_perf {
	struct bpf_link link;
	/* perf event FD; may be the same FD as link.fd (see bpf_link_perf_detach()) */
	int perf_event_fd;
	/* legacy kprobe support: keep track of probe identifier and type */
	char *legacy_probe_name;	/* heap string freed in bpf_link_perf_dealloc(); NULL if not legacy */
	bool legacy_is_kprobe;		/* true: kprobe event, false: uprobe event */
	bool legacy_is_retprobe;
};
 9384
 9385static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
 9386static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
 9387
 9388static int bpf_link_perf_detach(struct bpf_link *link)
 9389{
 9390	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
 9391	int err = 0;
 9392
 9393	if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
 9394		err = -errno;
 9395
 9396	if (perf_link->perf_event_fd != link->fd)
 9397		close(perf_link->perf_event_fd);
 9398	close(link->fd);
 9399
 9400	/* legacy uprobe/kprobe needs to be removed after perf event fd closure */
 9401	if (perf_link->legacy_probe_name) {
 9402		if (perf_link->legacy_is_kprobe) {
 9403			err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
 9404							 perf_link->legacy_is_retprobe);
 9405		} else {
 9406			err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
 9407							 perf_link->legacy_is_retprobe);
 9408		}
 9409	}
 9410
 9411	return err;
 9412}
 9413
 9414static void bpf_link_perf_dealloc(struct bpf_link *link)
 9415{
 9416	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
 9417
 9418	free(perf_link->legacy_probe_name);
 9419	free(perf_link);
 9420}
 9421
 9422struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
 9423						     const struct bpf_perf_event_opts *opts)
 9424{
 9425	char errmsg[STRERR_BUFSIZE];
 9426	struct bpf_link_perf *link;
 9427	int prog_fd, link_fd = -1, err;
 9428
 9429	if (!OPTS_VALID(opts, bpf_perf_event_opts))
 9430		return libbpf_err_ptr(-EINVAL);
 9431
 9432	if (pfd < 0) {
 9433		pr_warn("prog '%s': invalid perf event FD %d\n",
 9434			prog->name, pfd);
 9435		return libbpf_err_ptr(-EINVAL);
 9436	}
 9437	prog_fd = bpf_program__fd(prog);
 9438	if (prog_fd < 0) {
 9439		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
 9440			prog->name);
 9441		return libbpf_err_ptr(-EINVAL);
 9442	}
 9443
 9444	link = calloc(1, sizeof(*link));
 9445	if (!link)
 9446		return libbpf_err_ptr(-ENOMEM);
 9447	link->link.detach = &bpf_link_perf_detach;
 9448	link->link.dealloc = &bpf_link_perf_dealloc;
 9449	link->perf_event_fd = pfd;
 9450
 9451	if (kernel_supports(prog->obj, FEAT_PERF_LINK)) {
 9452		DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
 9453			.perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
 9454
 9455		link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
 9456		if (link_fd < 0) {
 9457			err = -errno;
 9458			pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
 9459				prog->name, pfd,
 9460				err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9461			goto err_out;
 9462		}
 9463		link->link.fd = link_fd;
 9464	} else {
 9465		if (OPTS_GET(opts, bpf_cookie, 0)) {
 9466			pr_warn("prog '%s': user context value is not supported\n", prog->name);
 9467			err = -EOPNOTSUPP;
 9468			goto err_out;
 9469		}
 9470
 9471		if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
 9472			err = -errno;
 9473			pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
 9474				prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9475			if (err == -EPROTO)
 9476				pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
 9477					prog->name, pfd);
 9478			goto err_out;
 9479		}
 9480		link->link.fd = pfd;
 9481	}
 9482	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
 9483		err = -errno;
 9484		pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
 9485			prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9486		goto err_out;
 9487	}
 9488
 9489	return &link->link;
 9490err_out:
 9491	if (link_fd >= 0)
 9492		close(link_fd);
 9493	free(link);
 9494	return libbpf_err_ptr(err);
 9495}
 9496
 9497struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
 9498{
 9499	return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
 9500}
 9501
 9502/*
 9503 * this function is expected to parse integer in the range of [0, 2^31-1] from
 9504 * given file using scanf format string fmt. If actual parsed value is
 9505 * negative, the result might be indistinguishable from error
 9506 */
 9507static int parse_uint_from_file(const char *file, const char *fmt)
 9508{
 9509	char buf[STRERR_BUFSIZE];
 9510	int err, ret;
 9511	FILE *f;
 9512
 9513	f = fopen(file, "r");
 9514	if (!f) {
 9515		err = -errno;
 9516		pr_debug("failed to open '%s': %s\n", file,
 9517			 libbpf_strerror_r(err, buf, sizeof(buf)));
 9518		return err;
 9519	}
 9520	err = fscanf(f, fmt, &ret);
 9521	if (err != 1) {
 9522		err = err == EOF ? -EIO : -errno;
 9523		pr_debug("failed to parse '%s': %s\n", file,
 9524			libbpf_strerror_r(err, buf, sizeof(buf)));
 9525		fclose(f);
 9526		return err;
 9527	}
 9528	fclose(f);
 9529	return ret;
 9530}
 9531
 9532static int determine_kprobe_perf_type(void)
 9533{
 9534	const char *file = "/sys/bus/event_source/devices/kprobe/type";
 9535
 9536	return parse_uint_from_file(file, "%d\n");
 9537}
 9538
 9539static int determine_uprobe_perf_type(void)
 9540{
 9541	const char *file = "/sys/bus/event_source/devices/uprobe/type";
 9542
 9543	return parse_uint_from_file(file, "%d\n");
 9544}
 9545
 9546static int determine_kprobe_retprobe_bit(void)
 9547{
 9548	const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
 9549
 9550	return parse_uint_from_file(file, "config:%d\n");
 9551}
 9552
 9553static int determine_uprobe_retprobe_bit(void)
 9554{
 9555	const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
 9556
 9557	return parse_uint_from_file(file, "config:%d\n");
 9558}
 9559
 9560#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
 9561#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
 9562
 9563static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 9564				 uint64_t offset, int pid, size_t ref_ctr_off)
 9565{
 9566	struct perf_event_attr attr = {};
 9567	char errmsg[STRERR_BUFSIZE];
 9568	int type, pfd, err;
 9569
 9570	if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
 9571		return -EINVAL;
 9572
 9573	type = uprobe ? determine_uprobe_perf_type()
 9574		      : determine_kprobe_perf_type();
 9575	if (type < 0) {
 9576		pr_warn("failed to determine %s perf type: %s\n",
 9577			uprobe ? "uprobe" : "kprobe",
 9578			libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
 9579		return type;
 9580	}
 9581	if (retprobe) {
 9582		int bit = uprobe ? determine_uprobe_retprobe_bit()
 9583				 : determine_kprobe_retprobe_bit();
 9584
 9585		if (bit < 0) {
 9586			pr_warn("failed to determine %s retprobe bit: %s\n",
 9587				uprobe ? "uprobe" : "kprobe",
 9588				libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
 9589			return bit;
 9590		}
 9591		attr.config |= 1 << bit;
 9592	}
 9593	attr.size = sizeof(attr);
 9594	attr.type = type;
 9595	attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
 9596	attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
 9597	attr.config2 = offset;		 /* kprobe_addr or probe_offset */
 9598
 9599	/* pid filter is meaningful only for uprobes */
 9600	pfd = syscall(__NR_perf_event_open, &attr,
 9601		      pid < 0 ? -1 : pid /* pid */,
 9602		      pid == -1 ? 0 : -1 /* cpu */,
 9603		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
 9604	if (pfd < 0) {
 9605		err = -errno;
 9606		pr_warn("%s perf_event_open() failed: %s\n",
 9607			uprobe ? "uprobe" : "kprobe",
 9608			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9609		return err;
 9610	}
 9611	return pfd;
 9612}
 9613
 9614static int append_to_file(const char *file, const char *fmt, ...)
 9615{
 9616	int fd, n, err = 0;
 9617	va_list ap;
 9618
 9619	fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
 9620	if (fd < 0)
 9621		return -errno;
 9622
 9623	va_start(ap, fmt);
 9624	n = vdprintf(fd, fmt, ap);
 9625	va_end(ap);
 9626
 9627	if (n < 0)
 9628		err = -errno;
 9629
 9630	close(fd);
 9631	return err;
 9632}
 9633
 9634static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
 9635					 const char *kfunc_name, size_t offset)
 9636{
 9637	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), kfunc_name, offset);
 9638}
 9639
 9640static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
 9641				   const char *kfunc_name, size_t offset)
 9642{
 9643	const char *file = "/sys/kernel/debug/tracing/kprobe_events";
 9644
 9645	return append_to_file(file, "%c:%s/%s %s+0x%zx",
 9646			      retprobe ? 'r' : 'p',
 9647			      retprobe ? "kretprobes" : "kprobes",
 9648			      probe_name, kfunc_name, offset);
 9649}
 9650
 9651static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
 9652{
 9653	const char *file = "/sys/kernel/debug/tracing/kprobe_events";
 9654
 9655	return append_to_file(file, "-:%s/%s", retprobe ? "kretprobes" : "kprobes", probe_name);
 9656}
 9657
 9658static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
 9659{
 9660	char file[256];
 9661
 9662	snprintf(file, sizeof(file),
 9663		 "/sys/kernel/debug/tracing/events/%s/%s/id",
 9664		 retprobe ? "kretprobes" : "kprobes", probe_name);
 9665
 9666	return parse_uint_from_file(file, "%d\n");
 9667}
 9668
 9669static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
 9670					 const char *kfunc_name, size_t offset, int pid)
 9671{
 9672	struct perf_event_attr attr = {};
 9673	char errmsg[STRERR_BUFSIZE];
 9674	int type, pfd, err;
 9675
 9676	err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
 9677	if (err < 0) {
 9678		pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
 9679			kfunc_name, offset,
 9680			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9681		return err;
 9682	}
 9683	type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
 9684	if (type < 0) {
 9685		pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
 9686			kfunc_name, offset,
 9687			libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
 9688		return type;
 9689	}
 9690	attr.size = sizeof(attr);
 9691	attr.config = type;
 9692	attr.type = PERF_TYPE_TRACEPOINT;
 9693
 9694	pfd = syscall(__NR_perf_event_open, &attr,
 9695		      pid < 0 ? -1 : pid, /* pid */
 9696		      pid == -1 ? 0 : -1, /* cpu */
 9697		      -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
 9698	if (pfd < 0) {
 9699		err = -errno;
 9700		pr_warn("legacy kprobe perf_event_open() failed: %s\n",
 9701			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9702		return err;
 9703	}
 9704	return pfd;
 9705}
 9706
 9707struct bpf_link *
 9708bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
 9709				const char *func_name,
 9710				const struct bpf_kprobe_opts *opts)
 9711{
 9712	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
 9713	char errmsg[STRERR_BUFSIZE];
 9714	char *legacy_probe = NULL;
 9715	struct bpf_link *link;
 9716	size_t offset;
 9717	bool retprobe, legacy;
 9718	int pfd, err;
 9719
 9720	if (!OPTS_VALID(opts, bpf_kprobe_opts))
 9721		return libbpf_err_ptr(-EINVAL);
 9722
 9723	retprobe = OPTS_GET(opts, retprobe, false);
 9724	offset = OPTS_GET(opts, offset, 0);
 9725	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
 9726
 9727	legacy = determine_kprobe_perf_type() < 0;
 9728	if (!legacy) {
 9729		pfd = perf_event_open_probe(false /* uprobe */, retprobe,
 9730					    func_name, offset,
 9731					    -1 /* pid */, 0 /* ref_ctr_off */);
 9732	} else {
 9733		char probe_name[256];
 9734
 9735		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
 9736					     func_name, offset);
 9737
 9738		legacy_probe = strdup(func_name);
 9739		if (!legacy_probe)
 9740			return libbpf_err_ptr(-ENOMEM);
 9741
 9742		pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
 9743						    offset, -1 /* pid */);
 9744	}
 9745	if (pfd < 0) {
 9746		err = -errno;
 9747		pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
 9748			prog->name, retprobe ? "kretprobe" : "kprobe",
 9749			func_name, offset,
 9750			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9751		goto err_out;
 9752	}
 9753	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
 9754	err = libbpf_get_error(link);
 9755	if (err) {
 9756		close(pfd);
 9757		pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
 9758			prog->name, retprobe ? "kretprobe" : "kprobe",
 9759			func_name, offset,
 9760			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9761		goto err_out;
 9762	}
 9763	if (legacy) {
 9764		struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
 9765
 9766		perf_link->legacy_probe_name = legacy_probe;
 9767		perf_link->legacy_is_kprobe = true;
 9768		perf_link->legacy_is_retprobe = retprobe;
 9769	}
 9770
 9771	return link;
 9772err_out:
 9773	free(legacy_probe);
 9774	return libbpf_err_ptr(err);
 9775}
 9776
 9777struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
 9778					    bool retprobe,
 9779					    const char *func_name)
 9780{
 9781	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
 9782		.retprobe = retprobe,
 9783	);
 9784
 9785	return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
 9786}
 9787
 9788static struct bpf_link *attach_kprobe(const struct bpf_program *prog, long cookie)
 9789{
 9790	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
 9791	unsigned long offset = 0;
 9792	struct bpf_link *link;
 9793	const char *func_name;
 9794	char *func;
 9795	int n, err;
 9796
 9797	opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
 9798	if (opts.retprobe)
 9799		func_name = prog->sec_name + sizeof("kretprobe/") - 1;
 9800	else
 9801		func_name = prog->sec_name + sizeof("kprobe/") - 1;
 9802
 9803	n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
 9804	if (n < 1) {
 9805		err = -EINVAL;
 9806		pr_warn("kprobe name is invalid: %s\n", func_name);
 9807		return libbpf_err_ptr(err);
 9808	}
 9809	if (opts.retprobe && offset != 0) {
 9810		free(func);
 9811		err = -EINVAL;
 9812		pr_warn("kretprobes do not support offset specification\n");
 9813		return libbpf_err_ptr(err);
 9814	}
 9815
 9816	opts.offset = offset;
 9817	link = bpf_program__attach_kprobe_opts(prog, func, &opts);
 9818	free(func);
 9819	return link;
 9820}
 9821
 9822static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
 9823					 const char *binary_path, uint64_t offset)
 9824{
 9825	int i;
 9826
 9827	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
 9828
 9829	/* sanitize binary_path in the probe name */
 9830	for (i = 0; buf[i]; i++) {
 9831		if (!isalnum(buf[i]))
 9832			buf[i] = '_';
 9833	}
 9834}
 9835
 9836static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
 9837					  const char *binary_path, size_t offset)
 9838{
 9839	const char *file = "/sys/kernel/debug/tracing/uprobe_events";
 9840
 9841	return append_to_file(file, "%c:%s/%s %s:0x%zx",
 9842			      retprobe ? 'r' : 'p',
 9843			      retprobe ? "uretprobes" : "uprobes",
 9844			      probe_name, binary_path, offset);
 9845}
 9846
 9847static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
 9848{
 9849	const char *file = "/sys/kernel/debug/tracing/uprobe_events";
 9850
 9851	return append_to_file(file, "-:%s/%s", retprobe ? "uretprobes" : "uprobes", probe_name);
 9852}
 9853
 9854static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
 9855{
 9856	char file[512];
 9857
 9858	snprintf(file, sizeof(file),
 9859		 "/sys/kernel/debug/tracing/events/%s/%s/id",
 9860		 retprobe ? "uretprobes" : "uprobes", probe_name);
 9861
 9862	return parse_uint_from_file(file, "%d\n");
 9863}
 9864
 9865static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
 9866					 const char *binary_path, size_t offset, int pid)
 9867{
 9868	struct perf_event_attr attr;
 9869	int type, pfd, err;
 9870
 9871	err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
 9872	if (err < 0) {
 9873		pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
 9874			binary_path, (size_t)offset, err);
 9875		return err;
 9876	}
 9877	type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
 9878	if (type < 0) {
 9879		pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
 9880			binary_path, offset, err);
 9881		return type;
 9882	}
 9883
 9884	memset(&attr, 0, sizeof(attr));
 9885	attr.size = sizeof(attr);
 9886	attr.config = type;
 9887	attr.type = PERF_TYPE_TRACEPOINT;
 9888
 9889	pfd = syscall(__NR_perf_event_open, &attr,
 9890		      pid < 0 ? -1 : pid, /* pid */
 9891		      pid == -1 ? 0 : -1, /* cpu */
 9892		      -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
 9893	if (pfd < 0) {
 9894		err = -errno;
 9895		pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
 9896		return err;
 9897	}
 9898	return pfd;
 9899}
 9900
 9901LIBBPF_API struct bpf_link *
 9902bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
 9903				const char *binary_path, size_t func_offset,
 9904				const struct bpf_uprobe_opts *opts)
 9905{
 9906	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
 9907	char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
 9908	struct bpf_link *link;
 9909	size_t ref_ctr_off;
 9910	int pfd, err;
 9911	bool retprobe, legacy;
 9912
 9913	if (!OPTS_VALID(opts, bpf_uprobe_opts))
 9914		return libbpf_err_ptr(-EINVAL);
 9915
 9916	retprobe = OPTS_GET(opts, retprobe, false);
 9917	ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
 9918	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
 9919
 9920	legacy = determine_uprobe_perf_type() < 0;
 9921	if (!legacy) {
 9922		pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
 9923					    func_offset, pid, ref_ctr_off);
 9924	} else {
 9925		char probe_name[512];
 9926
 9927		if (ref_ctr_off)
 9928			return libbpf_err_ptr(-EINVAL);
 9929
 9930		gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
 9931					     binary_path, func_offset);
 9932
 9933		legacy_probe = strdup(probe_name);
 9934		if (!legacy_probe)
 9935			return libbpf_err_ptr(-ENOMEM);
 9936
 9937		pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
 9938						    binary_path, func_offset, pid);
 9939	}
 9940	if (pfd < 0) {
 9941		err = -errno;
 9942		pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
 9943			prog->name, retprobe ? "uretprobe" : "uprobe",
 9944			binary_path, func_offset,
 9945			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9946		goto err_out;
 9947	}
 9948
 9949	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
 9950	err = libbpf_get_error(link);
 9951	if (err) {
 9952		close(pfd);
 9953		pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
 9954			prog->name, retprobe ? "uretprobe" : "uprobe",
 9955			binary_path, func_offset,
 9956			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9957		goto err_out;
 9958	}
 9959	if (legacy) {
 9960		struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
 9961
 9962		perf_link->legacy_probe_name = legacy_probe;
 9963		perf_link->legacy_is_kprobe = false;
 9964		perf_link->legacy_is_retprobe = retprobe;
 9965	}
 9966	return link;
 9967err_out:
 9968	free(legacy_probe);
 9969	return libbpf_err_ptr(err);
 9970
 9971}
 9972
 9973struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
 9974					    bool retprobe, pid_t pid,
 9975					    const char *binary_path,
 9976					    size_t func_offset)
 9977{
 9978	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
 9979
 9980	return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
 9981}
 9982
 9983static int determine_tracepoint_id(const char *tp_category,
 9984				   const char *tp_name)
 9985{
 9986	char file[PATH_MAX];
 9987	int ret;
 9988
 9989	ret = snprintf(file, sizeof(file),
 9990		       "/sys/kernel/debug/tracing/events/%s/%s/id",
 9991		       tp_category, tp_name);
 9992	if (ret < 0)
 9993		return -errno;
 9994	if (ret >= sizeof(file)) {
 9995		pr_debug("tracepoint %s/%s path is too long\n",
 9996			 tp_category, tp_name);
 9997		return -E2BIG;
 9998	}
 9999	return parse_uint_from_file(file, "%d\n");
10000}
10001
10002static int perf_event_open_tracepoint(const char *tp_category,
10003				      const char *tp_name)
10004{
10005	struct perf_event_attr attr = {};
10006	char errmsg[STRERR_BUFSIZE];
10007	int tp_id, pfd, err;
10008
10009	tp_id = determine_tracepoint_id(tp_category, tp_name);
10010	if (tp_id < 0) {
10011		pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
10012			tp_category, tp_name,
10013			libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
10014		return tp_id;
10015	}
10016
10017	attr.type = PERF_TYPE_TRACEPOINT;
10018	attr.size = sizeof(attr);
10019	attr.config = tp_id;
10020
10021	pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
10022		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10023	if (pfd < 0) {
10024		err = -errno;
10025		pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
10026			tp_category, tp_name,
10027			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10028		return err;
10029	}
10030	return pfd;
10031}
10032
10033struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
10034						     const char *tp_category,
10035						     const char *tp_name,
10036						     const struct bpf_tracepoint_opts *opts)
10037{
10038	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10039	char errmsg[STRERR_BUFSIZE];
10040	struct bpf_link *link;
10041	int pfd, err;
10042
10043	if (!OPTS_VALID(opts, bpf_tracepoint_opts))
10044		return libbpf_err_ptr(-EINVAL);
10045
10046	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10047
10048	pfd = perf_event_open_tracepoint(tp_category, tp_name);
10049	if (pfd < 0) {
10050		pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
10051			prog->name, tp_category, tp_name,
10052			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10053		return libbpf_err_ptr(pfd);
10054	}
10055	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10056	err = libbpf_get_error(link);
10057	if (err) {
10058		close(pfd);
10059		pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
10060			prog->name, tp_category, tp_name,
10061			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10062		return libbpf_err_ptr(err);
10063	}
10064	return link;
10065}
10066
/* Same as bpf_program__attach_tracepoint_opts() called with NULL options. */
struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
						const char *tp_category,
						const char *tp_name)
{
	const struct bpf_tracepoint_opts *no_opts = NULL;

	return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, no_opts);
}
10073
10074static struct bpf_link *attach_tp(const struct bpf_program *prog, long cookie)
10075{
10076	char *sec_name, *tp_cat, *tp_name;
10077	struct bpf_link *link;
10078
10079	sec_name = strdup(prog->sec_name);
10080	if (!sec_name)
10081		return libbpf_err_ptr(-ENOMEM);
10082
10083	/* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
10084	if (str_has_pfx(prog->sec_name, "tp/"))
10085		tp_cat = sec_name + sizeof("tp/") - 1;
10086	else
10087		tp_cat = sec_name + sizeof("tracepoint/") - 1;
10088	tp_name = strchr(tp_cat, '/');
10089	if (!tp_name) {
10090		free(sec_name);
10091		return libbpf_err_ptr(-EINVAL);
10092	}
10093	*tp_name = '\0';
10094	tp_name++;
10095
10096	link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
10097	free(sec_name);
10098	return link;
10099}
10100
10101struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
10102						    const char *tp_name)
10103{
10104	char errmsg[STRERR_BUFSIZE];
10105	struct bpf_link *link;
10106	int prog_fd, pfd;
10107
10108	prog_fd = bpf_program__fd(prog);
10109	if (prog_fd < 0) {
10110		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10111		return libbpf_err_ptr(-EINVAL);
10112	}
10113
10114	link = calloc(1, sizeof(*link));
10115	if (!link)
10116		return libbpf_err_ptr(-ENOMEM);
10117	link->detach = &bpf_link__detach_fd;
10118
10119	pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
10120	if (pfd < 0) {
10121		pfd = -errno;
10122		free(link);
10123		pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
10124			prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10125		return libbpf_err_ptr(pfd);
10126	}
10127	link->fd = pfd;
10128	return link;
10129}
10130
10131static struct bpf_link *attach_raw_tp(const struct bpf_program *prog, long cookie)
10132{
10133	static const char *const prefixes[] = {
10134		"raw_tp/",
10135		"raw_tracepoint/",
10136		"raw_tp.w/",
10137		"raw_tracepoint.w/",
10138	};
10139	size_t i;
10140	const char *tp_name = NULL;
10141
10142	for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
10143		if (str_has_pfx(prog->sec_name, prefixes[i])) {
10144			tp_name = prog->sec_name + strlen(prefixes[i]);
10145			break;
10146		}
10147	}
10148	if (!tp_name) {
10149		pr_warn("prog '%s': invalid section name '%s'\n",
10150			prog->name, prog->sec_name);
10151		return libbpf_err_ptr(-EINVAL);
10152	}
10153
10154	return bpf_program__attach_raw_tracepoint(prog, tp_name);
10155}
10156
10157/* Common logic for all BPF program types that attach to a btf_id */
10158static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog)
10159{
10160	char errmsg[STRERR_BUFSIZE];
10161	struct bpf_link *link;
10162	int prog_fd, pfd;
10163
10164	prog_fd = bpf_program__fd(prog);
10165	if (prog_fd < 0) {
10166		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10167		return libbpf_err_ptr(-EINVAL);
10168	}
10169
10170	link = calloc(1, sizeof(*link));
10171	if (!link)
10172		return libbpf_err_ptr(-ENOMEM);
10173	link->detach = &bpf_link__detach_fd;
10174
10175	pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
10176	if (pfd < 0) {
10177		pfd = -errno;
10178		free(link);
10179		pr_warn("prog '%s': failed to attach: %s\n",
10180			prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
10181		return libbpf_err_ptr(pfd);
10182	}
10183	link->fd = pfd;
10184	return (struct bpf_link *)link;
10185}
10186
/* Attach @prog through the common btf_id attach path. */
struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
{
	struct bpf_link *link = bpf_program__attach_btf_id(prog);

	return link;
}
10191
/* Attach @prog through the common btf_id attach path. */
struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
{
	struct bpf_link *link = bpf_program__attach_btf_id(prog);

	return link;
}
10196
/* Auto-attach handler forwarding to bpf_program__attach_trace();
 * @cookie is not used.
 */
static struct bpf_link *attach_trace(const struct bpf_program *prog, long cookie)
{
	struct bpf_link *link = bpf_program__attach_trace(prog);

	return link;
}
10201
/* Auto-attach handler forwarding to bpf_program__attach_lsm();
 * @cookie is not used.
 */
static struct bpf_link *attach_lsm(const struct bpf_program *prog, long cookie)
{
	struct bpf_link *link = bpf_program__attach_lsm(prog);

	return link;
}
10206
10207static struct bpf_link *
10208bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id,
10209		       const char *target_name)
10210{
10211	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
10212			    .target_btf_id = btf_id);
10213	enum bpf_attach_type attach_type;
10214	char errmsg[STRERR_BUFSIZE];
10215	struct bpf_link *link;
10216	int prog_fd, link_fd;
10217
10218	prog_fd = bpf_program__fd(prog);
10219	if (prog_fd < 0) {
10220		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10221		return libbpf_err_ptr(-EINVAL);
10222	}
10223
10224	link = calloc(1, sizeof(*link));
10225	if (!link)
10226		return libbpf_err_ptr(-ENOMEM);
10227	link->detach = &bpf_link__detach_fd;
10228
10229	attach_type = bpf_program__get_expected_attach_type(prog);
10230	link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
10231	if (link_fd < 0) {
10232		link_fd = -errno;
10233		free(link);
10234		pr_warn("prog '%s': failed to attach to %s: %s\n",
10235			prog->name, target_name,
10236			libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
10237		return libbpf_err_ptr(link_fd);
10238	}
10239	link->fd = link_fd;
10240	return link;
10241}
10242
/* Link @prog to the cgroup referenced by @cgroup_fd (no target BTF ID). */
struct bpf_link *
bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
{
	const int no_btf_id = 0;

	return bpf_program__attach_fd(prog, cgroup_fd, no_btf_id, "cgroup");
}
10248
/* Link @prog to the network namespace referenced by @netns_fd (no target
 * BTF ID).
 */
struct bpf_link *
bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
{
	const int no_btf_id = 0;

	return bpf_program__attach_fd(prog, netns_fd, no_btf_id, "netns");
}
10254
/* Link @prog as XDP program to the interface with index @ifindex. */
struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
{
	const int no_btf_id = 0;

	/* target_fd/target_ifindex use the same field in LINK_CREATE, so the
	 * interface index travels in the target_fd argument
	 */
	return bpf_program__attach_fd(prog, ifindex, no_btf_id, "xdp");
}
10260
10261struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
10262					      int target_fd,
10263					      const char *attach_func_name)
10264{
10265	int btf_id;
10266
10267	if (!!target_fd != !!attach_func_name) {
10268		pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
10269			prog->name);
10270		return libbpf_err_ptr(-EINVAL);
10271	}
10272
10273	if (prog->type != BPF_PROG_TYPE_EXT) {
10274		pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
10275			prog->name);
10276		return libbpf_err_ptr(-EINVAL);
10277	}
10278
10279	if (target_fd) {
10280		btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
10281		if (btf_id < 0)
10282			return libbpf_err_ptr(btf_id);
10283
10284		return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
10285	} else {
10286		/* no target, so use raw_tracepoint_open for compatibility
10287		 * with old kernels
10288		 */
10289		return bpf_program__attach_trace(prog);
10290	}
10291}
10292
10293struct bpf_link *
10294bpf_program__attach_iter(const struct bpf_program *prog,
10295			 const struct bpf_iter_attach_opts *opts)
10296{
10297	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
10298	char errmsg[STRERR_BUFSIZE];
10299	struct bpf_link *link;
10300	int prog_fd, link_fd;
10301	__u32 target_fd = 0;
10302
10303	if (!OPTS_VALID(opts, bpf_iter_attach_opts))
10304		return libbpf_err_ptr(-EINVAL);
10305
10306	link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
10307	link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
10308
10309	prog_fd = bpf_program__fd(prog);
10310	if (prog_fd < 0) {
10311		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
10312		return libbpf_err_ptr(-EINVAL);
10313	}
10314
10315	link = calloc(1, sizeof(*link));
10316	if (!link)
10317		return libbpf_err_ptr(-ENOMEM);
10318	link->detach = &bpf_link__detach_fd;
10319
10320	link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
10321				  &link_create_opts);
10322	if (link_fd < 0) {
10323		link_fd = -errno;
10324		free(link);
10325		pr_warn("prog '%s': failed to attach to iterator: %s\n",
10326			prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
10327		return libbpf_err_ptr(link_fd);
10328	}
10329	link->fd = link_fd;
10330	return link;
10331}
10332
/* Auto-attach handler forwarding to bpf_program__attach_iter() with no
 * options; @cookie is not used.
 */
static struct bpf_link *attach_iter(const struct bpf_program *prog, long cookie)
{
	struct bpf_link *link = bpf_program__attach_iter(prog, NULL);

	return link;
}
10337
10338struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
10339{
10340	if (!prog->sec_def || !prog->sec_def->attach_fn)
10341		return libbpf_err_ptr(-ESRCH);
10342
10343	return prog->sec_def->attach_fn(prog, prog->sec_def->cookie);
10344}
10345
10346static int bpf_link__detach_struct_ops(struct bpf_link *link)
10347{
10348	__u32 zero = 0;
10349
10350	if (bpf_map_delete_elem(link->fd, &zero))
10351		return -errno;
10352
10353	return 0;
10354}
10355
10356struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
10357{
10358	struct bpf_struct_ops *st_ops;
10359	struct bpf_link *link;
10360	__u32 i, zero = 0;
10361	int err;
10362
10363	if (!bpf_map__is_struct_ops(map) || map->fd == -1)
10364		return libbpf_err_ptr(-EINVAL);
10365
10366	link = calloc(1, sizeof(*link));
10367	if (!link)
10368		return libbpf_err_ptr(-EINVAL);
10369
10370	st_ops = map->st_ops;
10371	for (i = 0; i < btf_vlen(st_ops->type); i++) {
10372		struct bpf_program *prog = st_ops->progs[i];
10373		void *kern_data;
10374		int prog_fd;
10375
10376		if (!prog)
10377			continue;
10378
10379		prog_fd = bpf_program__fd(prog);
10380		kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
10381		*(unsigned long *)kern_data = prog_fd;
10382	}
10383
10384	err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
10385	if (err) {
10386		err = -errno;
10387		free(link);
10388		return libbpf_err_ptr(err);
10389	}
10390
10391	link->detach = bpf_link__detach_struct_ops;
10392	link->fd = map->fd;
10393
10394	return link;
10395}
10396
10397enum bpf_perf_event_ret
10398bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
10399			   void **copy_mem, size_t *copy_size,
10400			   bpf_perf_event_print_t fn, void *private_data)
10401{
10402	struct perf_event_mmap_page *header = mmap_mem;
10403	__u64 data_head = ring_buffer_read_head(header);
10404	__u64 data_tail = header->data_tail;
10405	void *base = ((__u8 *)header) + page_size;
10406	int ret = LIBBPF_PERF_EVENT_CONT;
10407	struct perf_event_header *ehdr;
10408	size_t ehdr_size;
10409
10410	while (data_head != data_tail) {
10411		ehdr = base + (data_tail & (mmap_size - 1));
10412		ehdr_size = ehdr->size;
10413
10414		if (((void *)ehdr) + ehdr_size > base + mmap_size) {
10415			void *copy_start = ehdr;
10416			size_t len_first = base + mmap_size - copy_start;
10417			size_t len_secnd = ehdr_size - len_first;
10418
10419			if (*copy_size < ehdr_size) {
10420				free(*copy_mem);
10421				*copy_mem = malloc(ehdr_size);
10422				if (!*copy_mem) {
10423					*copy_size = 0;
10424					ret = LIBBPF_PERF_EVENT_ERROR;
10425					break;
10426				}
10427				*copy_size = ehdr_size;
10428			}
10429
10430			memcpy(*copy_mem, copy_start, len_first);
10431			memcpy(*copy_mem + len_first, base, len_secnd);
10432			ehdr = *copy_mem;
10433		}
10434
10435		ret = fn(ehdr, private_data);
10436		data_tail += ehdr_size;
10437		if (ret != LIBBPF_PERF_EVENT_CONT)
10438			break;
10439	}
10440
10441	ring_buffer_write_tail(header, data_tail);
10442	return libbpf_err(ret);
10443}
10444
10445struct perf_buffer;
10446
/* Internal parameter bundle handed to __perf_buffer__new() by the public
 * perf_buffer__new() and perf_buffer__new_raw() constructors.
 */
struct perf_buffer_params {
	/* perf event attributes used when opening each per-CPU event */
	struct perf_event_attr *attr;
	/* if event_cb is specified, it takes precedence */
	perf_buffer_event_fn event_cb;
	/* sample_cb and lost_cb are higher-level common-case callbacks */
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	/* opaque pointer passed as first argument to the callbacks */
	void *ctx;
	/* if > 0: number of entries in cpus[] and map_keys[]; otherwise the
	 * CPUs are determined by __perf_buffer__new() itself
	 */
	int cpu_cnt;
	/* CPU for the i-th buffer (only read when cpu_cnt > 0) */
	int *cpus;
	/* map key for the i-th buffer (only read when cpu_cnt > 0) */
	int *map_keys;
};
10459
/* State of a single per-CPU perf ring buffer owned by a struct perf_buffer. */
struct perf_cpu_buf {
	struct perf_buffer *pb; /* owning perf_buffer */
	void *base; /* mmap()'ed memory */
	void *buf; /* for reconstructing segmented data */
	size_t buf_size; /* current size of buf */
	int fd; /* FD returned by perf_event_open() */
	int cpu; /* CPU the perf event was opened on */
	int map_key; /* key under which fd is stored in the BPF map */
};
10469
/* Manager for a set of per-CPU perf ring buffers backing one
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
 */
struct perf_buffer {
	/* if set, receives every raw record and the two callbacks below are
	 * not used
	 */
	perf_buffer_event_fn event_cb;
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx; /* passed into callbacks */

	size_t page_size; /* system page size, from getpagesize() */
	size_t mmap_size; /* page_size * page_cnt, size of the data area */
	struct perf_cpu_buf **cpu_bufs; /* array of cpu_cnt buffers */
	struct epoll_event *events; /* epoll event array, one per buffer */
	int cpu_cnt; /* number of allocated CPU buffers */
	int epoll_fd; /* epoll instance FD watching all perf event FDs */
	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};
10484
10485static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
10486				      struct perf_cpu_buf *cpu_buf)
10487{
10488	if (!cpu_buf)
10489		return;
10490	if (cpu_buf->base &&
10491	    munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
10492		pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
10493	if (cpu_buf->fd >= 0) {
10494		ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
10495		close(cpu_buf->fd);
10496	}
10497	free(cpu_buf->buf);
10498	free(cpu_buf);
10499}
10500
10501void perf_buffer__free(struct perf_buffer *pb)
10502{
10503	int i;
10504
10505	if (IS_ERR_OR_NULL(pb))
10506		return;
10507	if (pb->cpu_bufs) {
10508		for (i = 0; i < pb->cpu_cnt; i++) {
10509			struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
10510
10511			if (!cpu_buf)
10512				continue;
10513
10514			bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
10515			perf_buffer__free_cpu_buf(pb, cpu_buf);
10516		}
10517		free(pb->cpu_bufs);
10518	}
10519	if (pb->epoll_fd >= 0)
10520		close(pb->epoll_fd);
10521	free(pb->events);
10522	free(pb);
10523}
10524
10525static struct perf_cpu_buf *
10526perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
10527			  int cpu, int map_key)
10528{
10529	struct perf_cpu_buf *cpu_buf;
10530	char msg[STRERR_BUFSIZE];
10531	int err;
10532
10533	cpu_buf = calloc(1, sizeof(*cpu_buf));
10534	if (!cpu_buf)
10535		return ERR_PTR(-ENOMEM);
10536
10537	cpu_buf->pb = pb;
10538	cpu_buf->cpu = cpu;
10539	cpu_buf->map_key = map_key;
10540
10541	cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
10542			      -1, PERF_FLAG_FD_CLOEXEC);
10543	if (cpu_buf->fd < 0) {
10544		err = -errno;
10545		pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
10546			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
10547		goto error;
10548	}
10549
10550	cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
10551			     PROT_READ | PROT_WRITE, MAP_SHARED,
10552			     cpu_buf->fd, 0);
10553	if (cpu_buf->base == MAP_FAILED) {
10554		cpu_buf->base = NULL;
10555		err = -errno;
10556		pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
10557			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
10558		goto error;
10559	}
10560
10561	if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
10562		err = -errno;
10563		pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
10564			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
10565		goto error;
10566	}
10567
10568	return cpu_buf;
10569
10570error:
10571	perf_buffer__free_cpu_buf(pb, cpu_buf);
10572	return (struct perf_cpu_buf *)ERR_PTR(err);
10573}
10574
10575static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
10576					      struct perf_buffer_params *p);
10577
10578struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
10579				     const struct perf_buffer_opts *opts)
10580{
10581	struct perf_buffer_params p = {};
10582	struct perf_event_attr attr = { 0, };
10583
10584	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
10585	attr.type = PERF_TYPE_SOFTWARE;
10586	attr.sample_type = PERF_SAMPLE_RAW;
10587	attr.sample_period = 1;
10588	attr.wakeup_events = 1;
10589
10590	p.attr = &attr;
10591	p.sample_cb = opts ? opts->sample_cb : NULL;
10592	p.lost_cb = opts ? opts->lost_cb : NULL;
10593	p.ctx = opts ? opts->ctx : NULL;
10594
10595	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
10596}
10597
10598struct perf_buffer *
10599perf_buffer__new_raw(int map_fd, size_t page_cnt,
10600		     const struct perf_buffer_raw_opts *opts)
10601{
10602	struct perf_buffer_params p = {};
10603
10604	p.attr = opts->attr;
10605	p.event_cb = opts->event_cb;
10606	p.ctx = opts->ctx;
10607	p.cpu_cnt = opts->cpu_cnt;
10608	p.cpus = opts->cpus;
10609	p.map_keys = opts->map_keys;
10610
10611	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
10612}
10613
10614static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
10615					      struct perf_buffer_params *p)
10616{
10617	const char *online_cpus_file = "/sys/devices/system/cpu/online";
10618	struct bpf_map_info map;
10619	char msg[STRERR_BUFSIZE];
10620	struct perf_buffer *pb;
10621	bool *online = NULL;
10622	__u32 map_info_len;
10623	int err, i, j, n;
10624
10625	if (page_cnt & (page_cnt - 1)) {
10626		pr_warn("page count should be power of two, but is %zu\n",
10627			page_cnt);
10628		return ERR_PTR(-EINVAL);
10629	}
10630
10631	/* best-effort sanity checks */
10632	memset(&map, 0, sizeof(map));
10633	map_info_len = sizeof(map);
10634	err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
10635	if (err) {
10636		err = -errno;
10637		/* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
10638		 * -EBADFD, -EFAULT, or -E2BIG on real error
10639		 */
10640		if (err != -EINVAL) {
10641			pr_warn("failed to get map info for map FD %d: %s\n",
10642				map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
10643			return ERR_PTR(err);
10644		}
10645		pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
10646			 map_fd);
10647	} else {
10648		if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
10649			pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
10650				map.name);
10651			return ERR_PTR(-EINVAL);
10652		}
10653	}
10654
10655	pb = calloc(1, sizeof(*pb));
10656	if (!pb)
10657		return ERR_PTR(-ENOMEM);
10658
10659	pb->event_cb = p->event_cb;
10660	pb->sample_cb = p->sample_cb;
10661	pb->lost_cb = p->lost_cb;
10662	pb->ctx = p->ctx;
10663
10664	pb->page_size = getpagesize();
10665	pb->mmap_size = pb->page_size * page_cnt;
10666	pb->map_fd = map_fd;
10667
10668	pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
10669	if (pb->epoll_fd < 0) {
10670		err = -errno;
10671		pr_warn("failed to create epoll instance: %s\n",
10672			libbpf_strerror_r(err, msg, sizeof(msg)));
10673		goto error;
10674	}
10675
10676	if (p->cpu_cnt > 0) {
10677		pb->cpu_cnt = p->cpu_cnt;
10678	} else {
10679		pb->cpu_cnt = libbpf_num_possible_cpus();
10680		if (pb->cpu_cnt < 0) {
10681			err = pb->cpu_cnt;
10682			goto error;
10683		}
10684		if (map.max_entries && map.max_entries < pb->cpu_cnt)
10685			pb->cpu_cnt = map.max_entries;
10686	}
10687
10688	pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
10689	if (!pb->events) {
10690		err = -ENOMEM;
10691		pr_warn("failed to allocate events: out of memory\n");
10692		goto error;
10693	}
10694	pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
10695	if (!pb->cpu_bufs) {
10696		err = -ENOMEM;
10697		pr_warn("failed to allocate buffers: out of memory\n");
10698		goto error;
10699	}
10700
10701	err = parse_cpu_mask_file(online_cpus_file, &online, &n);
10702	if (err) {
10703		pr_warn("failed to get online CPU mask: %d\n", err);
10704		goto error;
10705	}
10706
10707	for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
10708		struct perf_cpu_buf *cpu_buf;
10709		int cpu, map_key;
10710
10711		cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
10712		map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
10713
10714		/* in case user didn't explicitly requested particular CPUs to
10715		 * be attached to, skip offline/not present CPUs
10716		 */
10717		if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
10718			continue;
10719
10720		cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
10721		if (IS_ERR(cpu_buf)) {
10722			err = PTR_ERR(cpu_buf);
10723			goto error;
10724		}
10725
10726		pb->cpu_bufs[j] = cpu_buf;
10727
10728		err = bpf_map_update_elem(pb->map_fd, &map_key,
10729					  &cpu_buf->fd, 0);
10730		if (err) {
10731			err = -errno;
10732			pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
10733				cpu, map_key, cpu_buf->fd,
10734				libbpf_strerror_r(err, msg, sizeof(msg)));
10735			goto error;
10736		}
10737
10738		pb->events[j].events = EPOLLIN;
10739		pb->events[j].data.ptr = cpu_buf;
10740		if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
10741			      &pb->events[j]) < 0) {
10742			err = -errno;
10743			pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
10744				cpu, cpu_buf->fd,
10745				libbpf_strerror_r(err, msg, sizeof(msg)));
10746			goto error;
10747		}
10748		j++;
10749	}
10750	pb->cpu_cnt = j;
10751	free(online);
10752
10753	return pb;
10754
10755error:
10756	free(online);
10757	if (pb)
10758		perf_buffer__free(pb);
10759	return ERR_PTR(err);
10760}
10761
/* View of a PERF_RECORD_SAMPLE record as used by
 * perf_buffer__process_record(): data and size are handed to sample_cb.
 */
struct perf_sample_raw {
	struct perf_event_header header;
	uint32_t size; /* passed to sample_cb as the size of data */
	char data[]; /* raw sample payload */
};
10767
/* View of a PERF_RECORD_LOST record as used by
 * perf_buffer__process_record(): only the lost count is handed to lost_cb.
 */
struct perf_sample_lost {
	struct perf_event_header header;
	uint64_t id;
	uint64_t lost; /* passed to lost_cb */
	uint64_t sample_id;
};
10774
10775static enum bpf_perf_event_ret
10776perf_buffer__process_record(struct perf_event_header *e, void *ctx)
10777{
10778	struct perf_cpu_buf *cpu_buf = ctx;
10779	struct perf_buffer *pb = cpu_buf->pb;
10780	void *data = e;
10781
10782	/* user wants full control over parsing perf event */
10783	if (pb->event_cb)
10784		return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
10785
10786	switch (e->type) {
10787	case PERF_RECORD_SAMPLE: {
10788		struct perf_sample_raw *s = data;
10789
10790		if (pb->sample_cb)
10791			pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
10792		break;
10793	}
10794	case PERF_RECORD_LOST: {
10795		struct perf_sample_lost *s = data;
10796
10797		if (pb->lost_cb)
10798			pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
10799		break;
10800	}
10801	default:
10802		pr_warn("unknown perf sample type %d\n", e->type);
10803		return LIBBPF_PERF_EVENT_ERROR;
10804	}
10805	return LIBBPF_PERF_EVENT_CONT;
10806}
10807
10808static int perf_buffer__process_records(struct perf_buffer *pb,
10809					struct perf_cpu_buf *cpu_buf)
10810{
10811	enum bpf_perf_event_ret ret;
10812
10813	ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
10814					 pb->page_size, &cpu_buf->buf,
10815					 &cpu_buf->buf_size,
10816					 perf_buffer__process_record, cpu_buf);
10817	if (ret != LIBBPF_PERF_EVENT_CONT)
10818		return ret;
10819	return 0;
10820}
10821
10822int perf_buffer__epoll_fd(const struct perf_buffer *pb)
10823{
10824	return pb->epoll_fd;
10825}
10826
10827int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
10828{
10829	int i, cnt, err;
10830
10831	cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
10832	if (cnt < 0)
10833		return -errno;
10834
10835	for (i = 0; i < cnt; i++) {
10836		struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
10837
10838		err = perf_buffer__process_records(pb, cpu_buf);
10839		if (err) {
10840			pr_warn("error while processing records: %d\n", err);
10841			return libbpf_err(err);
10842		}
10843	}
10844	return cnt;
10845}
10846
10847/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
10848 * manager.
10849 */
10850size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
10851{
10852	return pb->cpu_cnt;
10853}
10854
10855/*
10856 * Return perf_event FD of a ring buffer in *buf_idx* slot of
10857 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
10858 * select()/poll()/epoll() Linux syscalls.
10859 */
10860int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
10861{
10862	struct perf_cpu_buf *cpu_buf;
10863
10864	if (buf_idx >= pb->cpu_cnt)
10865		return libbpf_err(-EINVAL);
10866
10867	cpu_buf = pb->cpu_bufs[buf_idx];
10868	if (!cpu_buf)
10869		return libbpf_err(-ENOENT);
10870
10871	return cpu_buf->fd;
10872}
10873
10874/*
10875 * Consume data from perf ring buffer corresponding to slot *buf_idx* in
10876 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
10877 * consume, do nothing and return success.
10878 * Returns:
10879 *   - 0 on success;
10880 *   - <0 on failure.
10881 */
10882int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
10883{
10884	struct perf_cpu_buf *cpu_buf;
10885
10886	if (buf_idx >= pb->cpu_cnt)
10887		return libbpf_err(-EINVAL);
10888
10889	cpu_buf = pb->cpu_bufs[buf_idx];
10890	if (!cpu_buf)
10891		return libbpf_err(-ENOENT);
10892
10893	return perf_buffer__process_records(pb, cpu_buf);
10894}
10895
10896int perf_buffer__consume(struct perf_buffer *pb)
10897{
10898	int i, err;
10899
10900	for (i = 0; i < pb->cpu_cnt; i++) {
10901		struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
10902
10903		if (!cpu_buf)
10904			continue;
10905
10906		err = perf_buffer__process_records(pb, cpu_buf);
10907		if (err) {
10908			pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
10909			return libbpf_err(err);
10910		}
10911	}
10912	return 0;
10913}
10914
/* Describes one variable-sized array referenced from struct bpf_prog_info by
 * the byte offsets of the fields that hold its address, element count and
 * element size. Offsets are consumed by the bpf_prog_info_{read,set}_offset_*
 * helpers, which treat a negative offset as "no field" and, on reads, return
 * its negation as the value.
 */
struct bpf_prog_info_array_desc {
	int	array_offset;	/* e.g. offset of jited_prog_insns */
	int	count_offset;	/* e.g. offset of jited_prog_len */
	int	size_offset;	/* > 0: offset of rec size,
				 * < 0: fix size of -size_offset
				 */
};
10922
10923static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
10924	[BPF_PROG_INFO_JITED_INSNS] = {
10925		offsetof(struct bpf_prog_info, jited_prog_insns),
10926		offsetof(struct bpf_prog_info, jited_prog_len),
10927		-1,
10928	},
10929	[BPF_PROG_INFO_XLATED_INSNS] = {
10930		offsetof(struct bpf_prog_info, xlated_prog_insns),
10931		offsetof(struct bpf_prog_info, xlated_prog_len),
10932		-1,
10933	},
10934	[BPF_PROG_INFO_MAP_IDS] = {
10935		offsetof(struct bpf_prog_info, map_ids),
10936		offsetof(struct bpf_prog_info, nr_map_ids),
10937		-(int)sizeof(__u32),
10938	},
10939	[BPF_PROG_INFO_JITED_KSYMS] = {
10940		offsetof(struct bpf_prog_info, jited_ksyms),
10941		offsetof(struct bpf_prog_info, nr_jited_ksyms),
10942		-(int)sizeof(__u64),
10943	},
10944	[BPF_PROG_INFO_JITED_FUNC_LENS] = {
10945		offsetof(struct bpf_prog_info, jited_func_lens),
10946		offsetof(struct bpf_prog_info, nr_jited_func_lens),
10947		-(int)sizeof(__u32),
10948	},
10949	[BPF_PROG_INFO_FUNC_INFO] = {
10950		offsetof(struct bpf_prog_info, func_info),
10951		offsetof(struct bpf_prog_info, nr_func_info),
10952		offsetof(struct bpf_prog_info, func_info_rec_size),
10953	},
10954	[BPF_PROG_INFO_LINE_INFO] = {
10955		offsetof(struct bpf_prog_info, line_info),
10956		offsetof(struct bpf_prog_info, nr_line_info),
10957		offsetof(struct bpf_prog_info, line_info_rec_size),
10958	},
10959	[BPF_PROG_INFO_JITED_LINE_INFO] = {
10960		offsetof(struct bpf_prog_info, jited_line_info),
10961		offsetof(struct bpf_prog_info, nr_jited_line_info),
10962		offsetof(struct bpf_prog_info, jited_line_info_rec_size),
10963	},
10964	[BPF_PROG_INFO_PROG_TAGS] = {
10965		offsetof(struct bpf_prog_info, prog_tags),
10966		offsetof(struct bpf_prog_info, nr_prog_tags),
10967		-(int)sizeof(__u8) * BPF_TAG_SIZE,
10968	},
10969
10970};
10971
10972static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info,
10973					   int offset)
10974{
10975	__u32 *array = (__u32 *)info;
10976
10977	if (offset >= 0)
10978		return array[offset / sizeof(__u32)];
10979	return -(int)offset;
10980}
10981
10982static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info,
10983					   int offset)
10984{
10985	__u64 *array = (__u64 *)info;
10986
10987	if (offset >= 0)
10988		return array[offset / sizeof(__u64)];
10989	return -(int)offset;
10990}
10991
10992static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
10993					 __u32 val)
10994{
10995	__u32 *array = (__u32 *)info;
10996
10997	if (offset >= 0)
10998		array[offset / sizeof(__u32)] = val;
10999}
11000
11001static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
11002					 __u64 val)
11003{
11004	__u64 *array = (__u64 *)info;
11005
11006	if (offset >= 0)
11007		array[offset / sizeof(__u64)] = val;
11008}
11009
11010struct bpf_prog_info_linear *
11011bpf_program__get_prog_info_linear(int fd, __u64 arrays)
11012{
11013	struct bpf_prog_info_linear *info_linear;
11014	struct bpf_prog_info info = {};
11015	__u32 info_len = sizeof(info);
11016	__u32 data_len = 0;
11017	int i, err;
11018	void *ptr;
11019
11020	if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
11021		return libbpf_err_ptr(-EINVAL);
11022
11023	/* step 1: get array dimensions */
11024	err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
11025	if (err) {
11026		pr_debug("can't get prog info: %s", strerror(errno));
11027		return libbpf_err_ptr(-EFAULT);
11028	}
11029
11030	/* step 2: calculate total size of all arrays */
11031	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11032		bool include_array = (arrays & (1UL << i)) > 0;
11033		struct bpf_prog_info_array_desc *desc;
11034		__u32 count, size;
11035
11036		desc = bpf_prog_info_array_desc + i;
11037
11038		/* kernel is too old to support this field */
11039		if (info_len < desc->array_offset + sizeof(__u32) ||
11040		    info_len < desc->count_offset + sizeof(__u32) ||
11041		    (desc->size_offset > 0 && info_len < desc->size_offset))
11042			include_array = false;
11043
11044		if (!include_array) {
11045			arrays &= ~(1UL << i);	/* clear the bit */
11046			continue;
11047		}
11048
11049		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
11050		size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
11051
11052		data_len += count * size;
11053	}
11054
11055	/* step 3: allocate continuous memory */
11056	data_len = roundup(data_len, sizeof(__u64));
11057	info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
11058	if (!info_linear)
11059		return libbpf_err_ptr(-ENOMEM);
11060
11061	/* step 4: fill data to info_linear->info */
11062	info_linear->arrays = arrays;
11063	memset(&info_linear->info, 0, sizeof(info));
11064	ptr = info_linear->data;
11065
11066	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11067		struct bpf_prog_info_array_desc *desc;
11068		__u32 count, size;
11069
11070		if ((arrays & (1UL << i)) == 0)
11071			continue;
11072
11073		desc  = bpf_prog_info_array_desc + i;
11074		count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
11075		size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
11076		bpf_prog_info_set_offset_u32(&info_linear->info,
11077					     desc->count_offset, count);
11078		bpf_prog_info_set_offset_u32(&info_linear->info,
11079					     desc->size_offset, size);
11080		bpf_prog_info_set_offset_u64(&info_linear->info,
11081					     desc->array_offset,
11082					     ptr_to_u64(ptr));
11083		ptr += count * size;
11084	}
11085
11086	/* step 5: call syscall again to get required arrays */
11087	err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
11088	if (err) {
11089		pr_debug("can't get prog info: %s", strerror(errno));
11090		free(info_linear);
11091		return libbpf_err_ptr(-EFAULT);
11092	}
11093
11094	/* step 6: verify the data */
11095	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11096		struct bpf_prog_info_array_desc *desc;
11097		__u32 v1, v2;
11098
11099		if ((arrays & (1UL << i)) == 0)
11100			continue;
11101
11102		desc = bpf_prog_info_array_desc + i;
11103		v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
11104		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
11105						   desc->count_offset);
11106		if (v1 != v2)
11107			pr_warn("%s: mismatch in element count\n", __func__);
11108
11109		v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
11110		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
11111						   desc->size_offset);
11112		if (v1 != v2)
11113			pr_warn("%s: mismatch in rec size\n", __func__);
11114	}
11115
11116	/* step 7: update info_len and data_len */
11117	info_linear->info_len = sizeof(struct bpf_prog_info);
11118	info_linear->data_len = data_len;
11119
11120	return info_linear;
11121}
11122
11123void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
11124{
11125	int i;
11126
11127	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11128		struct bpf_prog_info_array_desc *desc;
11129		__u64 addr, offs;
11130
11131		if ((info_linear->arrays & (1UL << i)) == 0)
11132			continue;
11133
11134		desc = bpf_prog_info_array_desc + i;
11135		addr = bpf_prog_info_read_offset_u64(&info_linear->info,
11136						     desc->array_offset);
11137		offs = addr - ptr_to_u64(info_linear->data);
11138		bpf_prog_info_set_offset_u64(&info_linear->info,
11139					     desc->array_offset, offs);
11140	}
11141}
11142
11143void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
11144{
11145	int i;
11146
11147	for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
11148		struct bpf_prog_info_array_desc *desc;
11149		__u64 addr, offs;
11150
11151		if ((info_linear->arrays & (1UL << i)) == 0)
11152			continue;
11153
11154		desc = bpf_prog_info_array_desc + i;
11155		offs = bpf_prog_info_read_offset_u64(&info_linear->info,
11156						     desc->array_offset);
11157		addr = offs + ptr_to_u64(info_linear->data);
11158		bpf_prog_info_set_offset_u64(&info_linear->info,
11159					     desc->array_offset, addr);
11160	}
11161}
11162
11163int bpf_program__set_attach_target(struct bpf_program *prog,
11164				   int attach_prog_fd,
11165				   const char *attach_func_name)
11166{
11167	int btf_obj_fd = 0, btf_id = 0, err;
11168
11169	if (!prog || attach_prog_fd < 0)
11170		return libbpf_err(-EINVAL);
11171
11172	if (prog->obj->loaded)
11173		return libbpf_err(-EINVAL);
11174
11175	if (attach_prog_fd && !attach_func_name) {
11176		/* remember attach_prog_fd and let bpf_program__load() find
11177		 * BTF ID during the program load
11178		 */
11179		prog->attach_prog_fd = attach_prog_fd;
11180		return 0;
11181	}
11182
11183	if (attach_prog_fd) {
11184		btf_id = libbpf_find_prog_btf_id(attach_func_name,
11185						 attach_prog_fd);
11186		if (btf_id < 0)
11187			return libbpf_err(btf_id);
11188	} else {
11189		if (!attach_func_name)
11190			return libbpf_err(-EINVAL);
11191
11192		/* load btf_vmlinux, if not yet */
11193		err = bpf_object__load_vmlinux_btf(prog->obj, true);
11194		if (err)
11195			return libbpf_err(err);
11196		err = find_kernel_btf_id(prog->obj, attach_func_name,
11197					 prog->expected_attach_type,
11198					 &btf_obj_fd, &btf_id);
11199		if (err)
11200			return libbpf_err(err);
11201	}
11202
11203	prog->attach_btf_id = btf_id;
11204	prog->attach_btf_obj_fd = btf_obj_fd;
11205	prog->attach_prog_fd = attach_prog_fd;
11206	return 0;
11207}
11208
/*
 * Parse a CPU list string (e.g. "0-3,5") into a per-CPU boolean mask.
 *
 * @s:       NUL-terminated list; items are separated by ',' or '\n' and
 *           each item has the format \d+-\d+ or \d+
 * @mask:    out: heap-allocated array with one bool per CPU index, true
 *           for every CPU named in @s; the caller releases it with free()
 * @mask_sz: out: number of elements in *mask (highest CPU index + 1)
 *
 * Ranges may appear in any order and may overlap.
 *
 * Returns 0 on success. Returns -EINVAL for malformed, invalid or empty
 * input and -ENOMEM on allocation failure; in every failure case *mask is
 * NULL and *mask_sz is 0.
 */
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
	int err = 0, n, len, start, end = -1;
	bool *tmp;

	*mask = NULL;
	*mask_sz = 0;

	/* Each sub string separated by ',' has format \d+-\d+ or \d+ */
	while (*s) {
		if (*s == ',' || *s == '\n') {
			s++;
			continue;
		}
		/* %n records how many characters the matched item consumed;
		 * the second %n overwrites the first when a range is matched
		 */
		n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
		if (n <= 0 || n > 2) {
			pr_warn("Failed to get CPU range %s: %d\n", s, n);
			err = -EINVAL;
			goto cleanup;
		} else if (n == 1) {
			end = start;
		}
		/* end == INT_MAX would overflow the 'end + 1' size below */
		if (start < 0 || start > end || end == INT_MAX) {
			pr_warn("Invalid CPU range [%d,%d] in %s\n",
				start, end, s);
			err = -EINVAL;
			goto cleanup;
		}
		/* Only ever grow the mask: a range that lies (partly) below
		 * the current size must neither shrink the buffer nor produce
		 * a negative memset() length.
		 */
		if (end + 1 > *mask_sz) {
			tmp = realloc(*mask, end + 1);
			if (!tmp) {
				err = -ENOMEM;
				goto cleanup;
			}
			*mask = tmp;
			/* clear just the newly added tail */
			memset(tmp + *mask_sz, 0, end + 1 - *mask_sz);
			*mask_sz = end + 1;
		}
		memset(*mask + start, 1, end - start + 1);
		s += len;
	}
	if (!*mask_sz) {
		pr_warn("Empty CPU range\n");
		return -EINVAL;
	}
	return 0;
cleanup:
	free(*mask);
	*mask = NULL;
	*mask_sz = 0;
	return err;
}
11258
11259int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
11260{
11261	int fd, err = 0, len;
11262	char buf[128];
11263
11264	fd = open(fcpu, O_RDONLY | O_CLOEXEC);
11265	if (fd < 0) {
11266		err = -errno;
11267		pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
11268		return err;
11269	}
11270	len = read(fd, buf, sizeof(buf));
11271	close(fd);
11272	if (len <= 0) {
11273		err = len ? -errno : -EINVAL;
11274		pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
11275		return err;
11276	}
11277	if (len >= sizeof(buf)) {
11278		pr_warn("CPU mask is too big in file %s\n", fcpu);
11279		return -E2BIG;
11280	}
11281	buf[len] = '\0';
11282
11283	return parse_cpu_mask_str(buf, mask, mask_sz);
11284}
11285
/*
 * Return the number of CPUs listed in /sys/devices/system/cpu/possible.
 *
 * The count is stored in a function-local static after the first
 * successful call and returned directly afterwards. If the file cannot
 * be parsed, the error from parse_cpu_mask_file() is returned through
 * libbpf_err() and nothing is cached.
 */
int libbpf_num_possible_cpus(void)
{
	static const char *fcpu = "/sys/devices/system/cpu/possible";
	static int cpus;
	int cached, ret, sz, idx, cnt = 0;
	bool *possible;

	cached = READ_ONCE(cpus);
	if (cached > 0)
		return cached;

	ret = parse_cpu_mask_file(fcpu, &possible, &sz);
	if (ret)
		return libbpf_err(ret);

	/* count the set entries of the mask */
	for (idx = 0; idx < sz; idx++)
		cnt += possible[idx] ? 1 : 0;
	free(possible);

	WRITE_ONCE(cpus, cnt);
	return cnt;
}
11311
11312int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
11313			      const struct bpf_object_open_opts *opts)
11314{
11315	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
11316		.object_name = s->name,
11317	);
11318	struct bpf_object *obj;
11319	int i, err;
11320
11321	/* Attempt to preserve opts->object_name, unless overriden by user
11322	 * explicitly. Overwriting object name for skeletons is discouraged,
11323	 * as it breaks global data maps, because they contain object name
11324	 * prefix as their own map name prefix. When skeleton is generated,
11325	 * bpftool is making an assumption that this name will stay the same.
11326	 */
11327	if (opts) {
11328		memcpy(&skel_opts, opts, sizeof(*opts));
11329		if (!opts->object_name)
11330			skel_opts.object_name = s->name;
11331	}
11332
11333	obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
11334	err = libbpf_get_error(obj);
11335	if (err) {
11336		pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
11337			s->name, err);
11338		return libbpf_err(err);
11339	}
11340
11341	*s->obj = obj;
11342
11343	for (i = 0; i < s->map_cnt; i++) {
11344		struct bpf_map **map = s->maps[i].map;
11345		const char *name = s->maps[i].name;
11346		void **mmaped = s->maps[i].mmaped;
11347
11348		*map = bpf_object__find_map_by_name(obj, name);
11349		if (!*map) {
11350			pr_warn("failed to find skeleton map '%s'\n", name);
11351			return libbpf_err(-ESRCH);
11352		}
11353
11354		/* externs shouldn't be pre-setup from user code */
11355		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
11356			*mmaped = (*map)->mmaped;
11357	}
11358
11359	for (i = 0; i < s->prog_cnt; i++) {
11360		struct bpf_program **prog = s->progs[i].prog;
11361		const char *name = s->progs[i].name;
11362
11363		*prog = bpf_object__find_program_by_name(obj, name);
11364		if (!*prog) {
11365			pr_warn("failed to find skeleton program '%s'\n", name);
11366			return libbpf_err(-ESRCH);
11367		}
11368	}
11369
11370	return 0;
11371}
11372
11373int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
11374{
11375	int i, err;
11376
11377	err = bpf_object__load(*s->obj);
11378	if (err) {
11379		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
11380		return libbpf_err(err);
11381	}
11382
11383	for (i = 0; i < s->map_cnt; i++) {
11384		struct bpf_map *map = *s->maps[i].map;
11385		size_t mmap_sz = bpf_map_mmap_sz(map);
11386		int prot, map_fd = bpf_map__fd(map);
11387		void **mmaped = s->maps[i].mmaped;
11388
11389		if (!mmaped)
11390			continue;
11391
11392		if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
11393			*mmaped = NULL;
11394			continue;
11395		}
11396
11397		if (map->def.map_flags & BPF_F_RDONLY_PROG)
11398			prot = PROT_READ;
11399		else
11400			prot = PROT_READ | PROT_WRITE;
11401
11402		/* Remap anonymous mmap()-ed "map initialization image" as
11403		 * a BPF map-backed mmap()-ed memory, but preserving the same
11404		 * memory address. This will cause kernel to change process'
11405		 * page table to point to a different piece of kernel memory,
11406		 * but from userspace point of view memory address (and its
11407		 * contents, being identical at this point) will stay the
11408		 * same. This mapping will be released by bpf_object__close()
11409		 * as per normal clean up procedure, so we don't need to worry
11410		 * about it from skeleton's clean up perspective.
11411		 */
11412		*mmaped = mmap(map->mmaped, mmap_sz, prot,
11413				MAP_SHARED | MAP_FIXED, map_fd, 0);
11414		if (*mmaped == MAP_FAILED) {
11415			err = -errno;
11416			*mmaped = NULL;
11417			pr_warn("failed to re-mmap() map '%s': %d\n",
11418				 bpf_map__name(map), err);
11419			return libbpf_err(err);
11420		}
11421	}
11422
11423	return 0;
11424}
11425
11426int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
11427{
11428	int i, err;
11429
11430	for (i = 0; i < s->prog_cnt; i++) {
11431		struct bpf_program *prog = *s->progs[i].prog;
11432		struct bpf_link **link = s->progs[i].link;
11433
11434		if (!prog->load)
11435			continue;
11436
11437		/* auto-attaching not supported for this program */
11438		if (!prog->sec_def || !prog->sec_def->attach_fn)
11439			continue;
11440
11441		*link = bpf_program__attach(prog);
11442		err = libbpf_get_error(*link);
11443		if (err) {
11444			pr_warn("failed to auto-attach program '%s': %d\n",
11445				bpf_program__name(prog), err);
11446			return libbpf_err(err);
11447		}
11448	}
11449
11450	return 0;
11451}
11452
11453void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
11454{
11455	int i;
11456
11457	for (i = 0; i < s->prog_cnt; i++) {
11458		struct bpf_link **link = s->progs[i].link;
11459
11460		bpf_link__destroy(*link);
11461		*link = NULL;
11462	}
11463}
11464
11465void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
11466{
11467	if (s->progs)
11468		bpf_object__detach_skeleton(s);
11469	if (s->obj)
11470		bpf_object__close(*s->obj);
11471	free(s->maps);
11472	free(s->progs);
11473	free(s);
11474}