tools/lib/bpf/libbpf.c at v6.0-rc2 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / tools / lib / bpf / libbpf.c
at v6.0-rc2 12388 lines 333 kB view raw
    1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
    2
    3/*
    4 * Common eBPF ELF object loading operations.
    5 *
    6 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
    7 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
    8 * Copyright (C) 2015 Huawei Inc.
    9 * Copyright (C) 2017 Nicira, Inc.
   10 * Copyright (C) 2019 Isovalent, Inc.
   11 */
   12
   13#ifndef _GNU_SOURCE
   14#define _GNU_SOURCE
   15#endif
   16#include <stdlib.h>
   17#include <stdio.h>
   18#include <stdarg.h>
   19#include <libgen.h>
   20#include <inttypes.h>
   21#include <limits.h>
   22#include <string.h>
   23#include <unistd.h>
   24#include <endian.h>
   25#include <fcntl.h>
   26#include <errno.h>
   27#include <ctype.h>
   28#include <asm/unistd.h>
   29#include <linux/err.h>
   30#include <linux/kernel.h>
   31#include <linux/bpf.h>
   32#include <linux/btf.h>
   33#include <linux/filter.h>
   34#include <linux/limits.h>
   35#include <linux/perf_event.h>
   36#include <linux/ring_buffer.h>
   37#include <linux/version.h>
   38#include <sys/epoll.h>
   39#include <sys/ioctl.h>
   40#include <sys/mman.h>
   41#include <sys/stat.h>
   42#include <sys/types.h>
   43#include <sys/vfs.h>
   44#include <sys/utsname.h>
   45#include <sys/resource.h>
   46#include <libelf.h>
   47#include <gelf.h>
   48#include <zlib.h>
   49
   50#include "libbpf.h"
   51#include "bpf.h"
   52#include "btf.h"
   53#include "str_error.h"
   54#include "libbpf_internal.h"
   55#include "hashmap.h"
   56#include "bpf_gen_internal.h"
   57
   58#ifndef BPF_FS_MAGIC
   59#define BPF_FS_MAGIC		0xcafe4a11
   60#endif
   61
   62#define BPF_INSN_SZ (sizeof(struct bpf_insn))
   63
/* vfprintf() in __base_pr() uses a nonliteral format string. It may break
 * compilation if the user enables the corresponding warning. Disable it
 * explicitly.
 */
   67#pragma GCC diagnostic ignored "-Wformat-nonliteral"
   68
   69#define __printf(a, b)	__attribute__((format(printf, a, b)))
   70
   71static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
   72static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
   73
/* Lowercase string names for BPF attach type constants. Designated
 * initializers index the table by the constant's value, so any value without
 * an entry here is left as a NULL pointer.
 */
static const char * const attach_type_name[] = {
	[BPF_CGROUP_INET_INGRESS]	= "cgroup_inet_ingress",
	[BPF_CGROUP_INET_EGRESS]	= "cgroup_inet_egress",
	[BPF_CGROUP_INET_SOCK_CREATE]	= "cgroup_inet_sock_create",
	[BPF_CGROUP_INET_SOCK_RELEASE]	= "cgroup_inet_sock_release",
	[BPF_CGROUP_SOCK_OPS]		= "cgroup_sock_ops",
	[BPF_CGROUP_DEVICE]		= "cgroup_device",
	[BPF_CGROUP_INET4_BIND]		= "cgroup_inet4_bind",
	[BPF_CGROUP_INET6_BIND]		= "cgroup_inet6_bind",
	[BPF_CGROUP_INET4_CONNECT]	= "cgroup_inet4_connect",
	[BPF_CGROUP_INET6_CONNECT]	= "cgroup_inet6_connect",
	[BPF_CGROUP_INET4_POST_BIND]	= "cgroup_inet4_post_bind",
	[BPF_CGROUP_INET6_POST_BIND]	= "cgroup_inet6_post_bind",
	[BPF_CGROUP_INET4_GETPEERNAME]	= "cgroup_inet4_getpeername",
	[BPF_CGROUP_INET6_GETPEERNAME]	= "cgroup_inet6_getpeername",
	[BPF_CGROUP_INET4_GETSOCKNAME]	= "cgroup_inet4_getsockname",
	[BPF_CGROUP_INET6_GETSOCKNAME]	= "cgroup_inet6_getsockname",
	[BPF_CGROUP_UDP4_SENDMSG]	= "cgroup_udp4_sendmsg",
	[BPF_CGROUP_UDP6_SENDMSG]	= "cgroup_udp6_sendmsg",
	[BPF_CGROUP_SYSCTL]		= "cgroup_sysctl",
	[BPF_CGROUP_UDP4_RECVMSG]	= "cgroup_udp4_recvmsg",
	[BPF_CGROUP_UDP6_RECVMSG]	= "cgroup_udp6_recvmsg",
	[BPF_CGROUP_GETSOCKOPT]		= "cgroup_getsockopt",
	[BPF_CGROUP_SETSOCKOPT]		= "cgroup_setsockopt",
	[BPF_SK_SKB_STREAM_PARSER]	= "sk_skb_stream_parser",
	[BPF_SK_SKB_STREAM_VERDICT]	= "sk_skb_stream_verdict",
	[BPF_SK_SKB_VERDICT]		= "sk_skb_verdict",
	[BPF_SK_MSG_VERDICT]		= "sk_msg_verdict",
	[BPF_LIRC_MODE2]		= "lirc_mode2",
	[BPF_FLOW_DISSECTOR]		= "flow_dissector",
	[BPF_TRACE_RAW_TP]		= "trace_raw_tp",
	[BPF_TRACE_FENTRY]		= "trace_fentry",
	[BPF_TRACE_FEXIT]		= "trace_fexit",
	[BPF_MODIFY_RETURN]		= "modify_return",
	[BPF_LSM_MAC]			= "lsm_mac",
	[BPF_LSM_CGROUP]		= "lsm_cgroup",
	[BPF_SK_LOOKUP]			= "sk_lookup",
	[BPF_TRACE_ITER]		= "trace_iter",
	[BPF_XDP_DEVMAP]		= "xdp_devmap",
	[BPF_XDP_CPUMAP]		= "xdp_cpumap",
	[BPF_XDP]			= "xdp",
	[BPF_SK_REUSEPORT_SELECT]	= "sk_reuseport_select",
	[BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]	= "sk_reuseport_select_or_migrate",
	[BPF_PERF_EVENT]		= "perf_event",
	[BPF_TRACE_KPROBE_MULTI]	= "trace_kprobe_multi",
};
  120
/* Lowercase string names for BPF_LINK_TYPE_* constants, indexed by the
 * constant's value; values without an entry are left as NULL.
 */
static const char * const link_type_name[] = {
	[BPF_LINK_TYPE_UNSPEC]			= "unspec",
	[BPF_LINK_TYPE_RAW_TRACEPOINT]		= "raw_tracepoint",
	[BPF_LINK_TYPE_TRACING]			= "tracing",
	[BPF_LINK_TYPE_CGROUP]			= "cgroup",
	[BPF_LINK_TYPE_ITER]			= "iter",
	[BPF_LINK_TYPE_NETNS]			= "netns",
	[BPF_LINK_TYPE_XDP]			= "xdp",
	[BPF_LINK_TYPE_PERF_EVENT]		= "perf_event",
	[BPF_LINK_TYPE_KPROBE_MULTI]		= "kprobe_multi",
	[BPF_LINK_TYPE_STRUCT_OPS]		= "struct_ops",
};
  133
/* Lowercase string names for BPF_MAP_TYPE_* constants, indexed by the
 * constant's value; values without an entry are left as NULL.
 */
static const char * const map_type_name[] = {
	[BPF_MAP_TYPE_UNSPEC]			= "unspec",
	[BPF_MAP_TYPE_HASH]			= "hash",
	[BPF_MAP_TYPE_ARRAY]			= "array",
	[BPF_MAP_TYPE_PROG_ARRAY]		= "prog_array",
	[BPF_MAP_TYPE_PERF_EVENT_ARRAY]		= "perf_event_array",
	[BPF_MAP_TYPE_PERCPU_HASH]		= "percpu_hash",
	[BPF_MAP_TYPE_PERCPU_ARRAY]		= "percpu_array",
	[BPF_MAP_TYPE_STACK_TRACE]		= "stack_trace",
	[BPF_MAP_TYPE_CGROUP_ARRAY]		= "cgroup_array",
	[BPF_MAP_TYPE_LRU_HASH]			= "lru_hash",
	[BPF_MAP_TYPE_LRU_PERCPU_HASH]		= "lru_percpu_hash",
	[BPF_MAP_TYPE_LPM_TRIE]			= "lpm_trie",
	[BPF_MAP_TYPE_ARRAY_OF_MAPS]		= "array_of_maps",
	[BPF_MAP_TYPE_HASH_OF_MAPS]		= "hash_of_maps",
	[BPF_MAP_TYPE_DEVMAP]			= "devmap",
	[BPF_MAP_TYPE_DEVMAP_HASH]		= "devmap_hash",
	[BPF_MAP_TYPE_SOCKMAP]			= "sockmap",
	[BPF_MAP_TYPE_CPUMAP]			= "cpumap",
	[BPF_MAP_TYPE_XSKMAP]			= "xskmap",
	[BPF_MAP_TYPE_SOCKHASH]			= "sockhash",
	[BPF_MAP_TYPE_CGROUP_STORAGE]		= "cgroup_storage",
	[BPF_MAP_TYPE_REUSEPORT_SOCKARRAY]	= "reuseport_sockarray",
	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]	= "percpu_cgroup_storage",
	[BPF_MAP_TYPE_QUEUE]			= "queue",
	[BPF_MAP_TYPE_STACK]			= "stack",
	[BPF_MAP_TYPE_SK_STORAGE]		= "sk_storage",
	[BPF_MAP_TYPE_STRUCT_OPS]		= "struct_ops",
	[BPF_MAP_TYPE_RINGBUF]			= "ringbuf",
	[BPF_MAP_TYPE_INODE_STORAGE]		= "inode_storage",
	[BPF_MAP_TYPE_TASK_STORAGE]		= "task_storage",
	[BPF_MAP_TYPE_BLOOM_FILTER]		= "bloom_filter",
};
  167
/* Lowercase string names for BPF_PROG_TYPE_* constants, indexed by the
 * constant's value; values without an entry are left as NULL.
 */
static const char * const prog_type_name[] = {
	[BPF_PROG_TYPE_UNSPEC]			= "unspec",
	[BPF_PROG_TYPE_SOCKET_FILTER]		= "socket_filter",
	[BPF_PROG_TYPE_KPROBE]			= "kprobe",
	[BPF_PROG_TYPE_SCHED_CLS]		= "sched_cls",
	[BPF_PROG_TYPE_SCHED_ACT]		= "sched_act",
	[BPF_PROG_TYPE_TRACEPOINT]		= "tracepoint",
	[BPF_PROG_TYPE_XDP]			= "xdp",
	[BPF_PROG_TYPE_PERF_EVENT]		= "perf_event",
	[BPF_PROG_TYPE_CGROUP_SKB]		= "cgroup_skb",
	[BPF_PROG_TYPE_CGROUP_SOCK]		= "cgroup_sock",
	[BPF_PROG_TYPE_LWT_IN]			= "lwt_in",
	[BPF_PROG_TYPE_LWT_OUT]			= "lwt_out",
	[BPF_PROG_TYPE_LWT_XMIT]		= "lwt_xmit",
	[BPF_PROG_TYPE_SOCK_OPS]		= "sock_ops",
	[BPF_PROG_TYPE_SK_SKB]			= "sk_skb",
	[BPF_PROG_TYPE_CGROUP_DEVICE]		= "cgroup_device",
	[BPF_PROG_TYPE_SK_MSG]			= "sk_msg",
	[BPF_PROG_TYPE_RAW_TRACEPOINT]		= "raw_tracepoint",
	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR]	= "cgroup_sock_addr",
	[BPF_PROG_TYPE_LWT_SEG6LOCAL]		= "lwt_seg6local",
	[BPF_PROG_TYPE_LIRC_MODE2]		= "lirc_mode2",
	[BPF_PROG_TYPE_SK_REUSEPORT]		= "sk_reuseport",
	[BPF_PROG_TYPE_FLOW_DISSECTOR]		= "flow_dissector",
	[BPF_PROG_TYPE_CGROUP_SYSCTL]		= "cgroup_sysctl",
	[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE]	= "raw_tracepoint_writable",
	[BPF_PROG_TYPE_CGROUP_SOCKOPT]		= "cgroup_sockopt",
	[BPF_PROG_TYPE_TRACING]			= "tracing",
	[BPF_PROG_TYPE_STRUCT_OPS]		= "struct_ops",
	[BPF_PROG_TYPE_EXT]			= "ext",
	[BPF_PROG_TYPE_LSM]			= "lsm",
	[BPF_PROG_TYPE_SK_LOOKUP]		= "sk_lookup",
	[BPF_PROG_TYPE_SYSCALL]			= "syscall",
};
  202
  203static int __base_pr(enum libbpf_print_level level, const char *format,
  204		     va_list args)
  205{
  206	if (level == LIBBPF_DEBUG)
  207		return 0;
  208
  209	return vfprintf(stderr, format, args);
  210}
  211
  212static libbpf_print_fn_t __libbpf_pr = __base_pr;
  213
  214libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
  215{
  216	libbpf_print_fn_t old_print_fn = __libbpf_pr;
  217
  218	__libbpf_pr = fn;
  219	return old_print_fn;
  220}
  221
  222__printf(2, 3)
  223void libbpf_print(enum libbpf_print_level level, const char *format, ...)
  224{
  225	va_list args;
  226
  227	if (!__libbpf_pr)
  228		return;
  229
  230	va_start(args, format);
  231	__libbpf_pr(level, format, args);
  232	va_end(args);
  233}
  234
  235static void pr_perm_msg(int err)
  236{
  237	struct rlimit limit;
  238	char buf[100];
  239
  240	if (err != -EPERM || geteuid() != 0)
  241		return;
  242
  243	err = getrlimit(RLIMIT_MEMLOCK, &limit);
  244	if (err)
  245		return;
  246
  247	if (limit.rlim_cur == RLIM_INFINITY)
  248		return;
  249
  250	if (limit.rlim_cur < 1024)
  251		snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
  252	else if (limit.rlim_cur < 1024*1024)
  253		snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
  254	else
  255		snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
  256
  257	pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
  258		buf);
  259}
  260
  261#define STRERR_BUFSIZE  128
  262
  263/* Copied from tools/perf/util/util.h */
  264#ifndef zfree
  265# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
  266#endif
  267
  268#ifndef zclose
  269# define zclose(fd) ({			\
  270	int ___err = 0;			\
  271	if ((fd) >= 0)			\
  272		___err = close((fd));	\
  273	fd = -1;			\
  274	___err; })
  275#endif
  276
  277static inline __u64 ptr_to_u64(const void *ptr)
  278{
  279	return (__u64) (unsigned long) ptr;
  280}
  281
  282int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
  283{
  284	/* as of v1.0 libbpf_set_strict_mode() is a no-op */
  285	return 0;
  286}
  287
  288__u32 libbpf_major_version(void)
  289{
  290	return LIBBPF_MAJOR_VERSION;
  291}
  292
  293__u32 libbpf_minor_version(void)
  294{
  295	return LIBBPF_MINOR_VERSION;
  296}
  297
  298const char *libbpf_version_string(void)
  299{
  300#define __S(X) #X
  301#define _S(X) __S(X)
  302	return  "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
  303#undef _S
  304#undef __S
  305}
  306
/* Kinds of relocation a struct reloc_desc can describe (stored in its
 * 'type' field).
 */
enum reloc_type {
	RELO_LD64,
	RELO_CALL,
	RELO_DATA,
	RELO_EXTERN_VAR,
	RELO_EXTERN_FUNC,
	RELO_SUBPROG_ADDR,
	RELO_CORE,
};
  316
/* One relocation record: the relocation kind, the index of the instruction
 * it applies to, and kind-specific data held in the anonymous union.
 */
struct reloc_desc {
	enum reloc_type type;
	int insn_idx;
	union {
		const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
		/* presumably used by the other relocation types - confirm against users */
		struct {
			int map_idx;
			int sym_off;
		};
	};
};
  328
/* stored as sec_def->cookie for all libbpf-supported SEC()s; the values are
 * distinct bits so they can be OR-ed together (see SEC_ATTACHABLE_OPT)
 */
enum sec_def_flags {
	SEC_NONE = 0,
	/* expected_attach_type is optional, if kernel doesn't support that */
	SEC_EXP_ATTACH_OPT = 1,
	/* legacy, only used by libbpf_get_type_names() and
	 * libbpf_attach_type_by_name(), not used by libbpf itself at all.
	 * This used to be associated with cgroup (and few other) BPF programs
	 * that were attachable through BPF_PROG_ATTACH command. Pretty
	 * meaningless nowadays, though.
	 */
	SEC_ATTACHABLE = 2,
	SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
	/* attachment target is specified through BTF ID in either kernel or
	 * other BPF program's BTF object */
	SEC_ATTACH_BTF = 4,
	/* BPF program type allows sleeping/blocking in kernel */
	SEC_SLEEPABLE = 8,
	/* BPF program supports non-linear XDP buffer */
	SEC_XDP_FRAGS = 16,
};
  350
/* Definition of one SEC() handler: the section name string, the program and
 * expected attach types associated with it, and optional callbacks.
 */
struct bpf_sec_def {
	char *sec;
	enum bpf_prog_type prog_type;
	enum bpf_attach_type expected_attach_type;
	/* for libbpf-supported SEC()s this holds enum sec_def_flags bits */
	long cookie;
	int handler_id;

	libbpf_prog_setup_fn_t prog_setup_fn;
	libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
	libbpf_prog_attach_fn_t prog_attach_fn;
};
  362
/*
 * bpf_prog should be a better name but it has been used in
 * linux/filter.h.
 *
 * Per-program state: identity (name, ELF section), instruction image,
 * relocation records, verifier log settings, and load/attach parameters.
 */
struct bpf_program {
	char *name;
	char *sec_name;
	size_t sec_idx;
	const struct bpf_sec_def *sec_def;
	/* this program's instruction offset (in number of instructions)
	 * within its containing ELF section
	 */
	size_t sec_insn_off;
	/* number of original instructions in ELF section belonging to this
	 * program, not taking into account subprogram instructions possibly
	 * appended later during relocation
	 */
	size_t sec_insn_cnt;
	/* Offset (in number of instructions) of the start of instruction
	 * belonging to this BPF program  within its containing main BPF
	 * program. For the entry-point (main) BPF program, this is always
	 * zero. For a sub-program, this gets reset before each of main BPF
	 * programs are processed and relocated and is used to determine
	 * whether sub-program was already appended to the main program, and
	 * if yes, at which instruction offset.
	 */
	size_t sub_insn_off;

	/* instructions that belong to BPF program; insns[0] is located at
	 * sec_insn_off instruction within its ELF section in ELF file, so
	 * when mapping ELF file instruction index to the local instruction,
	 * one needs to subtract sec_insn_off; and vice versa.
	 */
	struct bpf_insn *insns;
	/* actual number of instruction in this BPF program's image; for
	 * entry-point BPF programs this includes the size of main program
	 * itself plus all the used sub-programs, appended at the end
	 */
	size_t insns_cnt;

	struct reloc_desc *reloc_desc;
	int nr_reloc;

	/* BPF verifier log settings */
	char *log_buf;
	size_t log_size;
	__u32 log_level;

	/* owning object */
	struct bpf_object *obj;

	/* initialized to -1 by bpf_object__init_prog(); closed and reset to
	 * -1 by bpf_program__unload()
	 */
	int fd;
	/* false when the section name starts with '?', true otherwise */
	bool autoload;
	bool mark_btf_static;
	enum bpf_prog_type type;
	enum bpf_attach_type expected_attach_type;

	int prog_ifindex;
	__u32 attach_btf_obj_fd;
	__u32 attach_btf_id;
	__u32 attach_prog_fd;

	/* freed and reset by bpf_program__unload() */
	void *func_info;
	__u32 func_info_rec_size;
	__u32 func_info_cnt;

	/* freed and reset by bpf_program__unload() */
	void *line_info;
	__u32 line_info_rec_size;
	__u32 line_info_cnt;
	__u32 prog_flags;
};
  433
/* State attached to a struct_ops map (bpf_map.st_ops): the struct's type in
 * the program's BTF plus the data in both program-BTF and kernel-BTF layout.
 */
struct bpf_struct_ops {
	const char *tname;
	const struct btf_type *type;
	struct bpf_program **progs;
	__u32 *kern_func_off;
	/* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
	void *data;
	/* e.g. struct bpf_struct_ops_tcp_congestion_ops in
	 *      btf_vmlinux's format.
	 * struct bpf_struct_ops_tcp_congestion_ops {
	 *	[... some other kernel fields ...]
	 *	struct tcp_congestion_ops data;
	 * }
	 * kern_vdata-size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
	 * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
	 * from "data".
	 */
	void *kern_vdata;
	__u32 type_id;
};
  454
  455#define DATA_SEC ".data"
  456#define BSS_SEC ".bss"
  457#define RODATA_SEC ".rodata"
  458#define KCONFIG_SEC ".kconfig"
  459#define KSYMS_SEC ".ksyms"
  460#define STRUCT_OPS_SEC ".struct_ops"
  461
/* libbpf-internal map classification stored in bpf_map.libbpf_type; the
 * names parallel the DATA_SEC/BSS_SEC/RODATA_SEC/KCONFIG_SEC section names.
 */
enum libbpf_map_type {
	LIBBPF_MAP_UNSPEC,
	LIBBPF_MAP_DATA,
	LIBBPF_MAP_BSS,
	LIBBPF_MAP_RODATA,
	LIBBPF_MAP_KCONFIG,
};
  469
/* Basic map attributes (type, key/value sizes, capacity, flags); embedded in
 * struct bpf_map as 'def'.
 */
struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};
  477
/* Per-map state: owning object, names, ELF location, definition, BTF type
 * ids, and pinning/reuse/autocreate flags.
 */
struct bpf_map {
	struct bpf_object *obj;
	char *name;
	/* real_name is defined for special internal maps (.rodata*,
	 * .data*, .bss, .kconfig) and preserves their original ELF section
	 * name. This is important to be able to find corresponding BTF
	 * DATASEC information.
	 */
	char *real_name;
	int fd;
	int sec_idx;
	size_t sec_offset;
	int map_ifindex;
	int inner_map_fd;
	struct bpf_map_def def;
	__u32 numa_node;
	__u32 btf_var_idx;
	__u32 btf_key_type_id;
	__u32 btf_value_type_id;
	/* set by bpf_map__init_kern_struct_ops() for struct_ops maps */
	__u32 btf_vmlinux_value_type_id;
	enum libbpf_map_type libbpf_type;
	void *mmaped;
	struct bpf_struct_ops *st_ops;
	struct bpf_map *inner_map;
	void **init_slots;
	int init_slots_sz;
	char *pin_path;
	bool pinned;
	bool reused;
	bool autocreate;
	__u64 map_extra;
};
  510
/* Kind of extern stored in extern_desc.type; selects which member of the
 * extern_desc union (kcfg or ksym) applies.
 */
enum extern_type {
	EXT_UNKNOWN,
	EXT_KCFG,
	EXT_KSYM,
};
  516
/* Value type of a kcfg extern (extern_desc.kcfg.type). */
enum kcfg_type {
	KCFG_UNKNOWN,
	KCFG_CHAR,
	KCFG_BOOL,
	KCFG_INT,
	KCFG_TRISTATE,
	KCFG_CHAR_ARR,
};
  525
/* Description of one extern symbol: common identification fields followed by
 * kind-specific data in the kcfg/ksym union.
 */
struct extern_desc {
	enum extern_type type;
	int sym_idx;
	int btf_id;
	int sec_btf_id;
	const char *name;
	bool is_set;
	bool is_weak;
	union {
		/* presumably valid when type == EXT_KCFG - confirm against users */
		struct {
			enum kcfg_type type;
			int sz;
			int align;
			int data_off;
			bool is_signed;
		} kcfg;
		/* presumably valid when type == EXT_KSYM - confirm against users */
		struct {
			unsigned long long addr;

			/* target btf_id of the corresponding kernel var. */
			int kernel_btf_obj_fd;
			int kernel_btf_id;

			/* local btf_id of the ksym extern's type. */
			__u32 type_id;
			/* BTF fd index to be patched in for insn->off, this is
			 * 0 for vmlinux BTF, index in obj->fd_array for module
			 * BTF
			 */
			__s16 btf_fd_idx;
		} ksym;
	};
};
  559
/* One kernel module BTF entry; bpf_object keeps an array of these in
 * 'btf_modules'.
 */
struct module_btf {
	struct btf *btf;
	char *name;
	__u32 id;
	int fd;
	int fd_array_idx;
};
  567
/* Classification of an ELF section, stored in elf_sec_desc.sec_type.
 * SEC_UNUSED is 0, so it is the value a zero-initialized descriptor holds.
 */
enum sec_type {
	SEC_UNUSED = 0,
	SEC_RELO,
	SEC_BSS,
	SEC_DATA,
	SEC_RODATA,
};
  575
/* Per-ELF-section bookkeeping: the section's classification plus pointers to
 * its section header and data.
 */
struct elf_sec_desc {
	enum sec_type sec_type;
	Elf64_Shdr *shdr;
	Elf_Data *data;
};
  581
/* ELF parsing state for one object file; embedded in struct bpf_object as
 * 'efile'.
 */
struct elf_state {
	int fd;
	const void *obj_buf;
	size_t obj_buf_sz;
	Elf *elf;
	Elf64_Ehdr *ehdr;
	/* symbol table data; bpf_object__add_programs() iterates over it */
	Elf_Data *symbols;
	Elf_Data *st_ops_data;
	size_t shstrndx; /* section index for section name strings */
	size_t strtabidx;
	struct elf_sec_desc *secs;
	int sec_cnt;
	int maps_shndx;
	int btf_maps_shndx;
	__u32 btf_maps_sec_btf_id;
	int text_shndx;
	int symbols_shndx;
	int st_ops_shndx;
};
  601
  602struct usdt_manager;
  603
/* Top-level state for one BPF object: its programs, maps and externs, ELF
 * parsing state, BTF data, log settings, and the path stored in the trailing
 * flexible array member.
 */
struct bpf_object {
	char name[BPF_OBJ_NAME_LEN];
	char license[64];
	__u32 kern_version;

	/* array of nr_programs entries, grown by bpf_object__add_programs() */
	struct bpf_program *programs;
	size_t nr_programs;
	struct bpf_map *maps;
	size_t nr_maps;
	size_t maps_cap;

	char *kconfig;
	struct extern_desc *externs;
	int nr_extern;
	int kconfig_map_idx;

	bool loaded;
	bool has_subcalls;
	bool has_rodata;

	struct bpf_gen *gen_loader;

	/* Information when doing ELF related work. Only valid if efile.elf is not NULL */
	struct elf_state efile;

	struct btf *btf;
	struct btf_ext *btf_ext;

	/* Parse and load BTF vmlinux if any of the programs in the object need
	 * it at load time.
	 */
	struct btf *btf_vmlinux;
	/* Path to the custom BTF to be used for BPF CO-RE relocations as an
	 * override for vmlinux BTF.
	 */
	char *btf_custom_path;
	/* vmlinux BTF override for CO-RE relocations */
	struct btf *btf_vmlinux_override;
	/* Lazily initialized kernel module BTFs */
	struct module_btf *btf_modules;
	bool btf_modules_loaded;
	size_t btf_module_cnt;
	size_t btf_module_cap;

	/* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
	char *log_buf;
	size_t log_size;
	/* copied into each program's log_level by bpf_object__init_prog() */
	__u32 log_level;

	int *fd_array;
	size_t fd_array_cap;
	size_t fd_array_cnt;

	struct usdt_manager *usdt_man;

	char path[];
};
  661
  662static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
  663static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
  664static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
  665static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
  666static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
  667static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
  668static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
  669static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
  670static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);
  671
  672void bpf_program__unload(struct bpf_program *prog)
  673{
  674	if (!prog)
  675		return;
  676
  677	zclose(prog->fd);
  678
  679	zfree(&prog->func_info);
  680	zfree(&prog->line_info);
  681}
  682
  683static void bpf_program__exit(struct bpf_program *prog)
  684{
  685	if (!prog)
  686		return;
  687
  688	bpf_program__unload(prog);
  689	zfree(&prog->name);
  690	zfree(&prog->sec_name);
  691	zfree(&prog->insns);
  692	zfree(&prog->reloc_desc);
  693
  694	prog->nr_reloc = 0;
  695	prog->insns_cnt = 0;
  696	prog->sec_idx = -1;
  697}
  698
  699static bool insn_is_subprog_call(const struct bpf_insn *insn)
  700{
  701	return BPF_CLASS(insn->code) == BPF_JMP &&
  702	       BPF_OP(insn->code) == BPF_CALL &&
  703	       BPF_SRC(insn->code) == BPF_K &&
  704	       insn->src_reg == BPF_PSEUDO_CALL &&
  705	       insn->dst_reg == 0 &&
  706	       insn->off == 0;
  707}
  708
  709static bool is_call_insn(const struct bpf_insn *insn)
  710{
  711	return insn->code == (BPF_JMP | BPF_CALL);
  712}
  713
  714static bool insn_is_pseudo_func(struct bpf_insn *insn)
  715{
  716	return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
  717}
  718
  719static int
  720bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
  721		      const char *name, size_t sec_idx, const char *sec_name,
  722		      size_t sec_off, void *insn_data, size_t insn_data_sz)
  723{
  724	if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
  725		pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
  726			sec_name, name, sec_off, insn_data_sz);
  727		return -EINVAL;
  728	}
  729
  730	memset(prog, 0, sizeof(*prog));
  731	prog->obj = obj;
  732
  733	prog->sec_idx = sec_idx;
  734	prog->sec_insn_off = sec_off / BPF_INSN_SZ;
  735	prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
  736	/* insns_cnt can later be increased by appending used subprograms */
  737	prog->insns_cnt = prog->sec_insn_cnt;
  738
  739	prog->type = BPF_PROG_TYPE_UNSPEC;
  740	prog->fd = -1;
  741
  742	/* libbpf's convention for SEC("?abc...") is that it's just like
  743	 * SEC("abc...") but the corresponding bpf_program starts out with
  744	 * autoload set to false.
  745	 */
  746	if (sec_name[0] == '?') {
  747		prog->autoload = false;
  748		/* from now on forget there was ? in section name */
  749		sec_name++;
  750	} else {
  751		prog->autoload = true;
  752	}
  753
  754	/* inherit object's log_level */
  755	prog->log_level = obj->log_level;
  756
  757	prog->sec_name = strdup(sec_name);
  758	if (!prog->sec_name)
  759		goto errout;
  760
  761	prog->name = strdup(name);
  762	if (!prog->name)
  763		goto errout;
  764
  765	prog->insns = malloc(insn_data_sz);
  766	if (!prog->insns)
  767		goto errout;
  768	memcpy(prog->insns, insn_data, insn_data_sz);
  769
  770	return 0;
  771errout:
  772	pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
  773	bpf_program__exit(prog);
  774	return -ENOMEM;
  775}
  776
  777static int
  778bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
  779			 const char *sec_name, int sec_idx)
  780{
  781	Elf_Data *symbols = obj->efile.symbols;
  782	struct bpf_program *prog, *progs;
  783	void *data = sec_data->d_buf;
  784	size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
  785	int nr_progs, err, i;
  786	const char *name;
  787	Elf64_Sym *sym;
  788
  789	progs = obj->programs;
  790	nr_progs = obj->nr_programs;
  791	nr_syms = symbols->d_size / sizeof(Elf64_Sym);
  792	sec_off = 0;
  793
  794	for (i = 0; i < nr_syms; i++) {
  795		sym = elf_sym_by_idx(obj, i);
  796
  797		if (sym->st_shndx != sec_idx)
  798			continue;
  799		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
  800			continue;
  801
  802		prog_sz = sym->st_size;
  803		sec_off = sym->st_value;
  804
  805		name = elf_sym_str(obj, sym->st_name);
  806		if (!name) {
  807			pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
  808				sec_name, sec_off);
  809			return -LIBBPF_ERRNO__FORMAT;
  810		}
  811
  812		if (sec_off + prog_sz > sec_sz) {
  813			pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
  814				sec_name, sec_off);
  815			return -LIBBPF_ERRNO__FORMAT;
  816		}
  817
  818		if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
  819			pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
  820			return -ENOTSUP;
  821		}
  822
  823		pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
  824			 sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);
  825
  826		progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
  827		if (!progs) {
  828			/*
  829			 * In this case the original obj->programs
  830			 * is still valid, so don't need special treat for
  831			 * bpf_close_object().
  832			 */
  833			pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
  834				sec_name, name);
  835			return -ENOMEM;
  836		}
  837		obj->programs = progs;
  838
  839		prog = &progs[nr_progs];
  840
  841		err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
  842					    sec_off, data + sec_off, prog_sz);
  843		if (err)
  844			return err;
  845
  846		/* if function is a global/weak symbol, but has restricted
  847		 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
  848		 * as static to enable more permissive BPF verification mode
  849		 * with more outside context available to BPF verifier
  850		 */
  851		if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL
  852		    && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
  853			|| ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
  854			prog->mark_btf_static = true;
  855
  856		nr_progs++;
  857		obj->nr_programs = nr_progs;
  858	}
  859
  860	return 0;
  861}
  862
  863__u32 get_kernel_version(void)
  864{
  865	/* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release,
  866	 * but Ubuntu provides /proc/version_signature file, as described at
  867	 * https://ubuntu.com/kernel, with an example contents below, which we
  868	 * can use to get a proper LINUX_VERSION_CODE.
  869	 *
  870	 *   Ubuntu 5.4.0-12.15-generic 5.4.8
  871	 *
  872	 * In the above, 5.4.8 is what kernel is actually expecting, while
  873	 * uname() call will return 5.4.0 in info.release.
  874	 */
  875	const char *ubuntu_kver_file = "/proc/version_signature";
  876	__u32 major, minor, patch;
  877	struct utsname info;
  878
  879	if (access(ubuntu_kver_file, R_OK) == 0) {
  880		FILE *f;
  881
  882		f = fopen(ubuntu_kver_file, "r");
  883		if (f) {
  884			if (fscanf(f, "%*s %*s %d.%d.%d\n", &major, &minor, &patch) == 3) {
  885				fclose(f);
  886				return KERNEL_VERSION(major, minor, patch);
  887			}
  888			fclose(f);
  889		}
  890		/* something went wrong, fall back to uname() approach */
  891	}
  892
  893	uname(&info);
  894	if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
  895		return 0;
  896	return KERNEL_VERSION(major, minor, patch);
  897}
  898
  899static const struct btf_member *
  900find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
  901{
  902	struct btf_member *m;
  903	int i;
  904
  905	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
  906		if (btf_member_bit_offset(t, i) == bit_offset)
  907			return m;
  908	}
  909
  910	return NULL;
  911}
  912
  913static const struct btf_member *
  914find_member_by_name(const struct btf *btf, const struct btf_type *t,
  915		    const char *name)
  916{
  917	struct btf_member *m;
  918	int i;
  919
  920	for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
  921		if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
  922			return m;
  923	}
  924
  925	return NULL;
  926}
  927
  928#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
  929static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
  930				   const char *name, __u32 kind);
  931
  932static int
  933find_struct_ops_kern_types(const struct btf *btf, const char *tname,
  934			   const struct btf_type **type, __u32 *type_id,
  935			   const struct btf_type **vtype, __u32 *vtype_id,
  936			   const struct btf_member **data_member)
  937{
  938	const struct btf_type *kern_type, *kern_vtype;
  939	const struct btf_member *kern_data_member;
  940	__s32 kern_vtype_id, kern_type_id;
  941	__u32 i;
  942
  943	kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
  944	if (kern_type_id < 0) {
  945		pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
  946			tname);
  947		return kern_type_id;
  948	}
  949	kern_type = btf__type_by_id(btf, kern_type_id);
  950
  951	/* Find the corresponding "map_value" type that will be used
  952	 * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
  953	 * find "struct bpf_struct_ops_tcp_congestion_ops" from the
  954	 * btf_vmlinux.
  955	 */
  956	kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
  957						tname, BTF_KIND_STRUCT);
  958	if (kern_vtype_id < 0) {
  959		pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
  960			STRUCT_OPS_VALUE_PREFIX, tname);
  961		return kern_vtype_id;
  962	}
  963	kern_vtype = btf__type_by_id(btf, kern_vtype_id);
  964
  965	/* Find "struct tcp_congestion_ops" from
  966	 * struct bpf_struct_ops_tcp_congestion_ops {
  967	 *	[ ... ]
  968	 *	struct tcp_congestion_ops data;
  969	 * }
  970	 */
  971	kern_data_member = btf_members(kern_vtype);
  972	for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
  973		if (kern_data_member->type == kern_type_id)
  974			break;
  975	}
  976	if (i == btf_vlen(kern_vtype)) {
  977		pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
  978			tname, STRUCT_OPS_VALUE_PREFIX, tname);
  979		return -EINVAL;
  980	}
  981
  982	*type = kern_type;
  983	*type_id = kern_type_id;
  984	*vtype = kern_vtype;
  985	*vtype_id = kern_vtype_id;
  986	*data_member = kern_data_member;
  987
  988	return 0;
  989}
  990
  991static bool bpf_map__is_struct_ops(const struct bpf_map *map)
  992{
  993	return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
  994}
  995
  996/* Init the map's fields that depend on kern_btf */
  997static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
  998					 const struct btf *btf,
  999					 const struct btf *kern_btf)
 1000{
 1001	const struct btf_member *member, *kern_member, *kern_data_member;
 1002	const struct btf_type *type, *kern_type, *kern_vtype;
 1003	__u32 i, kern_type_id, kern_vtype_id, kern_data_off;
 1004	struct bpf_struct_ops *st_ops;
 1005	void *data, *kern_data;
 1006	const char *tname;
 1007	int err;
 1008
 1009	st_ops = map->st_ops;
 1010	type = st_ops->type;
 1011	tname = st_ops->tname;
 1012	err = find_struct_ops_kern_types(kern_btf, tname,
 1013					 &kern_type, &kern_type_id,
 1014					 &kern_vtype, &kern_vtype_id,
 1015					 &kern_data_member);
 1016	if (err)
 1017		return err;
 1018
 1019	pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
 1020		 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);
 1021
 1022	map->def.value_size = kern_vtype->size;
 1023	map->btf_vmlinux_value_type_id = kern_vtype_id;
 1024
 1025	st_ops->kern_vdata = calloc(1, kern_vtype->size);
 1026	if (!st_ops->kern_vdata)
 1027		return -ENOMEM;
 1028
 1029	data = st_ops->data;
 1030	kern_data_off = kern_data_member->offset / 8;
 1031	kern_data = st_ops->kern_vdata + kern_data_off;
 1032
 1033	member = btf_members(type);
 1034	for (i = 0; i < btf_vlen(type); i++, member++) {
 1035		const struct btf_type *mtype, *kern_mtype;
 1036		__u32 mtype_id, kern_mtype_id;
 1037		void *mdata, *kern_mdata;
 1038		__s64 msize, kern_msize;
 1039		__u32 moff, kern_moff;
 1040		__u32 kern_member_idx;
 1041		const char *mname;
 1042
 1043		mname = btf__name_by_offset(btf, member->name_off);
 1044		kern_member = find_member_by_name(kern_btf, kern_type, mname);
 1045		if (!kern_member) {
 1046			pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
 1047				map->name, mname);
 1048			return -ENOTSUP;
 1049		}
 1050
 1051		kern_member_idx = kern_member - btf_members(kern_type);
 1052		if (btf_member_bitfield_size(type, i) ||
 1053		    btf_member_bitfield_size(kern_type, kern_member_idx)) {
 1054			pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
 1055				map->name, mname);
 1056			return -ENOTSUP;
 1057		}
 1058
 1059		moff = member->offset / 8;
 1060		kern_moff = kern_member->offset / 8;
 1061
 1062		mdata = data + moff;
 1063		kern_mdata = kern_data + kern_moff;
 1064
 1065		mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
 1066		kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
 1067						    &kern_mtype_id);
 1068		if (BTF_INFO_KIND(mtype->info) !=
 1069		    BTF_INFO_KIND(kern_mtype->info)) {
 1070			pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
 1071				map->name, mname, BTF_INFO_KIND(mtype->info),
 1072				BTF_INFO_KIND(kern_mtype->info));
 1073			return -ENOTSUP;
 1074		}
 1075
 1076		if (btf_is_ptr(mtype)) {
 1077			struct bpf_program *prog;
 1078
 1079			prog = st_ops->progs[i];
 1080			if (!prog)
 1081				continue;
 1082
 1083			kern_mtype = skip_mods_and_typedefs(kern_btf,
 1084							    kern_mtype->type,
 1085							    &kern_mtype_id);
 1086
 1087			/* mtype->type must be a func_proto which was
 1088			 * guaranteed in bpf_object__collect_st_ops_relos(),
 1089			 * so only check kern_mtype for func_proto here.
 1090			 */
 1091			if (!btf_is_func_proto(kern_mtype)) {
 1092				pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
 1093					map->name, mname);
 1094				return -ENOTSUP;
 1095			}
 1096
 1097			prog->attach_btf_id = kern_type_id;
 1098			prog->expected_attach_type = kern_member_idx;
 1099
 1100			st_ops->kern_func_off[i] = kern_data_off + kern_moff;
 1101
 1102			pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
 1103				 map->name, mname, prog->name, moff,
 1104				 kern_moff);
 1105
 1106			continue;
 1107		}
 1108
 1109		msize = btf__resolve_size(btf, mtype_id);
 1110		kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
 1111		if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
 1112			pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
 1113				map->name, mname, (ssize_t)msize,
 1114				(ssize_t)kern_msize);
 1115			return -ENOTSUP;
 1116		}
 1117
 1118		pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
 1119			 map->name, mname, (unsigned int)msize,
 1120			 moff, kern_moff);
 1121		memcpy(kern_mdata, mdata, msize);
 1122	}
 1123
 1124	return 0;
 1125}
 1126
 1127static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
 1128{
 1129	struct bpf_map *map;
 1130	size_t i;
 1131	int err;
 1132
 1133	for (i = 0; i < obj->nr_maps; i++) {
 1134		map = &obj->maps[i];
 1135
 1136		if (!bpf_map__is_struct_ops(map))
 1137			continue;
 1138
 1139		err = bpf_map__init_kern_struct_ops(map, obj->btf,
 1140						    obj->btf_vmlinux);
 1141		if (err)
 1142			return err;
 1143	}
 1144
 1145	return 0;
 1146}
 1147
 1148static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
 1149{
 1150	const struct btf_type *type, *datasec;
 1151	const struct btf_var_secinfo *vsi;
 1152	struct bpf_struct_ops *st_ops;
 1153	const char *tname, *var_name;
 1154	__s32 type_id, datasec_id;
 1155	const struct btf *btf;
 1156	struct bpf_map *map;
 1157	__u32 i;
 1158
 1159	if (obj->efile.st_ops_shndx == -1)
 1160		return 0;
 1161
 1162	btf = obj->btf;
 1163	datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
 1164					    BTF_KIND_DATASEC);
 1165	if (datasec_id < 0) {
 1166		pr_warn("struct_ops init: DATASEC %s not found\n",
 1167			STRUCT_OPS_SEC);
 1168		return -EINVAL;
 1169	}
 1170
 1171	datasec = btf__type_by_id(btf, datasec_id);
 1172	vsi = btf_var_secinfos(datasec);
 1173	for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
 1174		type = btf__type_by_id(obj->btf, vsi->type);
 1175		var_name = btf__name_by_offset(obj->btf, type->name_off);
 1176
 1177		type_id = btf__resolve_type(obj->btf, vsi->type);
 1178		if (type_id < 0) {
 1179			pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
 1180				vsi->type, STRUCT_OPS_SEC);
 1181			return -EINVAL;
 1182		}
 1183
 1184		type = btf__type_by_id(obj->btf, type_id);
 1185		tname = btf__name_by_offset(obj->btf, type->name_off);
 1186		if (!tname[0]) {
 1187			pr_warn("struct_ops init: anonymous type is not supported\n");
 1188			return -ENOTSUP;
 1189		}
 1190		if (!btf_is_struct(type)) {
 1191			pr_warn("struct_ops init: %s is not a struct\n", tname);
 1192			return -EINVAL;
 1193		}
 1194
 1195		map = bpf_object__add_map(obj);
 1196		if (IS_ERR(map))
 1197			return PTR_ERR(map);
 1198
 1199		map->sec_idx = obj->efile.st_ops_shndx;
 1200		map->sec_offset = vsi->offset;
 1201		map->name = strdup(var_name);
 1202		if (!map->name)
 1203			return -ENOMEM;
 1204
 1205		map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
 1206		map->def.key_size = sizeof(int);
 1207		map->def.value_size = type->size;
 1208		map->def.max_entries = 1;
 1209
 1210		map->st_ops = calloc(1, sizeof(*map->st_ops));
 1211		if (!map->st_ops)
 1212			return -ENOMEM;
 1213		st_ops = map->st_ops;
 1214		st_ops->data = malloc(type->size);
 1215		st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
 1216		st_ops->kern_func_off = malloc(btf_vlen(type) *
 1217					       sizeof(*st_ops->kern_func_off));
 1218		if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
 1219			return -ENOMEM;
 1220
 1221		if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
 1222			pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
 1223				var_name, STRUCT_OPS_SEC);
 1224			return -EINVAL;
 1225		}
 1226
 1227		memcpy(st_ops->data,
 1228		       obj->efile.st_ops_data->d_buf + vsi->offset,
 1229		       type->size);
 1230		st_ops->tname = tname;
 1231		st_ops->type = type;
 1232		st_ops->type_id = type_id;
 1233
 1234		pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
 1235			 tname, type_id, var_name, vsi->offset);
 1236	}
 1237
 1238	return 0;
 1239}
 1240
 1241static struct bpf_object *bpf_object__new(const char *path,
 1242					  const void *obj_buf,
 1243					  size_t obj_buf_sz,
 1244					  const char *obj_name)
 1245{
 1246	struct bpf_object *obj;
 1247	char *end;
 1248
 1249	obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
 1250	if (!obj) {
 1251		pr_warn("alloc memory failed for %s\n", path);
 1252		return ERR_PTR(-ENOMEM);
 1253	}
 1254
 1255	strcpy(obj->path, path);
 1256	if (obj_name) {
 1257		libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
 1258	} else {
 1259		/* Using basename() GNU version which doesn't modify arg. */
 1260		libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
 1261		end = strchr(obj->name, '.');
 1262		if (end)
 1263			*end = 0;
 1264	}
 1265
 1266	obj->efile.fd = -1;
 1267	/*
 1268	 * Caller of this function should also call
 1269	 * bpf_object__elf_finish() after data collection to return
 1270	 * obj_buf to user. If not, we should duplicate the buffer to
 1271	 * avoid user freeing them before elf finish.
 1272	 */
 1273	obj->efile.obj_buf = obj_buf;
 1274	obj->efile.obj_buf_sz = obj_buf_sz;
 1275	obj->efile.maps_shndx = -1;
 1276	obj->efile.btf_maps_shndx = -1;
 1277	obj->efile.st_ops_shndx = -1;
 1278	obj->kconfig_map_idx = -1;
 1279
 1280	obj->kern_version = get_kernel_version();
 1281	obj->loaded = false;
 1282
 1283	return obj;
 1284}
 1285
 1286static void bpf_object__elf_finish(struct bpf_object *obj)
 1287{
 1288	if (!obj->efile.elf)
 1289		return;
 1290
 1291	elf_end(obj->efile.elf);
 1292	obj->efile.elf = NULL;
 1293	obj->efile.symbols = NULL;
 1294	obj->efile.st_ops_data = NULL;
 1295
 1296	zfree(&obj->efile.secs);
 1297	obj->efile.sec_cnt = 0;
 1298	zclose(obj->efile.fd);
 1299	obj->efile.obj_buf = NULL;
 1300	obj->efile.obj_buf_sz = 0;
 1301}
 1302
 1303static int bpf_object__elf_init(struct bpf_object *obj)
 1304{
 1305	Elf64_Ehdr *ehdr;
 1306	int err = 0;
 1307	Elf *elf;
 1308
 1309	if (obj->efile.elf) {
 1310		pr_warn("elf: init internal error\n");
 1311		return -LIBBPF_ERRNO__LIBELF;
 1312	}
 1313
 1314	if (obj->efile.obj_buf_sz > 0) {
 1315		/* obj_buf should have been validated by bpf_object__open_mem(). */
 1316		elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
 1317	} else {
 1318		obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
 1319		if (obj->efile.fd < 0) {
 1320			char errmsg[STRERR_BUFSIZE], *cp;
 1321
 1322			err = -errno;
 1323			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 1324			pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
 1325			return err;
 1326		}
 1327
 1328		elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
 1329	}
 1330
 1331	if (!elf) {
 1332		pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
 1333		err = -LIBBPF_ERRNO__LIBELF;
 1334		goto errout;
 1335	}
 1336
 1337	obj->efile.elf = elf;
 1338
 1339	if (elf_kind(elf) != ELF_K_ELF) {
 1340		err = -LIBBPF_ERRNO__FORMAT;
 1341		pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
 1342		goto errout;
 1343	}
 1344
 1345	if (gelf_getclass(elf) != ELFCLASS64) {
 1346		err = -LIBBPF_ERRNO__FORMAT;
 1347		pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
 1348		goto errout;
 1349	}
 1350
 1351	obj->efile.ehdr = ehdr = elf64_getehdr(elf);
 1352	if (!obj->efile.ehdr) {
 1353		pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
 1354		err = -LIBBPF_ERRNO__FORMAT;
 1355		goto errout;
 1356	}
 1357
 1358	if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
 1359		pr_warn("elf: failed to get section names section index for %s: %s\n",
 1360			obj->path, elf_errmsg(-1));
 1361		err = -LIBBPF_ERRNO__FORMAT;
 1362		goto errout;
 1363	}
 1364
 1365	/* Elf is corrupted/truncated, avoid calling elf_strptr. */
 1366	if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
 1367		pr_warn("elf: failed to get section names strings from %s: %s\n",
 1368			obj->path, elf_errmsg(-1));
 1369		err = -LIBBPF_ERRNO__FORMAT;
 1370		goto errout;
 1371	}
 1372
 1373	/* Old LLVM set e_machine to EM_NONE */
 1374	if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
 1375		pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
 1376		err = -LIBBPF_ERRNO__FORMAT;
 1377		goto errout;
 1378	}
 1379
 1380	return 0;
 1381errout:
 1382	bpf_object__elf_finish(obj);
 1383	return err;
 1384}
 1385
 1386static int bpf_object__check_endianness(struct bpf_object *obj)
 1387{
 1388#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 1389	if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
 1390		return 0;
 1391#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 1392	if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
 1393		return 0;
 1394#else
 1395# error "Unrecognized __BYTE_ORDER__"
 1396#endif
 1397	pr_warn("elf: endianness mismatch in %s.\n", obj->path);
 1398	return -LIBBPF_ERRNO__ENDIAN;
 1399}
 1400
 1401static int
 1402bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
 1403{
 1404	/* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
 1405	 * go over allowed ELF data section buffer
 1406	 */
 1407	libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
 1408	pr_debug("license of %s is %s\n", obj->path, obj->license);
 1409	return 0;
 1410}
 1411
 1412static int
 1413bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
 1414{
 1415	__u32 kver;
 1416
 1417	if (size != sizeof(kver)) {
 1418		pr_warn("invalid kver section in %s\n", obj->path);
 1419		return -LIBBPF_ERRNO__FORMAT;
 1420	}
 1421	memcpy(&kver, data, sizeof(kver));
 1422	obj->kern_version = kver;
 1423	pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
 1424	return 0;
 1425}
 1426
 1427static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
 1428{
 1429	if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
 1430	    type == BPF_MAP_TYPE_HASH_OF_MAPS)
 1431		return true;
 1432	return false;
 1433}
 1434
 1435static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
 1436{
 1437	Elf_Data *data;
 1438	Elf_Scn *scn;
 1439
 1440	if (!name)
 1441		return -EINVAL;
 1442
 1443	scn = elf_sec_by_name(obj, name);
 1444	data = elf_sec_data(obj, scn);
 1445	if (data) {
 1446		*size = data->d_size;
 1447		return 0; /* found it */
 1448	}
 1449
 1450	return -ENOENT;
 1451}
 1452
 1453static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off)
 1454{
 1455	Elf_Data *symbols = obj->efile.symbols;
 1456	const char *sname;
 1457	size_t si;
 1458
 1459	if (!name || !off)
 1460		return -EINVAL;
 1461
 1462	for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
 1463		Elf64_Sym *sym = elf_sym_by_idx(obj, si);
 1464
 1465		if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
 1466			continue;
 1467
 1468		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
 1469		    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
 1470			continue;
 1471
 1472		sname = elf_sym_str(obj, sym->st_name);
 1473		if (!sname) {
 1474			pr_warn("failed to get sym name string for var %s\n", name);
 1475			return -EIO;
 1476		}
 1477		if (strcmp(name, sname) == 0) {
 1478			*off = sym->st_value;
 1479			return 0;
 1480		}
 1481	}
 1482
 1483	return -ENOENT;
 1484}
 1485
 1486static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
 1487{
 1488	struct bpf_map *map;
 1489	int err;
 1490
 1491	err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
 1492				sizeof(*obj->maps), obj->nr_maps + 1);
 1493	if (err)
 1494		return ERR_PTR(err);
 1495
 1496	map = &obj->maps[obj->nr_maps++];
 1497	map->obj = obj;
 1498	map->fd = -1;
 1499	map->inner_map_fd = -1;
 1500	map->autocreate = true;
 1501
 1502	return map;
 1503}
 1504
 1505static size_t bpf_map_mmap_sz(const struct bpf_map *map)
 1506{
 1507	long page_sz = sysconf(_SC_PAGE_SIZE);
 1508	size_t map_sz;
 1509
 1510	map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
 1511	map_sz = roundup(map_sz, page_sz);
 1512	return map_sz;
 1513}
 1514
 1515static char *internal_map_name(struct bpf_object *obj, const char *real_name)
 1516{
 1517	char map_name[BPF_OBJ_NAME_LEN], *p;
 1518	int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
 1519
 1520	/* This is one of the more confusing parts of libbpf for various
 1521	 * reasons, some of which are historical. The original idea for naming
 1522	 * internal names was to include as much of BPF object name prefix as
 1523	 * possible, so that it can be distinguished from similar internal
 1524	 * maps of a different BPF object.
 1525	 * As an example, let's say we have bpf_object named 'my_object_name'
 1526	 * and internal map corresponding to '.rodata' ELF section. The final
 1527	 * map name advertised to user and to the kernel will be
 1528	 * 'my_objec.rodata', taking first 8 characters of object name and
 1529	 * entire 7 characters of '.rodata'.
 1530	 * Somewhat confusingly, if internal map ELF section name is shorter
 1531	 * than 7 characters, e.g., '.bss', we still reserve 7 characters
 1532	 * for the suffix, even though we only have 4 actual characters, and
 1533	 * resulting map will be called 'my_objec.bss', not even using all 15
 1534	 * characters allowed by the kernel. Oh well, at least the truncated
 1535	 * object name is somewhat consistent in this case. But if the map
 1536	 * name is '.kconfig', we'll still have entirety of '.kconfig' added
 1537	 * (8 chars) and thus will be left with only first 7 characters of the
 1538	 * object name ('my_obje'). Happy guessing, user, that the final map
 1539	 * name will be "my_obje.kconfig".
 1540	 * Now, with libbpf starting to support arbitrarily named .rodata.*
 1541	 * and .data.* data sections, it's possible that ELF section name is
 1542	 * longer than allowed 15 chars, so we now need to be careful to take
 1543	 * only up to 15 first characters of ELF name, taking no BPF object
 1544	 * name characters at all. So '.rodata.abracadabra' will result in
 1545	 * '.rodata.abracad' kernel and user-visible name.
 1546	 * We need to keep this convoluted logic intact for .data, .bss and
 1547	 * .rodata maps, but for new custom .data.custom and .rodata.custom
 1548	 * maps we use their ELF names as is, not prepending bpf_object name
 1549	 * in front. We still need to truncate them to 15 characters for the
 1550	 * kernel. Full name can be recovered for such maps by using DATASEC
 1551	 * BTF type associated with such map's value type, though.
 1552	 */
 1553	if (sfx_len >= BPF_OBJ_NAME_LEN)
 1554		sfx_len = BPF_OBJ_NAME_LEN - 1;
 1555
 1556	/* if there are two or more dots in map name, it's a custom dot map */
 1557	if (strchr(real_name + 1, '.') != NULL)
 1558		pfx_len = 0;
 1559	else
 1560		pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
 1561
 1562	snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
 1563		 sfx_len, real_name);
 1564
 1565	/* sanitise map name to characters allowed by kernel */
 1566	for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
 1567		if (!isalnum(*p) && *p != '_' && *p != '.')
 1568			*p = '_';
 1569
 1570	return strdup(map_name);
 1571}
 1572
 1573static int
 1574bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map);
 1575
 1576static int
 1577bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 1578			      const char *real_name, int sec_idx, void *data, size_t data_sz)
 1579{
 1580	struct bpf_map_def *def;
 1581	struct bpf_map *map;
 1582	int err;
 1583
 1584	map = bpf_object__add_map(obj);
 1585	if (IS_ERR(map))
 1586		return PTR_ERR(map);
 1587
 1588	map->libbpf_type = type;
 1589	map->sec_idx = sec_idx;
 1590	map->sec_offset = 0;
 1591	map->real_name = strdup(real_name);
 1592	map->name = internal_map_name(obj, real_name);
 1593	if (!map->real_name || !map->name) {
 1594		zfree(&map->real_name);
 1595		zfree(&map->name);
 1596		return -ENOMEM;
 1597	}
 1598
 1599	def = &map->def;
 1600	def->type = BPF_MAP_TYPE_ARRAY;
 1601	def->key_size = sizeof(int);
 1602	def->value_size = data_sz;
 1603	def->max_entries = 1;
 1604	def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
 1605			 ? BPF_F_RDONLY_PROG : 0;
 1606	def->map_flags |= BPF_F_MMAPABLE;
 1607
 1608	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
 1609		 map->name, map->sec_idx, map->sec_offset, def->map_flags);
 1610
 1611	map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
 1612			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 1613	if (map->mmaped == MAP_FAILED) {
 1614		err = -errno;
 1615		map->mmaped = NULL;
 1616		pr_warn("failed to alloc map '%s' content buffer: %d\n",
 1617			map->name, err);
 1618		zfree(&map->real_name);
 1619		zfree(&map->name);
 1620		return err;
 1621	}
 1622
 1623	/* failures are fine because of maps like .rodata.str1.1 */
 1624	(void) bpf_map_find_btf_info(obj, map);
 1625
 1626	if (data)
 1627		memcpy(map->mmaped, data, data_sz);
 1628
 1629	pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
 1630	return 0;
 1631}
 1632
 1633static int bpf_object__init_global_data_maps(struct bpf_object *obj)
 1634{
 1635	struct elf_sec_desc *sec_desc;
 1636	const char *sec_name;
 1637	int err = 0, sec_idx;
 1638
 1639	/*
 1640	 * Populate obj->maps with libbpf internal maps.
 1641	 */
 1642	for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
 1643		sec_desc = &obj->efile.secs[sec_idx];
 1644
 1645		switch (sec_desc->sec_type) {
 1646		case SEC_DATA:
 1647			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
 1648			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
 1649							    sec_name, sec_idx,
 1650							    sec_desc->data->d_buf,
 1651							    sec_desc->data->d_size);
 1652			break;
 1653		case SEC_RODATA:
 1654			obj->has_rodata = true;
 1655			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
 1656			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
 1657							    sec_name, sec_idx,
 1658							    sec_desc->data->d_buf,
 1659							    sec_desc->data->d_size);
 1660			break;
 1661		case SEC_BSS:
 1662			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
 1663			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
 1664							    sec_name, sec_idx,
 1665							    NULL,
 1666							    sec_desc->data->d_size);
 1667			break;
 1668		default:
 1669			/* skip */
 1670			break;
 1671		}
 1672		if (err)
 1673			return err;
 1674	}
 1675	return 0;
 1676}
 1677
 1678
 1679static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
 1680					       const void *name)
 1681{
 1682	int i;
 1683
 1684	for (i = 0; i < obj->nr_extern; i++) {
 1685		if (strcmp(obj->externs[i].name, name) == 0)
 1686			return &obj->externs[i];
 1687	}
 1688	return NULL;
 1689}
 1690
 1691static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
 1692			      char value)
 1693{
 1694	switch (ext->kcfg.type) {
 1695	case KCFG_BOOL:
 1696		if (value == 'm') {
 1697			pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
 1698				ext->name, value);
 1699			return -EINVAL;
 1700		}
 1701		*(bool *)ext_val = value == 'y' ? true : false;
 1702		break;
 1703	case KCFG_TRISTATE:
 1704		if (value == 'y')
 1705			*(enum libbpf_tristate *)ext_val = TRI_YES;
 1706		else if (value == 'm')
 1707			*(enum libbpf_tristate *)ext_val = TRI_MODULE;
 1708		else /* value == 'n' */
 1709			*(enum libbpf_tristate *)ext_val = TRI_NO;
 1710		break;
 1711	case KCFG_CHAR:
 1712		*(char *)ext_val = value;
 1713		break;
 1714	case KCFG_UNKNOWN:
 1715	case KCFG_INT:
 1716	case KCFG_CHAR_ARR:
 1717	default:
 1718		pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
 1719			ext->name, value);
 1720		return -EINVAL;
 1721	}
 1722	ext->is_set = true;
 1723	return 0;
 1724}
 1725
 1726static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
 1727			      const char *value)
 1728{
 1729	size_t len;
 1730
 1731	if (ext->kcfg.type != KCFG_CHAR_ARR) {
 1732		pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
 1733			ext->name, value);
 1734		return -EINVAL;
 1735	}
 1736
 1737	len = strlen(value);
 1738	if (value[len - 1] != '"') {
 1739		pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
 1740			ext->name, value);
 1741		return -EINVAL;
 1742	}
 1743
 1744	/* strip quotes */
 1745	len -= 2;
 1746	if (len >= ext->kcfg.sz) {
 1747		pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
 1748			ext->name, value, len, ext->kcfg.sz - 1);
 1749		len = ext->kcfg.sz - 1;
 1750	}
 1751	memcpy(ext_val, value + 1, len);
 1752	ext_val[len] = '\0';
 1753	ext->is_set = true;
 1754	return 0;
 1755}
 1756
 1757static int parse_u64(const char *value, __u64 *res)
 1758{
 1759	char *value_end;
 1760	int err;
 1761
 1762	errno = 0;
 1763	*res = strtoull(value, &value_end, 0);
 1764	if (errno) {
 1765		err = -errno;
 1766		pr_warn("failed to parse '%s' as integer: %d\n", value, err);
 1767		return err;
 1768	}
 1769	if (*value_end) {
 1770		pr_warn("failed to parse '%s' as integer completely\n", value);
 1771		return -EINVAL;
 1772	}
 1773	return 0;
 1774}
 1775
 1776static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
 1777{
 1778	int bit_sz = ext->kcfg.sz * 8;
 1779
 1780	if (ext->kcfg.sz == 8)
 1781		return true;
 1782
 1783	/* Validate that value stored in u64 fits in integer of `ext->sz`
 1784	 * bytes size without any loss of information. If the target integer
 1785	 * is signed, we rely on the following limits of integer type of
 1786	 * Y bits and subsequent transformation:
 1787	 *
 1788	 *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
 1789	 *            0 <= X + 2^(Y-1) <= 2^Y - 1
 1790	 *            0 <= X + 2^(Y-1) <  2^Y
 1791	 *
 1792	 *  For unsigned target integer, check that all the (64 - Y) bits are
 1793	 *  zero.
 1794	 */
 1795	if (ext->kcfg.is_signed)
 1796		return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
 1797	else
 1798		return (v >> bit_sz) == 0;
 1799}
 1800
 1801static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
 1802			      __u64 value)
 1803{
 1804	if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
 1805	    ext->kcfg.type != KCFG_BOOL) {
 1806		pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
 1807			ext->name, (unsigned long long)value);
 1808		return -EINVAL;
 1809	}
 1810	if (ext->kcfg.type == KCFG_BOOL && value > 1) {
 1811		pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
 1812			ext->name, (unsigned long long)value);
 1813		return -EINVAL;
 1814
 1815	}
 1816	if (!is_kcfg_value_in_range(ext, value)) {
 1817		pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
 1818			ext->name, (unsigned long long)value, ext->kcfg.sz);
 1819		return -ERANGE;
 1820	}
 1821	switch (ext->kcfg.sz) {
 1822		case 1: *(__u8 *)ext_val = value; break;
 1823		case 2: *(__u16 *)ext_val = value; break;
 1824		case 4: *(__u32 *)ext_val = value; break;
 1825		case 8: *(__u64 *)ext_val = value; break;
 1826		default:
 1827			return -EINVAL;
 1828	}
 1829	ext->is_set = true;
 1830	return 0;
 1831}
 1832
 1833static int bpf_object__process_kconfig_line(struct bpf_object *obj,
 1834					    char *buf, void *data)
 1835{
 1836	struct extern_desc *ext;
 1837	char *sep, *value;
 1838	int len, err = 0;
 1839	void *ext_val;
 1840	__u64 num;
 1841
 1842	if (!str_has_pfx(buf, "CONFIG_"))
 1843		return 0;
 1844
 1845	sep = strchr(buf, '=');
 1846	if (!sep) {
 1847		pr_warn("failed to parse '%s': no separator\n", buf);
 1848		return -EINVAL;
 1849	}
 1850
 1851	/* Trim ending '\n' */
 1852	len = strlen(buf);
 1853	if (buf[len - 1] == '\n')
 1854		buf[len - 1] = '\0';
 1855	/* Split on '=' and ensure that a value is present. */
 1856	*sep = '\0';
 1857	if (!sep[1]) {
 1858		*sep = '=';
 1859		pr_warn("failed to parse '%s': no value\n", buf);
 1860		return -EINVAL;
 1861	}
 1862
 1863	ext = find_extern_by_name(obj, buf);
 1864	if (!ext || ext->is_set)
 1865		return 0;
 1866
 1867	ext_val = data + ext->kcfg.data_off;
 1868	value = sep + 1;
 1869
 1870	switch (*value) {
 1871	case 'y': case 'n': case 'm':
 1872		err = set_kcfg_value_tri(ext, ext_val, *value);
 1873		break;
 1874	case '"':
 1875		err = set_kcfg_value_str(ext, ext_val, value);
 1876		break;
 1877	default:
 1878		/* assume integer */
 1879		err = parse_u64(value, &num);
 1880		if (err) {
 1881			pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
 1882			return err;
 1883		}
 1884		if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
 1885			pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
 1886			return -EINVAL;
 1887		}
 1888		err = set_kcfg_value_num(ext, ext_val, num);
 1889		break;
 1890	}
 1891	if (err)
 1892		return err;
 1893	pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
 1894	return 0;
 1895}
 1896
 1897static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
 1898{
 1899	char buf[PATH_MAX];
 1900	struct utsname uts;
 1901	int len, err = 0;
 1902	gzFile file;
 1903
 1904	uname(&uts);
 1905	len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
 1906	if (len < 0)
 1907		return -EINVAL;
 1908	else if (len >= PATH_MAX)
 1909		return -ENAMETOOLONG;
 1910
 1911	/* gzopen also accepts uncompressed files. */
 1912	file = gzopen(buf, "r");
 1913	if (!file)
 1914		file = gzopen("/proc/config.gz", "r");
 1915
 1916	if (!file) {
 1917		pr_warn("failed to open system Kconfig\n");
 1918		return -ENOENT;
 1919	}
 1920
 1921	while (gzgets(file, buf, sizeof(buf))) {
 1922		err = bpf_object__process_kconfig_line(obj, buf, data);
 1923		if (err) {
 1924			pr_warn("error parsing system Kconfig line '%s': %d\n",
 1925				buf, err);
 1926			goto out;
 1927		}
 1928	}
 1929
 1930out:
 1931	gzclose(file);
 1932	return err;
 1933}
 1934
 1935static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
 1936					const char *config, void *data)
 1937{
 1938	char buf[PATH_MAX];
 1939	int err = 0;
 1940	FILE *file;
 1941
 1942	file = fmemopen((void *)config, strlen(config), "r");
 1943	if (!file) {
 1944		err = -errno;
 1945		pr_warn("failed to open in-memory Kconfig: %d\n", err);
 1946		return err;
 1947	}
 1948
 1949	while (fgets(buf, sizeof(buf), file)) {
 1950		err = bpf_object__process_kconfig_line(obj, buf, data);
 1951		if (err) {
 1952			pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
 1953				buf, err);
 1954			break;
 1955		}
 1956	}
 1957
 1958	fclose(file);
 1959	return err;
 1960}
 1961
 1962static int bpf_object__init_kconfig_map(struct bpf_object *obj)
 1963{
 1964	struct extern_desc *last_ext = NULL, *ext;
 1965	size_t map_sz;
 1966	int i, err;
 1967
 1968	for (i = 0; i < obj->nr_extern; i++) {
 1969		ext = &obj->externs[i];
 1970		if (ext->type == EXT_KCFG)
 1971			last_ext = ext;
 1972	}
 1973
 1974	if (!last_ext)
 1975		return 0;
 1976
 1977	map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
 1978	err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
 1979					    ".kconfig", obj->efile.symbols_shndx,
 1980					    NULL, map_sz);
 1981	if (err)
 1982		return err;
 1983
 1984	obj->kconfig_map_idx = obj->nr_maps - 1;
 1985
 1986	return 0;
 1987}
 1988
 1989const struct btf_type *
 1990skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
 1991{
 1992	const struct btf_type *t = btf__type_by_id(btf, id);
 1993
 1994	if (res_id)
 1995		*res_id = id;
 1996
 1997	while (btf_is_mod(t) || btf_is_typedef(t)) {
 1998		if (res_id)
 1999			*res_id = t->type;
 2000		t = btf__type_by_id(btf, t->type);
 2001	}
 2002
 2003	return t;
 2004}
 2005
 2006static const struct btf_type *
 2007resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
 2008{
 2009	const struct btf_type *t;
 2010
 2011	t = skip_mods_and_typedefs(btf, id, NULL);
 2012	if (!btf_is_ptr(t))
 2013		return NULL;
 2014
 2015	t = skip_mods_and_typedefs(btf, t->type, res_id);
 2016
 2017	return btf_is_func_proto(t) ? t : NULL;
 2018}
 2019
 2020static const char *__btf_kind_str(__u16 kind)
 2021{
 2022	switch (kind) {
 2023	case BTF_KIND_UNKN: return "void";
 2024	case BTF_KIND_INT: return "int";
 2025	case BTF_KIND_PTR: return "ptr";
 2026	case BTF_KIND_ARRAY: return "array";
 2027	case BTF_KIND_STRUCT: return "struct";
 2028	case BTF_KIND_UNION: return "union";
 2029	case BTF_KIND_ENUM: return "enum";
 2030	case BTF_KIND_FWD: return "fwd";
 2031	case BTF_KIND_TYPEDEF: return "typedef";
 2032	case BTF_KIND_VOLATILE: return "volatile";
 2033	case BTF_KIND_CONST: return "const";
 2034	case BTF_KIND_RESTRICT: return "restrict";
 2035	case BTF_KIND_FUNC: return "func";
 2036	case BTF_KIND_FUNC_PROTO: return "func_proto";
 2037	case BTF_KIND_VAR: return "var";
 2038	case BTF_KIND_DATASEC: return "datasec";
 2039	case BTF_KIND_FLOAT: return "float";
 2040	case BTF_KIND_DECL_TAG: return "decl_tag";
 2041	case BTF_KIND_TYPE_TAG: return "type_tag";
 2042	case BTF_KIND_ENUM64: return "enum64";
 2043	default: return "unknown";
 2044	}
 2045}
 2046
 2047const char *btf_kind_str(const struct btf_type *t)
 2048{
 2049	return __btf_kind_str(btf_kind(t));
 2050}
 2051
 2052/*
 2053 * Fetch integer attribute of BTF map definition. Such attributes are
 2054 * represented using a pointer to an array, in which dimensionality of array
 2055 * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
 2056 * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
 2057 * type definition, while using only sizeof(void *) space in ELF data section.
 2058 */
 2059static bool get_map_field_int(const char *map_name, const struct btf *btf,
 2060			      const struct btf_member *m, __u32 *res)
 2061{
 2062	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
 2063	const char *name = btf__name_by_offset(btf, m->name_off);
 2064	const struct btf_array *arr_info;
 2065	const struct btf_type *arr_t;
 2066
 2067	if (!btf_is_ptr(t)) {
 2068		pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
 2069			map_name, name, btf_kind_str(t));
 2070		return false;
 2071	}
 2072
 2073	arr_t = btf__type_by_id(btf, t->type);
 2074	if (!arr_t) {
 2075		pr_warn("map '%s': attr '%s': type [%u] not found.\n",
 2076			map_name, name, t->type);
 2077		return false;
 2078	}
 2079	if (!btf_is_array(arr_t)) {
 2080		pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
 2081			map_name, name, btf_kind_str(arr_t));
 2082		return false;
 2083	}
 2084	arr_info = btf_array(arr_t);
 2085	*res = arr_info->nelems;
 2086	return true;
 2087}
 2088
 2089static int build_map_pin_path(struct bpf_map *map, const char *path)
 2090{
 2091	char buf[PATH_MAX];
 2092	int len;
 2093
 2094	if (!path)
 2095		path = "/sys/fs/bpf";
 2096
 2097	len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
 2098	if (len < 0)
 2099		return -EINVAL;
 2100	else if (len >= PATH_MAX)
 2101		return -ENAMETOOLONG;
 2102
 2103	return bpf_map__set_pin_path(map, buf);
 2104}
 2105
 2106/* should match definition in bpf_helpers.h */
 2107enum libbpf_pin_type {
 2108	LIBBPF_PIN_NONE,
 2109	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
 2110	LIBBPF_PIN_BY_NAME,
 2111};
 2112
 2113int parse_btf_map_def(const char *map_name, struct btf *btf,
 2114		      const struct btf_type *def_t, bool strict,
 2115		      struct btf_map_def *map_def, struct btf_map_def *inner_def)
 2116{
 2117	const struct btf_type *t;
 2118	const struct btf_member *m;
 2119	bool is_inner = inner_def == NULL;
 2120	int vlen, i;
 2121
 2122	vlen = btf_vlen(def_t);
 2123	m = btf_members(def_t);
 2124	for (i = 0; i < vlen; i++, m++) {
 2125		const char *name = btf__name_by_offset(btf, m->name_off);
 2126
 2127		if (!name) {
 2128			pr_warn("map '%s': invalid field #%d.\n", map_name, i);
 2129			return -EINVAL;
 2130		}
 2131		if (strcmp(name, "type") == 0) {
 2132			if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
 2133				return -EINVAL;
 2134			map_def->parts |= MAP_DEF_MAP_TYPE;
 2135		} else if (strcmp(name, "max_entries") == 0) {
 2136			if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
 2137				return -EINVAL;
 2138			map_def->parts |= MAP_DEF_MAX_ENTRIES;
 2139		} else if (strcmp(name, "map_flags") == 0) {
 2140			if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
 2141				return -EINVAL;
 2142			map_def->parts |= MAP_DEF_MAP_FLAGS;
 2143		} else if (strcmp(name, "numa_node") == 0) {
 2144			if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
 2145				return -EINVAL;
 2146			map_def->parts |= MAP_DEF_NUMA_NODE;
 2147		} else if (strcmp(name, "key_size") == 0) {
 2148			__u32 sz;
 2149
 2150			if (!get_map_field_int(map_name, btf, m, &sz))
 2151				return -EINVAL;
 2152			if (map_def->key_size && map_def->key_size != sz) {
 2153				pr_warn("map '%s': conflicting key size %u != %u.\n",
 2154					map_name, map_def->key_size, sz);
 2155				return -EINVAL;
 2156			}
 2157			map_def->key_size = sz;
 2158			map_def->parts |= MAP_DEF_KEY_SIZE;
 2159		} else if (strcmp(name, "key") == 0) {
 2160			__s64 sz;
 2161
 2162			t = btf__type_by_id(btf, m->type);
 2163			if (!t) {
 2164				pr_warn("map '%s': key type [%d] not found.\n",
 2165					map_name, m->type);
 2166				return -EINVAL;
 2167			}
 2168			if (!btf_is_ptr(t)) {
 2169				pr_warn("map '%s': key spec is not PTR: %s.\n",
 2170					map_name, btf_kind_str(t));
 2171				return -EINVAL;
 2172			}
 2173			sz = btf__resolve_size(btf, t->type);
 2174			if (sz < 0) {
 2175				pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
 2176					map_name, t->type, (ssize_t)sz);
 2177				return sz;
 2178			}
 2179			if (map_def->key_size && map_def->key_size != sz) {
 2180				pr_warn("map '%s': conflicting key size %u != %zd.\n",
 2181					map_name, map_def->key_size, (ssize_t)sz);
 2182				return -EINVAL;
 2183			}
 2184			map_def->key_size = sz;
 2185			map_def->key_type_id = t->type;
 2186			map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
 2187		} else if (strcmp(name, "value_size") == 0) {
 2188			__u32 sz;
 2189
 2190			if (!get_map_field_int(map_name, btf, m, &sz))
 2191				return -EINVAL;
 2192			if (map_def->value_size && map_def->value_size != sz) {
 2193				pr_warn("map '%s': conflicting value size %u != %u.\n",
 2194					map_name, map_def->value_size, sz);
 2195				return -EINVAL;
 2196			}
 2197			map_def->value_size = sz;
 2198			map_def->parts |= MAP_DEF_VALUE_SIZE;
 2199		} else if (strcmp(name, "value") == 0) {
 2200			__s64 sz;
 2201
 2202			t = btf__type_by_id(btf, m->type);
 2203			if (!t) {
 2204				pr_warn("map '%s': value type [%d] not found.\n",
 2205					map_name, m->type);
 2206				return -EINVAL;
 2207			}
 2208			if (!btf_is_ptr(t)) {
 2209				pr_warn("map '%s': value spec is not PTR: %s.\n",
 2210					map_name, btf_kind_str(t));
 2211				return -EINVAL;
 2212			}
 2213			sz = btf__resolve_size(btf, t->type);
 2214			if (sz < 0) {
 2215				pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
 2216					map_name, t->type, (ssize_t)sz);
 2217				return sz;
 2218			}
 2219			if (map_def->value_size && map_def->value_size != sz) {
 2220				pr_warn("map '%s': conflicting value size %u != %zd.\n",
 2221					map_name, map_def->value_size, (ssize_t)sz);
 2222				return -EINVAL;
 2223			}
 2224			map_def->value_size = sz;
 2225			map_def->value_type_id = t->type;
 2226			map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
 2227		}
 2228		else if (strcmp(name, "values") == 0) {
 2229			bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
 2230			bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
 2231			const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
 2232			char inner_map_name[128];
 2233			int err;
 2234
 2235			if (is_inner) {
 2236				pr_warn("map '%s': multi-level inner maps not supported.\n",
 2237					map_name);
 2238				return -ENOTSUP;
 2239			}
 2240			if (i != vlen - 1) {
 2241				pr_warn("map '%s': '%s' member should be last.\n",
 2242					map_name, name);
 2243				return -EINVAL;
 2244			}
 2245			if (!is_map_in_map && !is_prog_array) {
 2246				pr_warn("map '%s': should be map-in-map or prog-array.\n",
 2247					map_name);
 2248				return -ENOTSUP;
 2249			}
 2250			if (map_def->value_size && map_def->value_size != 4) {
 2251				pr_warn("map '%s': conflicting value size %u != 4.\n",
 2252					map_name, map_def->value_size);
 2253				return -EINVAL;
 2254			}
 2255			map_def->value_size = 4;
 2256			t = btf__type_by_id(btf, m->type);
 2257			if (!t) {
 2258				pr_warn("map '%s': %s type [%d] not found.\n",
 2259					map_name, desc, m->type);
 2260				return -EINVAL;
 2261			}
 2262			if (!btf_is_array(t) || btf_array(t)->nelems) {
 2263				pr_warn("map '%s': %s spec is not a zero-sized array.\n",
 2264					map_name, desc);
 2265				return -EINVAL;
 2266			}
 2267			t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
 2268			if (!btf_is_ptr(t)) {
 2269				pr_warn("map '%s': %s def is of unexpected kind %s.\n",
 2270					map_name, desc, btf_kind_str(t));
 2271				return -EINVAL;
 2272			}
 2273			t = skip_mods_and_typedefs(btf, t->type, NULL);
 2274			if (is_prog_array) {
 2275				if (!btf_is_func_proto(t)) {
 2276					pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
 2277						map_name, btf_kind_str(t));
 2278					return -EINVAL;
 2279				}
 2280				continue;
 2281			}
 2282			if (!btf_is_struct(t)) {
 2283				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
 2284					map_name, btf_kind_str(t));
 2285				return -EINVAL;
 2286			}
 2287
 2288			snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
 2289			err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
 2290			if (err)
 2291				return err;
 2292
 2293			map_def->parts |= MAP_DEF_INNER_MAP;
 2294		} else if (strcmp(name, "pinning") == 0) {
 2295			__u32 val;
 2296
 2297			if (is_inner) {
 2298				pr_warn("map '%s': inner def can't be pinned.\n", map_name);
 2299				return -EINVAL;
 2300			}
 2301			if (!get_map_field_int(map_name, btf, m, &val))
 2302				return -EINVAL;
 2303			if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
 2304				pr_warn("map '%s': invalid pinning value %u.\n",
 2305					map_name, val);
 2306				return -EINVAL;
 2307			}
 2308			map_def->pinning = val;
 2309			map_def->parts |= MAP_DEF_PINNING;
 2310		} else if (strcmp(name, "map_extra") == 0) {
 2311			__u32 map_extra;
 2312
 2313			if (!get_map_field_int(map_name, btf, m, &map_extra))
 2314				return -EINVAL;
 2315			map_def->map_extra = map_extra;
 2316			map_def->parts |= MAP_DEF_MAP_EXTRA;
 2317		} else {
 2318			if (strict) {
 2319				pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
 2320				return -ENOTSUP;
 2321			}
 2322			pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
 2323		}
 2324	}
 2325
 2326	if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
 2327		pr_warn("map '%s': map type isn't specified.\n", map_name);
 2328		return -EINVAL;
 2329	}
 2330
 2331	return 0;
 2332}
 2333
 2334static size_t adjust_ringbuf_sz(size_t sz)
 2335{
 2336	__u32 page_sz = sysconf(_SC_PAGE_SIZE);
 2337	__u32 mul;
 2338
 2339	/* if user forgot to set any size, make sure they see error */
 2340	if (sz == 0)
 2341		return 0;
 2342	/* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
 2343	 * a power-of-2 multiple of kernel's page size. If user diligently
 2344	 * satisified these conditions, pass the size through.
 2345	 */
 2346	if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
 2347		return sz;
 2348
 2349	/* Otherwise find closest (page_sz * power_of_2) product bigger than
 2350	 * user-set size to satisfy both user size request and kernel
 2351	 * requirements and substitute correct max_entries for map creation.
 2352	 */
 2353	for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
 2354		if (mul * page_sz > sz)
 2355			return mul * page_sz;
 2356	}
 2357
 2358	/* if it's impossible to satisfy the conditions (i.e., user size is
 2359	 * very close to UINT_MAX but is not a power-of-2 multiple of
 2360	 * page_size) then just return original size and let kernel reject it
 2361	 */
 2362	return sz;
 2363}
 2364
 2365static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
 2366{
 2367	map->def.type = def->map_type;
 2368	map->def.key_size = def->key_size;
 2369	map->def.value_size = def->value_size;
 2370	map->def.max_entries = def->max_entries;
 2371	map->def.map_flags = def->map_flags;
 2372	map->map_extra = def->map_extra;
 2373
 2374	map->numa_node = def->numa_node;
 2375	map->btf_key_type_id = def->key_type_id;
 2376	map->btf_value_type_id = def->value_type_id;
 2377
 2378	/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
 2379	if (map->def.type == BPF_MAP_TYPE_RINGBUF)
 2380		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
 2381
 2382	if (def->parts & MAP_DEF_MAP_TYPE)
 2383		pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
 2384
 2385	if (def->parts & MAP_DEF_KEY_TYPE)
 2386		pr_debug("map '%s': found key [%u], sz = %u.\n",
 2387			 map->name, def->key_type_id, def->key_size);
 2388	else if (def->parts & MAP_DEF_KEY_SIZE)
 2389		pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
 2390
 2391	if (def->parts & MAP_DEF_VALUE_TYPE)
 2392		pr_debug("map '%s': found value [%u], sz = %u.\n",
 2393			 map->name, def->value_type_id, def->value_size);
 2394	else if (def->parts & MAP_DEF_VALUE_SIZE)
 2395		pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
 2396
 2397	if (def->parts & MAP_DEF_MAX_ENTRIES)
 2398		pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
 2399	if (def->parts & MAP_DEF_MAP_FLAGS)
 2400		pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
 2401	if (def->parts & MAP_DEF_MAP_EXTRA)
 2402		pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
 2403			 (unsigned long long)def->map_extra);
 2404	if (def->parts & MAP_DEF_PINNING)
 2405		pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
 2406	if (def->parts & MAP_DEF_NUMA_NODE)
 2407		pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
 2408
 2409	if (def->parts & MAP_DEF_INNER_MAP)
 2410		pr_debug("map '%s': found inner map definition.\n", map->name);
 2411}
 2412
 2413static const char *btf_var_linkage_str(__u32 linkage)
 2414{
 2415	switch (linkage) {
 2416	case BTF_VAR_STATIC: return "static";
 2417	case BTF_VAR_GLOBAL_ALLOCATED: return "global";
 2418	case BTF_VAR_GLOBAL_EXTERN: return "extern";
 2419	default: return "unknown";
 2420	}
 2421}
 2422
 2423static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 2424					 const struct btf_type *sec,
 2425					 int var_idx, int sec_idx,
 2426					 const Elf_Data *data, bool strict,
 2427					 const char *pin_root_path)
 2428{
 2429	struct btf_map_def map_def = {}, inner_def = {};
 2430	const struct btf_type *var, *def;
 2431	const struct btf_var_secinfo *vi;
 2432	const struct btf_var *var_extra;
 2433	const char *map_name;
 2434	struct bpf_map *map;
 2435	int err;
 2436
 2437	vi = btf_var_secinfos(sec) + var_idx;
 2438	var = btf__type_by_id(obj->btf, vi->type);
 2439	var_extra = btf_var(var);
 2440	map_name = btf__name_by_offset(obj->btf, var->name_off);
 2441
 2442	if (map_name == NULL || map_name[0] == '\0') {
 2443		pr_warn("map #%d: empty name.\n", var_idx);
 2444		return -EINVAL;
 2445	}
 2446	if ((__u64)vi->offset + vi->size > data->d_size) {
 2447		pr_warn("map '%s' BTF data is corrupted.\n", map_name);
 2448		return -EINVAL;
 2449	}
 2450	if (!btf_is_var(var)) {
 2451		pr_warn("map '%s': unexpected var kind %s.\n",
 2452			map_name, btf_kind_str(var));
 2453		return -EINVAL;
 2454	}
 2455	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
 2456		pr_warn("map '%s': unsupported map linkage %s.\n",
 2457			map_name, btf_var_linkage_str(var_extra->linkage));
 2458		return -EOPNOTSUPP;
 2459	}
 2460
 2461	def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
 2462	if (!btf_is_struct(def)) {
 2463		pr_warn("map '%s': unexpected def kind %s.\n",
 2464			map_name, btf_kind_str(var));
 2465		return -EINVAL;
 2466	}
 2467	if (def->size > vi->size) {
 2468		pr_warn("map '%s': invalid def size.\n", map_name);
 2469		return -EINVAL;
 2470	}
 2471
 2472	map = bpf_object__add_map(obj);
 2473	if (IS_ERR(map))
 2474		return PTR_ERR(map);
 2475	map->name = strdup(map_name);
 2476	if (!map->name) {
 2477		pr_warn("map '%s': failed to alloc map name.\n", map_name);
 2478		return -ENOMEM;
 2479	}
 2480	map->libbpf_type = LIBBPF_MAP_UNSPEC;
 2481	map->def.type = BPF_MAP_TYPE_UNSPEC;
 2482	map->sec_idx = sec_idx;
 2483	map->sec_offset = vi->offset;
 2484	map->btf_var_idx = var_idx;
 2485	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
 2486		 map_name, map->sec_idx, map->sec_offset);
 2487
 2488	err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
 2489	if (err)
 2490		return err;
 2491
 2492	fill_map_from_def(map, &map_def);
 2493
 2494	if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
 2495		err = build_map_pin_path(map, pin_root_path);
 2496		if (err) {
 2497			pr_warn("map '%s': couldn't build pin path.\n", map->name);
 2498			return err;
 2499		}
 2500	}
 2501
 2502	if (map_def.parts & MAP_DEF_INNER_MAP) {
 2503		map->inner_map = calloc(1, sizeof(*map->inner_map));
 2504		if (!map->inner_map)
 2505			return -ENOMEM;
 2506		map->inner_map->fd = -1;
 2507		map->inner_map->sec_idx = sec_idx;
 2508		map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
 2509		if (!map->inner_map->name)
 2510			return -ENOMEM;
 2511		sprintf(map->inner_map->name, "%s.inner", map_name);
 2512
 2513		fill_map_from_def(map->inner_map, &inner_def);
 2514	}
 2515
 2516	err = bpf_map_find_btf_info(obj, map);
 2517	if (err)
 2518		return err;
 2519
 2520	return 0;
 2521}
 2522
 2523static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
 2524					  const char *pin_root_path)
 2525{
 2526	const struct btf_type *sec = NULL;
 2527	int nr_types, i, vlen, err;
 2528	const struct btf_type *t;
 2529	const char *name;
 2530	Elf_Data *data;
 2531	Elf_Scn *scn;
 2532
 2533	if (obj->efile.btf_maps_shndx < 0)
 2534		return 0;
 2535
 2536	scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
 2537	data = elf_sec_data(obj, scn);
 2538	if (!scn || !data) {
 2539		pr_warn("elf: failed to get %s map definitions for %s\n",
 2540			MAPS_ELF_SEC, obj->path);
 2541		return -EINVAL;
 2542	}
 2543
 2544	nr_types = btf__type_cnt(obj->btf);
 2545	for (i = 1; i < nr_types; i++) {
 2546		t = btf__type_by_id(obj->btf, i);
 2547		if (!btf_is_datasec(t))
 2548			continue;
 2549		name = btf__name_by_offset(obj->btf, t->name_off);
 2550		if (strcmp(name, MAPS_ELF_SEC) == 0) {
 2551			sec = t;
 2552			obj->efile.btf_maps_sec_btf_id = i;
 2553			break;
 2554		}
 2555	}
 2556
 2557	if (!sec) {
 2558		pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
 2559		return -ENOENT;
 2560	}
 2561
 2562	vlen = btf_vlen(sec);
 2563	for (i = 0; i < vlen; i++) {
 2564		err = bpf_object__init_user_btf_map(obj, sec, i,
 2565						    obj->efile.btf_maps_shndx,
 2566						    data, strict,
 2567						    pin_root_path);
 2568		if (err)
 2569			return err;
 2570	}
 2571
 2572	return 0;
 2573}
 2574
 2575static int bpf_object__init_maps(struct bpf_object *obj,
 2576				 const struct bpf_object_open_opts *opts)
 2577{
 2578	const char *pin_root_path;
 2579	bool strict;
 2580	int err = 0;
 2581
 2582	strict = !OPTS_GET(opts, relaxed_maps, false);
 2583	pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
 2584
 2585	err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
 2586	err = err ?: bpf_object__init_global_data_maps(obj);
 2587	err = err ?: bpf_object__init_kconfig_map(obj);
 2588	err = err ?: bpf_object__init_struct_ops_maps(obj);
 2589
 2590	return err;
 2591}
 2592
 2593static bool section_have_execinstr(struct bpf_object *obj, int idx)
 2594{
 2595	Elf64_Shdr *sh;
 2596
 2597	sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
 2598	if (!sh)
 2599		return false;
 2600
 2601	return sh->sh_flags & SHF_EXECINSTR;
 2602}
 2603
 2604static bool btf_needs_sanitization(struct bpf_object *obj)
 2605{
 2606	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
 2607	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
 2608	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
 2609	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
 2610	bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
 2611	bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
 2612	bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
 2613
 2614	return !has_func || !has_datasec || !has_func_global || !has_float ||
 2615	       !has_decl_tag || !has_type_tag || !has_enum64;
 2616}
 2617
/*
 * Rewrite, in place, the types of @btf whose kind is reported by
 * kernel_supports() as unsupported, replacing each of them with a
 * substitute kind (see the per-branch comments below).
 *
 * Returns 0 on success, or the negative value returned by btf__add_int()
 * if the ENUM64 placeholder type can't be added.
 */
static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
	bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
	bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
	bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
	bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
	/* ID of the INT type standing in for ENUM64 members; added on first use */
	int enum64_placeholder_id = 0;
	struct btf_type *t;
	int i, j, vlen;

	/* type count is re-read on every iteration: btf__add_int() below adds a type */
	for (i = 1; i < btf__type_cnt(btf); i++) {
		t = (struct btf_type *)btf__type_by_id(btf, i);

		if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
			/* replace VAR/DECL_TAG with INT */
			t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
			/*
			 * using size = 1 is the safest choice, 4 will be too
			 * big and cause kernel BTF validation failure if
			 * original variable took less than 4 bytes
			 */
			t->size = 1;
			/* the word right after the type header gets the INT encoding */
			*(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
		} else if (!has_datasec && btf_is_datasec(t)) {
			/* replace DATASEC with STRUCT */
			const struct btf_var_secinfo *v = btf_var_secinfos(t);
			struct btf_member *m = btf_members(t);
			struct btf_type *vt;
			char *name;

			/* rewrite the DATASEC name in place, turning every '.' into '_' */
			name = (char *)btf__name_by_offset(btf, t->name_off);
			while (*name) {
				if (*name == '.')
					*name = '_';
				name++;
			}

			vlen = btf_vlen(t);
			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
			/*
			 * NOTE(review): v and m are both derived from t, so each
			 * member presumably overwrites the very var_secinfo it is
			 * built from -- confirm against the struct layouts.
			 */
			for (j = 0; j < vlen; j++, v++, m++) {
				/* order of field assignments is important */
				m->offset = v->offset * 8;
				m->type = v->type;
				/* preserve variable name as member name */
				vt = (void *)btf__type_by_id(btf, v->type);
				m->name_off = vt->name_off;
			}
		} else if (!has_func && btf_is_func_proto(t)) {
			/* replace FUNC_PROTO with ENUM */
			vlen = btf_vlen(t);
			t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
			t->size = sizeof(__u32); /* kernel enforced */
		} else if (!has_func && btf_is_func(t)) {
			/* replace FUNC with TYPEDEF */
			t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
		} else if (!has_func_global && btf_is_func(t)) {
			/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
			t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
		} else if (!has_float && btf_is_float(t)) {
			/* replace FLOAT with an equally-sized empty STRUCT;
			 * since C compilers do not accept e.g. "float" as a
			 * valid struct name, make it anonymous
			 */
			t->name_off = 0;
			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
		} else if (!has_type_tag && btf_is_type_tag(t)) {
			/* replace TYPE_TAG with a CONST */
			t->name_off = 0;
			t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
		} else if (!has_enum64 && btf_is_enum(t)) {
			/* clear the kflag */
			t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
		} else if (!has_enum64 && btf_is_enum64(t)) {
			/* replace ENUM64 with a union */
			struct btf_member *m;

			if (enum64_placeholder_id == 0) {
				enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
				if (enum64_placeholder_id < 0)
					return enum64_placeholder_id;

				/*
				 * re-fetch t after adding a type.
				 * NOTE(review): presumably btf__add_int() may
				 * move type data and invalidate t -- confirm.
				 */
				t = (struct btf_type *)btf__type_by_id(btf, i);
			}

			m = btf_members(t);
			vlen = btf_vlen(t);
			t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
			/* every union member becomes the placeholder INT at offset 0 */
			for (j = 0; j < vlen; j++, m++) {
				m->type = enum64_placeholder_id;
				m->offset = 0;
			}
                }
	}

	return 0;
}
 2717
 2718static bool libbpf_needs_btf(const struct bpf_object *obj)
 2719{
 2720	return obj->efile.btf_maps_shndx >= 0 ||
 2721	       obj->efile.st_ops_shndx >= 0 ||
 2722	       obj->nr_extern > 0;
 2723}
 2724
 2725static bool kernel_needs_btf(const struct bpf_object *obj)
 2726{
 2727	return obj->efile.st_ops_shndx >= 0;
 2728}
 2729
 2730static int bpf_object__init_btf(struct bpf_object *obj,
 2731				Elf_Data *btf_data,
 2732				Elf_Data *btf_ext_data)
 2733{
 2734	int err = -ENOENT;
 2735
 2736	if (btf_data) {
 2737		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
 2738		err = libbpf_get_error(obj->btf);
 2739		if (err) {
 2740			obj->btf = NULL;
 2741			pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
 2742			goto out;
 2743		}
 2744		/* enforce 8-byte pointers for BPF-targeted BTFs */
 2745		btf__set_pointer_size(obj->btf, 8);
 2746	}
 2747	if (btf_ext_data) {
 2748		struct btf_ext_info *ext_segs[3];
 2749		int seg_num, sec_num;
 2750
 2751		if (!obj->btf) {
 2752			pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
 2753				 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
 2754			goto out;
 2755		}
 2756		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
 2757		err = libbpf_get_error(obj->btf_ext);
 2758		if (err) {
 2759			pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
 2760				BTF_EXT_ELF_SEC, err);
 2761			obj->btf_ext = NULL;
 2762			goto out;
 2763		}
 2764
 2765		/* setup .BTF.ext to ELF section mapping */
 2766		ext_segs[0] = &obj->btf_ext->func_info;
 2767		ext_segs[1] = &obj->btf_ext->line_info;
 2768		ext_segs[2] = &obj->btf_ext->core_relo_info;
 2769		for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
 2770			struct btf_ext_info *seg = ext_segs[seg_num];
 2771			const struct btf_ext_info_sec *sec;
 2772			const char *sec_name;
 2773			Elf_Scn *scn;
 2774
 2775			if (seg->sec_cnt == 0)
 2776				continue;
 2777
 2778			seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
 2779			if (!seg->sec_idxs) {
 2780				err = -ENOMEM;
 2781				goto out;
 2782			}
 2783
 2784			sec_num = 0;
 2785			for_each_btf_ext_sec(seg, sec) {
 2786				/* preventively increment index to avoid doing
 2787				 * this before every continue below
 2788				 */
 2789				sec_num++;
 2790
 2791				sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
 2792				if (str_is_empty(sec_name))
 2793					continue;
 2794				scn = elf_sec_by_name(obj, sec_name);
 2795				if (!scn)
 2796					continue;
 2797
 2798				seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
 2799			}
 2800		}
 2801	}
 2802out:
 2803	if (err && libbpf_needs_btf(obj)) {
 2804		pr_warn("BTF is required, but is missing or corrupted.\n");
 2805		return err;
 2806	}
 2807	return 0;
 2808}
 2809
 2810static int compare_vsi_off(const void *_a, const void *_b)
 2811{
 2812	const struct btf_var_secinfo *a = _a;
 2813	const struct btf_var_secinfo *b = _b;
 2814
 2815	return a->offset - b->offset;
 2816}
 2817
 2818static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
 2819			     struct btf_type *t)
 2820{
 2821	__u32 size = 0, off = 0, i, vars = btf_vlen(t);
 2822	const char *name = btf__name_by_offset(btf, t->name_off);
 2823	const struct btf_type *t_var;
 2824	struct btf_var_secinfo *vsi;
 2825	const struct btf_var *var;
 2826	int ret;
 2827
 2828	if (!name) {
 2829		pr_debug("No name found in string section for DATASEC kind.\n");
 2830		return -ENOENT;
 2831	}
 2832
 2833	/* .extern datasec size and var offsets were set correctly during
 2834	 * extern collection step, so just skip straight to sorting variables
 2835	 */
 2836	if (t->size)
 2837		goto sort_vars;
 2838
 2839	ret = find_elf_sec_sz(obj, name, &size);
 2840	if (ret || !size) {
 2841		pr_debug("Invalid size for section %s: %u bytes\n", name, size);
 2842		return -ENOENT;
 2843	}
 2844
 2845	t->size = size;
 2846
 2847	for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
 2848		t_var = btf__type_by_id(btf, vsi->type);
 2849		if (!t_var || !btf_is_var(t_var)) {
 2850			pr_debug("Non-VAR type seen in section %s\n", name);
 2851			return -EINVAL;
 2852		}
 2853
 2854		var = btf_var(t_var);
 2855		if (var->linkage == BTF_VAR_STATIC)
 2856			continue;
 2857
 2858		name = btf__name_by_offset(btf, t_var->name_off);
 2859		if (!name) {
 2860			pr_debug("No name found in string section for VAR kind\n");
 2861			return -ENOENT;
 2862		}
 2863
 2864		ret = find_elf_var_offset(obj, name, &off);
 2865		if (ret) {
 2866			pr_debug("No offset found in symbol table for VAR %s\n",
 2867				 name);
 2868			return -ENOENT;
 2869		}
 2870
 2871		vsi->offset = off;
 2872	}
 2873
 2874sort_vars:
 2875	qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
 2876	return 0;
 2877}
 2878
 2879static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
 2880{
 2881	int err = 0;
 2882	__u32 i, n = btf__type_cnt(btf);
 2883
 2884	for (i = 1; i < n; i++) {
 2885		struct btf_type *t = btf_type_by_id(btf, i);
 2886
 2887		/* Loader needs to fix up some of the things compiler
 2888		 * couldn't get its hands on while emitting BTF. This
 2889		 * is section size and global variable offset. We use
 2890		 * the info from the ELF itself for this purpose.
 2891		 */
 2892		if (btf_is_datasec(t)) {
 2893			err = btf_fixup_datasec(obj, btf, t);
 2894			if (err)
 2895				break;
 2896		}
 2897	}
 2898
 2899	return libbpf_err(err);
 2900}
 2901
 2902static int bpf_object__finalize_btf(struct bpf_object *obj)
 2903{
 2904	int err;
 2905
 2906	if (!obj->btf)
 2907		return 0;
 2908
 2909	err = btf_finalize_data(obj, obj->btf);
 2910	if (err) {
 2911		pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
 2912		return err;
 2913	}
 2914
 2915	return 0;
 2916}
 2917
 2918static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
 2919{
 2920	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
 2921	    prog->type == BPF_PROG_TYPE_LSM)
 2922		return true;
 2923
 2924	/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
 2925	 * also need vmlinux BTF
 2926	 */
 2927	if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
 2928		return true;
 2929
 2930	return false;
 2931}
 2932
 2933static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
 2934{
 2935	struct bpf_program *prog;
 2936	int i;
 2937
 2938	/* CO-RE relocations need kernel BTF, only when btf_custom_path
 2939	 * is not specified
 2940	 */
 2941	if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
 2942		return true;
 2943
 2944	/* Support for typed ksyms needs kernel BTF */
 2945	for (i = 0; i < obj->nr_extern; i++) {
 2946		const struct extern_desc *ext;
 2947
 2948		ext = &obj->externs[i];
 2949		if (ext->type == EXT_KSYM && ext->ksym.type_id)
 2950			return true;
 2951	}
 2952
 2953	bpf_object__for_each_program(prog, obj) {
 2954		if (!prog->autoload)
 2955			continue;
 2956		if (prog_needs_vmlinux_btf(prog))
 2957			return true;
 2958	}
 2959
 2960	return false;
 2961}
 2962
 2963static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
 2964{
 2965	int err;
 2966
 2967	/* btf_vmlinux could be loaded earlier */
 2968	if (obj->btf_vmlinux || obj->gen_loader)
 2969		return 0;
 2970
 2971	if (!force && !obj_needs_vmlinux_btf(obj))
 2972		return 0;
 2973
 2974	obj->btf_vmlinux = btf__load_vmlinux_btf();
 2975	err = libbpf_get_error(obj->btf_vmlinux);
 2976	if (err) {
 2977		pr_warn("Error loading vmlinux BTF: %d\n", err);
 2978		obj->btf_vmlinux = NULL;
 2979		return err;
 2980	}
 2981	return 0;
 2982}
 2983
 2984static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 2985{
 2986	struct btf *kern_btf = obj->btf;
 2987	bool btf_mandatory, sanitize;
 2988	int i, err = 0;
 2989
 2990	if (!obj->btf)
 2991		return 0;
 2992
 2993	if (!kernel_supports(obj, FEAT_BTF)) {
 2994		if (kernel_needs_btf(obj)) {
 2995			err = -EOPNOTSUPP;
 2996			goto report;
 2997		}
 2998		pr_debug("Kernel doesn't support BTF, skipping uploading it.\n");
 2999		return 0;
 3000	}
 3001
 3002	/* Even though some subprogs are global/weak, user might prefer more
 3003	 * permissive BPF verification process that BPF verifier performs for
 3004	 * static functions, taking into account more context from the caller
 3005	 * functions. In such case, they need to mark such subprogs with
 3006	 * __attribute__((visibility("hidden"))) and libbpf will adjust
 3007	 * corresponding FUNC BTF type to be marked as static and trigger more
 3008	 * involved BPF verification process.
 3009	 */
 3010	for (i = 0; i < obj->nr_programs; i++) {
 3011		struct bpf_program *prog = &obj->programs[i];
 3012		struct btf_type *t;
 3013		const char *name;
 3014		int j, n;
 3015
 3016		if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
 3017			continue;
 3018
 3019		n = btf__type_cnt(obj->btf);
 3020		for (j = 1; j < n; j++) {
 3021			t = btf_type_by_id(obj->btf, j);
 3022			if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
 3023				continue;
 3024
 3025			name = btf__str_by_offset(obj->btf, t->name_off);
 3026			if (strcmp(name, prog->name) != 0)
 3027				continue;
 3028
 3029			t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
 3030			break;
 3031		}
 3032	}
 3033
 3034	sanitize = btf_needs_sanitization(obj);
 3035	if (sanitize) {
 3036		const void *raw_data;
 3037		__u32 sz;
 3038
 3039		/* clone BTF to sanitize a copy and leave the original intact */
 3040		raw_data = btf__raw_data(obj->btf, &sz);
 3041		kern_btf = btf__new(raw_data, sz);
 3042		err = libbpf_get_error(kern_btf);
 3043		if (err)
 3044			return err;
 3045
 3046		/* enforce 8-byte pointers for BPF-targeted BTFs */
 3047		btf__set_pointer_size(obj->btf, 8);
 3048		err = bpf_object__sanitize_btf(obj, kern_btf);
 3049		if (err)
 3050			return err;
 3051	}
 3052
 3053	if (obj->gen_loader) {
 3054		__u32 raw_size = 0;
 3055		const void *raw_data = btf__raw_data(kern_btf, &raw_size);
 3056
 3057		if (!raw_data)
 3058			return -ENOMEM;
 3059		bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
 3060		/* Pretend to have valid FD to pass various fd >= 0 checks.
 3061		 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
 3062		 */
 3063		btf__set_fd(kern_btf, 0);
 3064	} else {
 3065		/* currently BPF_BTF_LOAD only supports log_level 1 */
 3066		err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
 3067					   obj->log_level ? 1 : 0);
 3068	}
 3069	if (sanitize) {
 3070		if (!err) {
 3071			/* move fd to libbpf's BTF */
 3072			btf__set_fd(obj->btf, btf__fd(kern_btf));
 3073			btf__set_fd(kern_btf, -1);
 3074		}
 3075		btf__free(kern_btf);
 3076	}
 3077report:
 3078	if (err) {
 3079		btf_mandatory = kernel_needs_btf(obj);
 3080		pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
 3081			btf_mandatory ? "BTF is mandatory, can't proceed."
 3082				      : "BTF is optional, ignoring.");
 3083		if (!btf_mandatory)
 3084			err = 0;
 3085	}
 3086	return err;
 3087}
 3088
 3089static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
 3090{
 3091	const char *name;
 3092
 3093	name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
 3094	if (!name) {
 3095		pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
 3096			off, obj->path, elf_errmsg(-1));
 3097		return NULL;
 3098	}
 3099
 3100	return name;
 3101}
 3102
 3103static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
 3104{
 3105	const char *name;
 3106
 3107	name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
 3108	if (!name) {
 3109		pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
 3110			off, obj->path, elf_errmsg(-1));
 3111		return NULL;
 3112	}
 3113
 3114	return name;
 3115}
 3116
 3117static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
 3118{
 3119	Elf_Scn *scn;
 3120
 3121	scn = elf_getscn(obj->efile.elf, idx);
 3122	if (!scn) {
 3123		pr_warn("elf: failed to get section(%zu) from %s: %s\n",
 3124			idx, obj->path, elf_errmsg(-1));
 3125		return NULL;
 3126	}
 3127	return scn;
 3128}
 3129
 3130static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
 3131{
 3132	Elf_Scn *scn = NULL;
 3133	Elf *elf = obj->efile.elf;
 3134	const char *sec_name;
 3135
 3136	while ((scn = elf_nextscn(elf, scn)) != NULL) {
 3137		sec_name = elf_sec_name(obj, scn);
 3138		if (!sec_name)
 3139			return NULL;
 3140
 3141		if (strcmp(sec_name, name) != 0)
 3142			continue;
 3143
 3144		return scn;
 3145	}
 3146	return NULL;
 3147}
 3148
 3149static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
 3150{
 3151	Elf64_Shdr *shdr;
 3152
 3153	if (!scn)
 3154		return NULL;
 3155
 3156	shdr = elf64_getshdr(scn);
 3157	if (!shdr) {
 3158		pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
 3159			elf_ndxscn(scn), obj->path, elf_errmsg(-1));
 3160		return NULL;
 3161	}
 3162
 3163	return shdr;
 3164}
 3165
 3166static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
 3167{
 3168	const char *name;
 3169	Elf64_Shdr *sh;
 3170
 3171	if (!scn)
 3172		return NULL;
 3173
 3174	sh = elf_sec_hdr(obj, scn);
 3175	if (!sh)
 3176		return NULL;
 3177
 3178	name = elf_sec_str(obj, sh->sh_name);
 3179	if (!name) {
 3180		pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
 3181			elf_ndxscn(scn), obj->path, elf_errmsg(-1));
 3182		return NULL;
 3183	}
 3184
 3185	return name;
 3186}
 3187
 3188static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
 3189{
 3190	Elf_Data *data;
 3191
 3192	if (!scn)
 3193		return NULL;
 3194
 3195	data = elf_getdata(scn, 0);
 3196	if (!data) {
 3197		pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
 3198			elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
 3199			obj->path, elf_errmsg(-1));
 3200		return NULL;
 3201	}
 3202
 3203	return data;
 3204}
 3205
 3206static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
 3207{
 3208	if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
 3209		return NULL;
 3210
 3211	return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
 3212}
 3213
 3214static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
 3215{
 3216	if (idx >= data->d_size / sizeof(Elf64_Rel))
 3217		return NULL;
 3218
 3219	return (Elf64_Rel *)data->d_buf + idx;
 3220}
 3221
 3222static bool is_sec_name_dwarf(const char *name)
 3223{
 3224	/* approximation, but the actual list is too long */
 3225	return str_has_pfx(name, ".debug_");
 3226}
 3227
 3228static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
 3229{
 3230	/* no special handling of .strtab */
 3231	if (hdr->sh_type == SHT_STRTAB)
 3232		return true;
 3233
 3234	/* ignore .llvm_addrsig section as well */
 3235	if (hdr->sh_type == SHT_LLVM_ADDRSIG)
 3236		return true;
 3237
 3238	/* no subprograms will lead to an empty .text section, ignore it */
 3239	if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
 3240	    strcmp(name, ".text") == 0)
 3241		return true;
 3242
 3243	/* DWARF sections */
 3244	if (is_sec_name_dwarf(name))
 3245		return true;
 3246
 3247	if (str_has_pfx(name, ".rel")) {
 3248		name += sizeof(".rel") - 1;
 3249		/* DWARF section relocations */
 3250		if (is_sec_name_dwarf(name))
 3251			return true;
 3252
 3253		/* .BTF and .BTF.ext don't need relocations */
 3254		if (strcmp(name, BTF_ELF_SEC) == 0 ||
 3255		    strcmp(name, BTF_EXT_ELF_SEC) == 0)
 3256			return true;
 3257	}
 3258
 3259	return false;
 3260}
 3261
 3262static int cmp_progs(const void *_a, const void *_b)
 3263{
 3264	const struct bpf_program *a = _a;
 3265	const struct bpf_program *b = _b;
 3266
 3267	if (a->sec_idx != b->sec_idx)
 3268		return a->sec_idx < b->sec_idx ? -1 : 1;
 3269
 3270	/* sec_insn_off can't be the same within the section */
 3271	return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
 3272}
 3273
 3274static int bpf_object__elf_collect(struct bpf_object *obj)
 3275{
 3276	struct elf_sec_desc *sec_desc;
 3277	Elf *elf = obj->efile.elf;
 3278	Elf_Data *btf_ext_data = NULL;
 3279	Elf_Data *btf_data = NULL;
 3280	int idx = 0, err = 0;
 3281	const char *name;
 3282	Elf_Data *data;
 3283	Elf_Scn *scn;
 3284	Elf64_Shdr *sh;
 3285
 3286	/* ELF section indices are 0-based, but sec #0 is special "invalid"
 3287	 * section. e_shnum does include sec #0, so e_shnum is the necessary
 3288	 * size of an array to keep all the sections.
 3289	 */
 3290	obj->efile.sec_cnt = obj->efile.ehdr->e_shnum;
 3291	obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
 3292	if (!obj->efile.secs)
 3293		return -ENOMEM;
 3294
 3295	/* a bunch of ELF parsing functionality depends on processing symbols,
 3296	 * so do the first pass and find the symbol table
 3297	 */
 3298	scn = NULL;
 3299	while ((scn = elf_nextscn(elf, scn)) != NULL) {
 3300		sh = elf_sec_hdr(obj, scn);
 3301		if (!sh)
 3302			return -LIBBPF_ERRNO__FORMAT;
 3303
 3304		if (sh->sh_type == SHT_SYMTAB) {
 3305			if (obj->efile.symbols) {
 3306				pr_warn("elf: multiple symbol tables in %s\n", obj->path);
 3307				return -LIBBPF_ERRNO__FORMAT;
 3308			}
 3309
 3310			data = elf_sec_data(obj, scn);
 3311			if (!data)
 3312				return -LIBBPF_ERRNO__FORMAT;
 3313
 3314			idx = elf_ndxscn(scn);
 3315
 3316			obj->efile.symbols = data;
 3317			obj->efile.symbols_shndx = idx;
 3318			obj->efile.strtabidx = sh->sh_link;
 3319		}
 3320	}
 3321
 3322	if (!obj->efile.symbols) {
 3323		pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
 3324			obj->path);
 3325		return -ENOENT;
 3326	}
 3327
 3328	scn = NULL;
 3329	while ((scn = elf_nextscn(elf, scn)) != NULL) {
 3330		idx = elf_ndxscn(scn);
 3331		sec_desc = &obj->efile.secs[idx];
 3332
 3333		sh = elf_sec_hdr(obj, scn);
 3334		if (!sh)
 3335			return -LIBBPF_ERRNO__FORMAT;
 3336
 3337		name = elf_sec_str(obj, sh->sh_name);
 3338		if (!name)
 3339			return -LIBBPF_ERRNO__FORMAT;
 3340
 3341		if (ignore_elf_section(sh, name))
 3342			continue;
 3343
 3344		data = elf_sec_data(obj, scn);
 3345		if (!data)
 3346			return -LIBBPF_ERRNO__FORMAT;
 3347
 3348		pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
 3349			 idx, name, (unsigned long)data->d_size,
 3350			 (int)sh->sh_link, (unsigned long)sh->sh_flags,
 3351			 (int)sh->sh_type);
 3352
 3353		if (strcmp(name, "license") == 0) {
 3354			err = bpf_object__init_license(obj, data->d_buf, data->d_size);
 3355			if (err)
 3356				return err;
 3357		} else if (strcmp(name, "version") == 0) {
 3358			err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
 3359			if (err)
 3360				return err;
 3361		} else if (strcmp(name, "maps") == 0) {
 3362			obj->efile.maps_shndx = idx;
 3363		} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
 3364			obj->efile.btf_maps_shndx = idx;
 3365		} else if (strcmp(name, BTF_ELF_SEC) == 0) {
 3366			if (sh->sh_type != SHT_PROGBITS)
 3367				return -LIBBPF_ERRNO__FORMAT;
 3368			btf_data = data;
 3369		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
 3370			if (sh->sh_type != SHT_PROGBITS)
 3371				return -LIBBPF_ERRNO__FORMAT;
 3372			btf_ext_data = data;
 3373		} else if (sh->sh_type == SHT_SYMTAB) {
 3374			/* already processed during the first pass above */
 3375		} else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
 3376			if (sh->sh_flags & SHF_EXECINSTR) {
 3377				if (strcmp(name, ".text") == 0)
 3378					obj->efile.text_shndx = idx;
 3379				err = bpf_object__add_programs(obj, data, name, idx);
 3380				if (err)
 3381					return err;
 3382			} else if (strcmp(name, DATA_SEC) == 0 ||
 3383				   str_has_pfx(name, DATA_SEC ".")) {
 3384				sec_desc->sec_type = SEC_DATA;
 3385				sec_desc->shdr = sh;
 3386				sec_desc->data = data;
 3387			} else if (strcmp(name, RODATA_SEC) == 0 ||
 3388				   str_has_pfx(name, RODATA_SEC ".")) {
 3389				sec_desc->sec_type = SEC_RODATA;
 3390				sec_desc->shdr = sh;
 3391				sec_desc->data = data;
 3392			} else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
 3393				obj->efile.st_ops_data = data;
 3394				obj->efile.st_ops_shndx = idx;
 3395			} else {
 3396				pr_info("elf: skipping unrecognized data section(%d) %s\n",
 3397					idx, name);
 3398			}
 3399		} else if (sh->sh_type == SHT_REL) {
 3400			int targ_sec_idx = sh->sh_info; /* points to other section */
 3401
 3402			if (sh->sh_entsize != sizeof(Elf64_Rel) ||
 3403			    targ_sec_idx >= obj->efile.sec_cnt)
 3404				return -LIBBPF_ERRNO__FORMAT;
 3405
 3406			/* Only do relo for section with exec instructions */
 3407			if (!section_have_execinstr(obj, targ_sec_idx) &&
 3408			    strcmp(name, ".rel" STRUCT_OPS_SEC) &&
 3409			    strcmp(name, ".rel" MAPS_ELF_SEC)) {
 3410				pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
 3411					idx, name, targ_sec_idx,
 3412					elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
 3413				continue;
 3414			}
 3415
 3416			sec_desc->sec_type = SEC_RELO;
 3417			sec_desc->shdr = sh;
 3418			sec_desc->data = data;
 3419		} else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
 3420			sec_desc->sec_type = SEC_BSS;
 3421			sec_desc->shdr = sh;
 3422			sec_desc->data = data;
 3423		} else {
 3424			pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
 3425				(size_t)sh->sh_size);
 3426		}
 3427	}
 3428
 3429	if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
 3430		pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
 3431		return -LIBBPF_ERRNO__FORMAT;
 3432	}
 3433
 3434	/* sort BPF programs by section name and in-section instruction offset
 3435	 * for faster search */
 3436	if (obj->nr_programs)
 3437		qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
 3438
 3439	return bpf_object__init_btf(obj, btf_data, btf_ext_data);
 3440}
 3441
 3442static bool sym_is_extern(const Elf64_Sym *sym)
 3443{
 3444	int bind = ELF64_ST_BIND(sym->st_info);
 3445	/* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
 3446	return sym->st_shndx == SHN_UNDEF &&
 3447	       (bind == STB_GLOBAL || bind == STB_WEAK) &&
 3448	       ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
 3449}
 3450
 3451static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
 3452{
 3453	int bind = ELF64_ST_BIND(sym->st_info);
 3454	int type = ELF64_ST_TYPE(sym->st_info);
 3455
 3456	/* in .text section */
 3457	if (sym->st_shndx != text_shndx)
 3458		return false;
 3459
 3460	/* local function */
 3461	if (bind == STB_LOCAL && type == STT_SECTION)
 3462		return true;
 3463
 3464	/* global function */
 3465	return bind == STB_GLOBAL && type == STT_FUNC;
 3466}
 3467
 3468static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
 3469{
 3470	const struct btf_type *t;
 3471	const char *tname;
 3472	int i, n;
 3473
 3474	if (!btf)
 3475		return -ESRCH;
 3476
 3477	n = btf__type_cnt(btf);
 3478	for (i = 1; i < n; i++) {
 3479		t = btf__type_by_id(btf, i);
 3480
 3481		if (!btf_is_var(t) && !btf_is_func(t))
 3482			continue;
 3483
 3484		tname = btf__name_by_offset(btf, t->name_off);
 3485		if (strcmp(tname, ext_name))
 3486			continue;
 3487
 3488		if (btf_is_var(t) &&
 3489		    btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
 3490			return -EINVAL;
 3491
 3492		if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
 3493			return -EINVAL;
 3494
 3495		return i;
 3496	}
 3497
 3498	return -ENOENT;
 3499}
 3500
 3501static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
 3502	const struct btf_var_secinfo *vs;
 3503	const struct btf_type *t;
 3504	int i, j, n;
 3505
 3506	if (!btf)
 3507		return -ESRCH;
 3508
 3509	n = btf__type_cnt(btf);
 3510	for (i = 1; i < n; i++) {
 3511		t = btf__type_by_id(btf, i);
 3512
 3513		if (!btf_is_datasec(t))
 3514			continue;
 3515
 3516		vs = btf_var_secinfos(t);
 3517		for (j = 0; j < btf_vlen(t); j++, vs++) {
 3518			if (vs->type == ext_btf_id)
 3519				return i;
 3520		}
 3521	}
 3522
 3523	return -ENOENT;
 3524}
 3525
 3526static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
 3527				     bool *is_signed)
 3528{
 3529	const struct btf_type *t;
 3530	const char *name;
 3531
 3532	t = skip_mods_and_typedefs(btf, id, NULL);
 3533	name = btf__name_by_offset(btf, t->name_off);
 3534
 3535	if (is_signed)
 3536		*is_signed = false;
 3537	switch (btf_kind(t)) {
 3538	case BTF_KIND_INT: {
 3539		int enc = btf_int_encoding(t);
 3540
 3541		if (enc & BTF_INT_BOOL)
 3542			return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
 3543		if (is_signed)
 3544			*is_signed = enc & BTF_INT_SIGNED;
 3545		if (t->size == 1)
 3546			return KCFG_CHAR;
 3547		if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
 3548			return KCFG_UNKNOWN;
 3549		return KCFG_INT;
 3550	}
 3551	case BTF_KIND_ENUM:
 3552		if (t->size != 4)
 3553			return KCFG_UNKNOWN;
 3554		if (strcmp(name, "libbpf_tristate"))
 3555			return KCFG_UNKNOWN;
 3556		return KCFG_TRISTATE;
 3557	case BTF_KIND_ENUM64:
 3558		if (strcmp(name, "libbpf_tristate"))
 3559			return KCFG_UNKNOWN;
 3560		return KCFG_TRISTATE;
 3561	case BTF_KIND_ARRAY:
 3562		if (btf_array(t)->nelems == 0)
 3563			return KCFG_UNKNOWN;
 3564		if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
 3565			return KCFG_UNKNOWN;
 3566		return KCFG_CHAR_ARR;
 3567	default:
 3568		return KCFG_UNKNOWN;
 3569	}
 3570}
 3571
 3572static int cmp_externs(const void *_a, const void *_b)
 3573{
 3574	const struct extern_desc *a = _a;
 3575	const struct extern_desc *b = _b;
 3576
 3577	if (a->type != b->type)
 3578		return a->type < b->type ? -1 : 1;
 3579
 3580	if (a->type == EXT_KCFG) {
 3581		/* descending order by alignment requirements */
 3582		if (a->kcfg.align != b->kcfg.align)
 3583			return a->kcfg.align > b->kcfg.align ? -1 : 1;
 3584		/* ascending order by size, within same alignment class */
 3585		if (a->kcfg.sz != b->kcfg.sz)
 3586			return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
 3587	}
 3588
 3589	/* resolve ties by name */
 3590	return strcmp(a->name, b->name);
 3591}
 3592
 3593static int find_int_btf_id(const struct btf *btf)
 3594{
 3595	const struct btf_type *t;
 3596	int i, n;
 3597
 3598	n = btf__type_cnt(btf);
 3599	for (i = 1; i < n; i++) {
 3600		t = btf__type_by_id(btf, i);
 3601
 3602		if (btf_is_int(t) && btf_int_bits(t) == 32)
 3603			return i;
 3604	}
 3605
 3606	return 0;
 3607}
 3608
 3609static int add_dummy_ksym_var(struct btf *btf)
 3610{
 3611	int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
 3612	const struct btf_var_secinfo *vs;
 3613	const struct btf_type *sec;
 3614
 3615	if (!btf)
 3616		return 0;
 3617
 3618	sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
 3619					    BTF_KIND_DATASEC);
 3620	if (sec_btf_id < 0)
 3621		return 0;
 3622
 3623	sec = btf__type_by_id(btf, sec_btf_id);
 3624	vs = btf_var_secinfos(sec);
 3625	for (i = 0; i < btf_vlen(sec); i++, vs++) {
 3626		const struct btf_type *vt;
 3627
 3628		vt = btf__type_by_id(btf, vs->type);
 3629		if (btf_is_func(vt))
 3630			break;
 3631	}
 3632
 3633	/* No func in ksyms sec.  No need to add dummy var. */
 3634	if (i == btf_vlen(sec))
 3635		return 0;
 3636
 3637	int_btf_id = find_int_btf_id(btf);
 3638	dummy_var_btf_id = btf__add_var(btf,
 3639					"dummy_ksym",
 3640					BTF_VAR_GLOBAL_ALLOCATED,
 3641					int_btf_id);
 3642	if (dummy_var_btf_id < 0)
 3643		pr_warn("cannot create a dummy_ksym var\n");
 3644
 3645	return dummy_var_btf_id;
 3646}
 3647
 3648static int bpf_object__collect_externs(struct bpf_object *obj)
 3649{
 3650	struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
 3651	const struct btf_type *t;
 3652	struct extern_desc *ext;
 3653	int i, n, off, dummy_var_btf_id;
 3654	const char *ext_name, *sec_name;
 3655	Elf_Scn *scn;
 3656	Elf64_Shdr *sh;
 3657
 3658	if (!obj->efile.symbols)
 3659		return 0;
 3660
 3661	scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
 3662	sh = elf_sec_hdr(obj, scn);
 3663	if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
 3664		return -LIBBPF_ERRNO__FORMAT;
 3665
 3666	dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
 3667	if (dummy_var_btf_id < 0)
 3668		return dummy_var_btf_id;
 3669
 3670	n = sh->sh_size / sh->sh_entsize;
 3671	pr_debug("looking for externs among %d symbols...\n", n);
 3672
 3673	for (i = 0; i < n; i++) {
 3674		Elf64_Sym *sym = elf_sym_by_idx(obj, i);
 3675
 3676		if (!sym)
 3677			return -LIBBPF_ERRNO__FORMAT;
 3678		if (!sym_is_extern(sym))
 3679			continue;
 3680		ext_name = elf_sym_str(obj, sym->st_name);
 3681		if (!ext_name || !ext_name[0])
 3682			continue;
 3683
 3684		ext = obj->externs;
 3685		ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
 3686		if (!ext)
 3687			return -ENOMEM;
 3688		obj->externs = ext;
 3689		ext = &ext[obj->nr_extern];
 3690		memset(ext, 0, sizeof(*ext));
 3691		obj->nr_extern++;
 3692
 3693		ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
 3694		if (ext->btf_id <= 0) {
 3695			pr_warn("failed to find BTF for extern '%s': %d\n",
 3696				ext_name, ext->btf_id);
 3697			return ext->btf_id;
 3698		}
 3699		t = btf__type_by_id(obj->btf, ext->btf_id);
 3700		ext->name = btf__name_by_offset(obj->btf, t->name_off);
 3701		ext->sym_idx = i;
 3702		ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
 3703
 3704		ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
 3705		if (ext->sec_btf_id <= 0) {
 3706			pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
 3707				ext_name, ext->btf_id, ext->sec_btf_id);
 3708			return ext->sec_btf_id;
 3709		}
 3710		sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
 3711		sec_name = btf__name_by_offset(obj->btf, sec->name_off);
 3712
 3713		if (strcmp(sec_name, KCONFIG_SEC) == 0) {
 3714			if (btf_is_func(t)) {
 3715				pr_warn("extern function %s is unsupported under %s section\n",
 3716					ext->name, KCONFIG_SEC);
 3717				return -ENOTSUP;
 3718			}
 3719			kcfg_sec = sec;
 3720			ext->type = EXT_KCFG;
 3721			ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
 3722			if (ext->kcfg.sz <= 0) {
 3723				pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
 3724					ext_name, ext->kcfg.sz);
 3725				return ext->kcfg.sz;
 3726			}
 3727			ext->kcfg.align = btf__align_of(obj->btf, t->type);
 3728			if (ext->kcfg.align <= 0) {
 3729				pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
 3730					ext_name, ext->kcfg.align);
 3731				return -EINVAL;
 3732			}
 3733			ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
 3734						        &ext->kcfg.is_signed);
 3735			if (ext->kcfg.type == KCFG_UNKNOWN) {
 3736				pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
 3737				return -ENOTSUP;
 3738			}
 3739		} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
 3740			ksym_sec = sec;
 3741			ext->type = EXT_KSYM;
 3742			skip_mods_and_typedefs(obj->btf, t->type,
 3743					       &ext->ksym.type_id);
 3744		} else {
 3745			pr_warn("unrecognized extern section '%s'\n", sec_name);
 3746			return -ENOTSUP;
 3747		}
 3748	}
 3749	pr_debug("collected %d externs total\n", obj->nr_extern);
 3750
 3751	if (!obj->nr_extern)
 3752		return 0;
 3753
 3754	/* sort externs by type, for kcfg ones also by (align, size, name) */
 3755	qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
 3756
 3757	/* for .ksyms section, we need to turn all externs into allocated
 3758	 * variables in BTF to pass kernel verification; we do this by
 3759	 * pretending that each extern is a 8-byte variable
 3760	 */
 3761	if (ksym_sec) {
 3762		/* find existing 4-byte integer type in BTF to use for fake
 3763		 * extern variables in DATASEC
 3764		 */
 3765		int int_btf_id = find_int_btf_id(obj->btf);
 3766		/* For extern function, a dummy_var added earlier
 3767		 * will be used to replace the vs->type and
 3768		 * its name string will be used to refill
 3769		 * the missing param's name.
 3770		 */
 3771		const struct btf_type *dummy_var;
 3772
 3773		dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
 3774		for (i = 0; i < obj->nr_extern; i++) {
 3775			ext = &obj->externs[i];
 3776			if (ext->type != EXT_KSYM)
 3777				continue;
 3778			pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
 3779				 i, ext->sym_idx, ext->name);
 3780		}
 3781
 3782		sec = ksym_sec;
 3783		n = btf_vlen(sec);
 3784		for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
 3785			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
 3786			struct btf_type *vt;
 3787
 3788			vt = (void *)btf__type_by_id(obj->btf, vs->type);
 3789			ext_name = btf__name_by_offset(obj->btf, vt->name_off);
 3790			ext = find_extern_by_name(obj, ext_name);
 3791			if (!ext) {
 3792				pr_warn("failed to find extern definition for BTF %s '%s'\n",
 3793					btf_kind_str(vt), ext_name);
 3794				return -ESRCH;
 3795			}
 3796			if (btf_is_func(vt)) {
 3797				const struct btf_type *func_proto;
 3798				struct btf_param *param;
 3799				int j;
 3800
 3801				func_proto = btf__type_by_id(obj->btf,
 3802							     vt->type);
 3803				param = btf_params(func_proto);
 3804				/* Reuse the dummy_var string if the
 3805				 * func proto does not have param name.
 3806				 */
 3807				for (j = 0; j < btf_vlen(func_proto); j++)
 3808					if (param[j].type && !param[j].name_off)
 3809						param[j].name_off =
 3810							dummy_var->name_off;
 3811				vs->type = dummy_var_btf_id;
 3812				vt->info &= ~0xffff;
 3813				vt->info |= BTF_FUNC_GLOBAL;
 3814			} else {
 3815				btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
 3816				vt->type = int_btf_id;
 3817			}
 3818			vs->offset = off;
 3819			vs->size = sizeof(int);
 3820		}
 3821		sec->size = off;
 3822	}
 3823
 3824	if (kcfg_sec) {
 3825		sec = kcfg_sec;
 3826		/* for kcfg externs calculate their offsets within a .kconfig map */
 3827		off = 0;
 3828		for (i = 0; i < obj->nr_extern; i++) {
 3829			ext = &obj->externs[i];
 3830			if (ext->type != EXT_KCFG)
 3831				continue;
 3832
 3833			ext->kcfg.data_off = roundup(off, ext->kcfg.align);
 3834			off = ext->kcfg.data_off + ext->kcfg.sz;
 3835			pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
 3836				 i, ext->sym_idx, ext->kcfg.data_off, ext->name);
 3837		}
 3838		sec->size = off;
 3839		n = btf_vlen(sec);
 3840		for (i = 0; i < n; i++) {
 3841			struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
 3842
 3843			t = btf__type_by_id(obj->btf, vs->type);
 3844			ext_name = btf__name_by_offset(obj->btf, t->name_off);
 3845			ext = find_extern_by_name(obj, ext_name);
 3846			if (!ext) {
 3847				pr_warn("failed to find extern definition for BTF var '%s'\n",
 3848					ext_name);
 3849				return -ESRCH;
 3850			}
 3851			btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
 3852			vs->offset = ext->kcfg.data_off;
 3853		}
 3854	}
 3855	return 0;
 3856}
 3857
 3858static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
 3859{
 3860	return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
 3861}
 3862
 3863struct bpf_program *
 3864bpf_object__find_program_by_name(const struct bpf_object *obj,
 3865				 const char *name)
 3866{
 3867	struct bpf_program *prog;
 3868
 3869	bpf_object__for_each_program(prog, obj) {
 3870		if (prog_is_subprog(obj, prog))
 3871			continue;
 3872		if (!strcmp(prog->name, name))
 3873			return prog;
 3874	}
 3875	return errno = ENOENT, NULL;
 3876}
 3877
 3878static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
 3879				      int shndx)
 3880{
 3881	switch (obj->efile.secs[shndx].sec_type) {
 3882	case SEC_BSS:
 3883	case SEC_DATA:
 3884	case SEC_RODATA:
 3885		return true;
 3886	default:
 3887		return false;
 3888	}
 3889}
 3890
 3891static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
 3892				      int shndx)
 3893{
 3894	return shndx == obj->efile.maps_shndx ||
 3895	       shndx == obj->efile.btf_maps_shndx;
 3896}
 3897
 3898static enum libbpf_map_type
 3899bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
 3900{
 3901	if (shndx == obj->efile.symbols_shndx)
 3902		return LIBBPF_MAP_KCONFIG;
 3903
 3904	switch (obj->efile.secs[shndx].sec_type) {
 3905	case SEC_BSS:
 3906		return LIBBPF_MAP_BSS;
 3907	case SEC_DATA:
 3908		return LIBBPF_MAP_DATA;
 3909	case SEC_RODATA:
 3910		return LIBBPF_MAP_RODATA;
 3911	default:
 3912		return LIBBPF_MAP_UNSPEC;
 3913	}
 3914}
 3915
 3916static int bpf_program__record_reloc(struct bpf_program *prog,
 3917				     struct reloc_desc *reloc_desc,
 3918				     __u32 insn_idx, const char *sym_name,
 3919				     const Elf64_Sym *sym, const Elf64_Rel *rel)
 3920{
 3921	struct bpf_insn *insn = &prog->insns[insn_idx];
 3922	size_t map_idx, nr_maps = prog->obj->nr_maps;
 3923	struct bpf_object *obj = prog->obj;
 3924	__u32 shdr_idx = sym->st_shndx;
 3925	enum libbpf_map_type type;
 3926	const char *sym_sec_name;
 3927	struct bpf_map *map;
 3928
 3929	if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
 3930		pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
 3931			prog->name, sym_name, insn_idx, insn->code);
 3932		return -LIBBPF_ERRNO__RELOC;
 3933	}
 3934
 3935	if (sym_is_extern(sym)) {
 3936		int sym_idx = ELF64_R_SYM(rel->r_info);
 3937		int i, n = obj->nr_extern;
 3938		struct extern_desc *ext;
 3939
 3940		for (i = 0; i < n; i++) {
 3941			ext = &obj->externs[i];
 3942			if (ext->sym_idx == sym_idx)
 3943				break;
 3944		}
 3945		if (i >= n) {
 3946			pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
 3947				prog->name, sym_name, sym_idx);
 3948			return -LIBBPF_ERRNO__RELOC;
 3949		}
 3950		pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
 3951			 prog->name, i, ext->name, ext->sym_idx, insn_idx);
 3952		if (insn->code == (BPF_JMP | BPF_CALL))
 3953			reloc_desc->type = RELO_EXTERN_FUNC;
 3954		else
 3955			reloc_desc->type = RELO_EXTERN_VAR;
 3956		reloc_desc->insn_idx = insn_idx;
 3957		reloc_desc->sym_off = i; /* sym_off stores extern index */
 3958		return 0;
 3959	}
 3960
 3961	/* sub-program call relocation */
 3962	if (is_call_insn(insn)) {
 3963		if (insn->src_reg != BPF_PSEUDO_CALL) {
 3964			pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
 3965			return -LIBBPF_ERRNO__RELOC;
 3966		}
 3967		/* text_shndx can be 0, if no default "main" program exists */
 3968		if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
 3969			sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
 3970			pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
 3971				prog->name, sym_name, sym_sec_name);
 3972			return -LIBBPF_ERRNO__RELOC;
 3973		}
 3974		if (sym->st_value % BPF_INSN_SZ) {
 3975			pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
 3976				prog->name, sym_name, (size_t)sym->st_value);
 3977			return -LIBBPF_ERRNO__RELOC;
 3978		}
 3979		reloc_desc->type = RELO_CALL;
 3980		reloc_desc->insn_idx = insn_idx;
 3981		reloc_desc->sym_off = sym->st_value;
 3982		return 0;
 3983	}
 3984
 3985	if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
 3986		pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
 3987			prog->name, sym_name, shdr_idx);
 3988		return -LIBBPF_ERRNO__RELOC;
 3989	}
 3990
 3991	/* loading subprog addresses */
 3992	if (sym_is_subprog(sym, obj->efile.text_shndx)) {
 3993		/* global_func: sym->st_value = offset in the section, insn->imm = 0.
 3994		 * local_func: sym->st_value = 0, insn->imm = offset in the section.
 3995		 */
 3996		if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
 3997			pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
 3998				prog->name, sym_name, (size_t)sym->st_value, insn->imm);
 3999			return -LIBBPF_ERRNO__RELOC;
 4000		}
 4001
 4002		reloc_desc->type = RELO_SUBPROG_ADDR;
 4003		reloc_desc->insn_idx = insn_idx;
 4004		reloc_desc->sym_off = sym->st_value;
 4005		return 0;
 4006	}
 4007
 4008	type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
 4009	sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
 4010
 4011	/* generic map reference relocation */
 4012	if (type == LIBBPF_MAP_UNSPEC) {
 4013		if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
 4014			pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
 4015				prog->name, sym_name, sym_sec_name);
 4016			return -LIBBPF_ERRNO__RELOC;
 4017		}
 4018		for (map_idx = 0; map_idx < nr_maps; map_idx++) {
 4019			map = &obj->maps[map_idx];
 4020			if (map->libbpf_type != type ||
 4021			    map->sec_idx != sym->st_shndx ||
 4022			    map->sec_offset != sym->st_value)
 4023				continue;
 4024			pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
 4025				 prog->name, map_idx, map->name, map->sec_idx,
 4026				 map->sec_offset, insn_idx);
 4027			break;
 4028		}
 4029		if (map_idx >= nr_maps) {
 4030			pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
 4031				prog->name, sym_sec_name, (size_t)sym->st_value);
 4032			return -LIBBPF_ERRNO__RELOC;
 4033		}
 4034		reloc_desc->type = RELO_LD64;
 4035		reloc_desc->insn_idx = insn_idx;
 4036		reloc_desc->map_idx = map_idx;
 4037		reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
 4038		return 0;
 4039	}
 4040
 4041	/* global data map relocation */
 4042	if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
 4043		pr_warn("prog '%s': bad data relo against section '%s'\n",
 4044			prog->name, sym_sec_name);
 4045		return -LIBBPF_ERRNO__RELOC;
 4046	}
 4047	for (map_idx = 0; map_idx < nr_maps; map_idx++) {
 4048		map = &obj->maps[map_idx];
 4049		if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
 4050			continue;
 4051		pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
 4052			 prog->name, map_idx, map->name, map->sec_idx,
 4053			 map->sec_offset, insn_idx);
 4054		break;
 4055	}
 4056	if (map_idx >= nr_maps) {
 4057		pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
 4058			prog->name, sym_sec_name);
 4059		return -LIBBPF_ERRNO__RELOC;
 4060	}
 4061
 4062	reloc_desc->type = RELO_DATA;
 4063	reloc_desc->insn_idx = insn_idx;
 4064	reloc_desc->map_idx = map_idx;
 4065	reloc_desc->sym_off = sym->st_value;
 4066	return 0;
 4067}
 4068
 4069static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
 4070{
 4071	return insn_idx >= prog->sec_insn_off &&
 4072	       insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
 4073}
 4074
 4075static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
 4076						 size_t sec_idx, size_t insn_idx)
 4077{
 4078	int l = 0, r = obj->nr_programs - 1, m;
 4079	struct bpf_program *prog;
 4080
 4081	while (l < r) {
 4082		m = l + (r - l + 1) / 2;
 4083		prog = &obj->programs[m];
 4084
 4085		if (prog->sec_idx < sec_idx ||
 4086		    (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
 4087			l = m;
 4088		else
 4089			r = m - 1;
 4090	}
 4091	/* matching program could be at index l, but it still might be the
 4092	 * wrong one, so we need to double check conditions for the last time
 4093	 */
 4094	prog = &obj->programs[l];
 4095	if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
 4096		return prog;
 4097	return NULL;
 4098}
 4099
 4100static int
 4101bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
 4102{
 4103	const char *relo_sec_name, *sec_name;
 4104	size_t sec_idx = shdr->sh_info, sym_idx;
 4105	struct bpf_program *prog;
 4106	struct reloc_desc *relos;
 4107	int err, i, nrels;
 4108	const char *sym_name;
 4109	__u32 insn_idx;
 4110	Elf_Scn *scn;
 4111	Elf_Data *scn_data;
 4112	Elf64_Sym *sym;
 4113	Elf64_Rel *rel;
 4114
 4115	if (sec_idx >= obj->efile.sec_cnt)
 4116		return -EINVAL;
 4117
 4118	scn = elf_sec_by_idx(obj, sec_idx);
 4119	scn_data = elf_sec_data(obj, scn);
 4120
 4121	relo_sec_name = elf_sec_str(obj, shdr->sh_name);
 4122	sec_name = elf_sec_name(obj, scn);
 4123	if (!relo_sec_name || !sec_name)
 4124		return -EINVAL;
 4125
 4126	pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
 4127		 relo_sec_name, sec_idx, sec_name);
 4128	nrels = shdr->sh_size / shdr->sh_entsize;
 4129
 4130	for (i = 0; i < nrels; i++) {
 4131		rel = elf_rel_by_idx(data, i);
 4132		if (!rel) {
 4133			pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
 4134			return -LIBBPF_ERRNO__FORMAT;
 4135		}
 4136
 4137		sym_idx = ELF64_R_SYM(rel->r_info);
 4138		sym = elf_sym_by_idx(obj, sym_idx);
 4139		if (!sym) {
 4140			pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
 4141				relo_sec_name, sym_idx, i);
 4142			return -LIBBPF_ERRNO__FORMAT;
 4143		}
 4144
 4145		if (sym->st_shndx >= obj->efile.sec_cnt) {
 4146			pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
 4147				relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
 4148			return -LIBBPF_ERRNO__FORMAT;
 4149		}
 4150
 4151		if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
 4152			pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
 4153				relo_sec_name, (size_t)rel->r_offset, i);
 4154			return -LIBBPF_ERRNO__FORMAT;
 4155		}
 4156
 4157		insn_idx = rel->r_offset / BPF_INSN_SZ;
 4158		/* relocations against static functions are recorded as
 4159		 * relocations against the section that contains a function;
 4160		 * in such case, symbol will be STT_SECTION and sym.st_name
 4161		 * will point to empty string (0), so fetch section name
 4162		 * instead
 4163		 */
 4164		if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
 4165			sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
 4166		else
 4167			sym_name = elf_sym_str(obj, sym->st_name);
 4168		sym_name = sym_name ?: "<?";
 4169
 4170		pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
 4171			 relo_sec_name, i, insn_idx, sym_name);
 4172
 4173		prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
 4174		if (!prog) {
 4175			pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
 4176				relo_sec_name, i, sec_name, insn_idx);
 4177			continue;
 4178		}
 4179
 4180		relos = libbpf_reallocarray(prog->reloc_desc,
 4181					    prog->nr_reloc + 1, sizeof(*relos));
 4182		if (!relos)
 4183			return -ENOMEM;
 4184		prog->reloc_desc = relos;
 4185
 4186		/* adjust insn_idx to local BPF program frame of reference */
 4187		insn_idx -= prog->sec_insn_off;
 4188		err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
 4189						insn_idx, sym_name, sym, rel);
 4190		if (err)
 4191			return err;
 4192
 4193		prog->nr_reloc++;
 4194	}
 4195	return 0;
 4196}
 4197
 4198static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
 4199{
 4200	int id;
 4201
 4202	if (!obj->btf)
 4203		return -ENOENT;
 4204
 4205	/* if it's BTF-defined map, we don't need to search for type IDs.
 4206	 * For struct_ops map, it does not need btf_key_type_id and
 4207	 * btf_value_type_id.
 4208	 */
 4209	if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
 4210		return 0;
 4211
 4212	/*
 4213	 * LLVM annotates global data differently in BTF, that is,
 4214	 * only as '.data', '.bss' or '.rodata'.
 4215	 */
 4216	if (!bpf_map__is_internal(map))
 4217		return -ENOENT;
 4218
 4219	id = btf__find_by_name(obj->btf, map->real_name);
 4220	if (id < 0)
 4221		return id;
 4222
 4223	map->btf_key_type_id = 0;
 4224	map->btf_value_type_id = id;
 4225	return 0;
 4226}
 4227
 4228static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
 4229{
 4230	char file[PATH_MAX], buff[4096];
 4231	FILE *fp;
 4232	__u32 val;
 4233	int err;
 4234
 4235	snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
 4236	memset(info, 0, sizeof(*info));
 4237
 4238	fp = fopen(file, "r");
 4239	if (!fp) {
 4240		err = -errno;
 4241		pr_warn("failed to open %s: %d. No procfs support?\n", file,
 4242			err);
 4243		return err;
 4244	}
 4245
 4246	while (fgets(buff, sizeof(buff), fp)) {
 4247		if (sscanf(buff, "map_type:\t%u", &val) == 1)
 4248			info->type = val;
 4249		else if (sscanf(buff, "key_size:\t%u", &val) == 1)
 4250			info->key_size = val;
 4251		else if (sscanf(buff, "value_size:\t%u", &val) == 1)
 4252			info->value_size = val;
 4253		else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
 4254			info->max_entries = val;
 4255		else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
 4256			info->map_flags = val;
 4257	}
 4258
 4259	fclose(fp);
 4260
 4261	return 0;
 4262}
 4263
 4264bool bpf_map__autocreate(const struct bpf_map *map)
 4265{
 4266	return map->autocreate;
 4267}
 4268
 4269int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
 4270{
 4271	if (map->obj->loaded)
 4272		return libbpf_err(-EBUSY);
 4273
 4274	map->autocreate = autocreate;
 4275	return 0;
 4276}
 4277
 4278int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 4279{
 4280	struct bpf_map_info info = {};
 4281	__u32 len = sizeof(info), name_len;
 4282	int new_fd, err;
 4283	char *new_name;
 4284
 4285	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 4286	if (err && errno == EINVAL)
 4287		err = bpf_get_map_info_from_fdinfo(fd, &info);
 4288	if (err)
 4289		return libbpf_err(err);
 4290
 4291	name_len = strlen(info.name);
 4292	if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
 4293		new_name = strdup(map->name);
 4294	else
 4295		new_name = strdup(info.name);
 4296
 4297	if (!new_name)
 4298		return libbpf_err(-errno);
 4299
 4300	new_fd = open("/", O_RDONLY | O_CLOEXEC);
 4301	if (new_fd < 0) {
 4302		err = -errno;
 4303		goto err_free_new_name;
 4304	}
 4305
 4306	new_fd = dup3(fd, new_fd, O_CLOEXEC);
 4307	if (new_fd < 0) {
 4308		err = -errno;
 4309		goto err_close_new_fd;
 4310	}
 4311
 4312	err = zclose(map->fd);
 4313	if (err) {
 4314		err = -errno;
 4315		goto err_close_new_fd;
 4316	}
 4317	free(map->name);
 4318
 4319	map->fd = new_fd;
 4320	map->name = new_name;
 4321	map->def.type = info.type;
 4322	map->def.key_size = info.key_size;
 4323	map->def.value_size = info.value_size;
 4324	map->def.max_entries = info.max_entries;
 4325	map->def.map_flags = info.map_flags;
 4326	map->btf_key_type_id = info.btf_key_type_id;
 4327	map->btf_value_type_id = info.btf_value_type_id;
 4328	map->reused = true;
 4329	map->map_extra = info.map_extra;
 4330
 4331	return 0;
 4332
 4333err_close_new_fd:
 4334	close(new_fd);
 4335err_free_new_name:
 4336	free(new_name);
 4337	return libbpf_err(err);
 4338}
 4339
 4340__u32 bpf_map__max_entries(const struct bpf_map *map)
 4341{
 4342	return map->def.max_entries;
 4343}
 4344
 4345struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
 4346{
 4347	if (!bpf_map_type__is_map_in_map(map->def.type))
 4348		return errno = EINVAL, NULL;
 4349
 4350	return map->inner_map;
 4351}
 4352
 4353int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 4354{
 4355	if (map->obj->loaded)
 4356		return libbpf_err(-EBUSY);
 4357
 4358	map->def.max_entries = max_entries;
 4359
 4360	/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
 4361	if (map->def.type == BPF_MAP_TYPE_RINGBUF)
 4362		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
 4363
 4364	return 0;
 4365}
 4366
 4367static int
 4368bpf_object__probe_loading(struct bpf_object *obj)
 4369{
 4370	char *cp, errmsg[STRERR_BUFSIZE];
 4371	struct bpf_insn insns[] = {
 4372		BPF_MOV64_IMM(BPF_REG_0, 0),
 4373		BPF_EXIT_INSN(),
 4374	};
 4375	int ret, insn_cnt = ARRAY_SIZE(insns);
 4376
 4377	if (obj->gen_loader)
 4378		return 0;
 4379
 4380	ret = bump_rlimit_memlock();
 4381	if (ret)
 4382		pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
 4383
 4384	/* make sure basic loading works */
 4385	ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
 4386	if (ret < 0)
 4387		ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
 4388	if (ret < 0) {
 4389		ret = errno;
 4390		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
 4391		pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
 4392			"program. Make sure your kernel supports BPF "
 4393			"(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
 4394			"set to big enough value.\n", __func__, cp, ret);
 4395		return -ret;
 4396	}
 4397	close(ret);
 4398
 4399	return 0;
 4400}
 4401
 4402static int probe_fd(int fd)
 4403{
 4404	if (fd >= 0)
 4405		close(fd);
 4406	return fd >= 0;
 4407}
 4408
 4409static int probe_kern_prog_name(void)
 4410{
 4411	struct bpf_insn insns[] = {
 4412		BPF_MOV64_IMM(BPF_REG_0, 0),
 4413		BPF_EXIT_INSN(),
 4414	};
 4415	int ret, insn_cnt = ARRAY_SIZE(insns);
 4416
 4417	/* make sure loading with name works */
 4418	ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL);
 4419	return probe_fd(ret);
 4420}
 4421
 4422static int probe_kern_global_data(void)
 4423{
 4424	char *cp, errmsg[STRERR_BUFSIZE];
 4425	struct bpf_insn insns[] = {
 4426		BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
 4427		BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
 4428		BPF_MOV64_IMM(BPF_REG_0, 0),
 4429		BPF_EXIT_INSN(),
 4430	};
 4431	int ret, map, insn_cnt = ARRAY_SIZE(insns);
 4432
 4433	map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);
 4434	if (map < 0) {
 4435		ret = -errno;
 4436		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
 4437		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
 4438			__func__, cp, -ret);
 4439		return ret;
 4440	}
 4441
 4442	insns[0].imm = map;
 4443
 4444	ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
 4445	close(map);
 4446	return probe_fd(ret);
 4447}
 4448
 4449static int probe_kern_btf(void)
 4450{
 4451	static const char strs[] = "\0int";
 4452	__u32 types[] = {
 4453		/* int */
 4454		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
 4455	};
 4456
 4457	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4458					     strs, sizeof(strs)));
 4459}
 4460
 4461static int probe_kern_btf_func(void)
 4462{
 4463	static const char strs[] = "\0int\0x\0a";
 4464	/* void x(int a) {} */
 4465	__u32 types[] = {
 4466		/* int */
 4467		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
 4468		/* FUNC_PROTO */                                /* [2] */
 4469		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
 4470		BTF_PARAM_ENC(7, 1),
 4471		/* FUNC x */                                    /* [3] */
 4472		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
 4473	};
 4474
 4475	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4476					     strs, sizeof(strs)));
 4477}
 4478
 4479static int probe_kern_btf_func_global(void)
 4480{
 4481	static const char strs[] = "\0int\0x\0a";
 4482	/* static void x(int a) {} */
 4483	__u32 types[] = {
 4484		/* int */
 4485		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
 4486		/* FUNC_PROTO */                                /* [2] */
 4487		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
 4488		BTF_PARAM_ENC(7, 1),
 4489		/* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
 4490		BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
 4491	};
 4492
 4493	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4494					     strs, sizeof(strs)));
 4495}
 4496
 4497static int probe_kern_btf_datasec(void)
 4498{
 4499	static const char strs[] = "\0x\0.data";
 4500	/* static int a; */
 4501	__u32 types[] = {
 4502		/* int */
 4503		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
 4504		/* VAR x */                                     /* [2] */
 4505		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
 4506		BTF_VAR_STATIC,
 4507		/* DATASEC val */                               /* [3] */
 4508		BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
 4509		BTF_VAR_SECINFO_ENC(2, 0, 4),
 4510	};
 4511
 4512	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4513					     strs, sizeof(strs)));
 4514}
 4515
 4516static int probe_kern_btf_float(void)
 4517{
 4518	static const char strs[] = "\0float";
 4519	__u32 types[] = {
 4520		/* float */
 4521		BTF_TYPE_FLOAT_ENC(1, 4),
 4522	};
 4523
 4524	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4525					     strs, sizeof(strs)));
 4526}
 4527
 4528static int probe_kern_btf_decl_tag(void)
 4529{
 4530	static const char strs[] = "\0tag";
 4531	__u32 types[] = {
 4532		/* int */
 4533		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
 4534		/* VAR x */                                     /* [2] */
 4535		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
 4536		BTF_VAR_STATIC,
 4537		/* attr */
 4538		BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
 4539	};
 4540
 4541	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4542					     strs, sizeof(strs)));
 4543}
 4544
 4545static int probe_kern_btf_type_tag(void)
 4546{
 4547	static const char strs[] = "\0tag";
 4548	__u32 types[] = {
 4549		/* int */
 4550		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
 4551		/* attr */
 4552		BTF_TYPE_TYPE_TAG_ENC(1, 1),				/* [2] */
 4553		/* ptr */
 4554		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),	/* [3] */
 4555	};
 4556
 4557	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4558					     strs, sizeof(strs)));
 4559}
 4560
 4561static int probe_kern_array_mmap(void)
 4562{
 4563	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
 4564	int fd;
 4565
 4566	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts);
 4567	return probe_fd(fd);
 4568}
 4569
 4570static int probe_kern_exp_attach_type(void)
 4571{
 4572	LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);
 4573	struct bpf_insn insns[] = {
 4574		BPF_MOV64_IMM(BPF_REG_0, 0),
 4575		BPF_EXIT_INSN(),
 4576	};
 4577	int fd, insn_cnt = ARRAY_SIZE(insns);
 4578
 4579	/* use any valid combination of program type and (optional)
 4580	 * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
 4581	 * to see if kernel supports expected_attach_type field for
 4582	 * BPF_PROG_LOAD command
 4583	 */
 4584	fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
 4585	return probe_fd(fd);
 4586}
 4587
 4588static int probe_kern_probe_read_kernel(void)
 4589{
 4590	struct bpf_insn insns[] = {
 4591		BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),	/* r1 = r10 (fp) */
 4592		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),	/* r1 += -8 */
 4593		BPF_MOV64_IMM(BPF_REG_2, 8),		/* r2 = 8 */
 4594		BPF_MOV64_IMM(BPF_REG_3, 0),		/* r3 = 0 */
 4595		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
 4596		BPF_EXIT_INSN(),
 4597	};
 4598	int fd, insn_cnt = ARRAY_SIZE(insns);
 4599
 4600	fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
 4601	return probe_fd(fd);
 4602}
 4603
 4604static int probe_prog_bind_map(void)
 4605{
 4606	char *cp, errmsg[STRERR_BUFSIZE];
 4607	struct bpf_insn insns[] = {
 4608		BPF_MOV64_IMM(BPF_REG_0, 0),
 4609		BPF_EXIT_INSN(),
 4610	};
 4611	int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
 4612
 4613	map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);
 4614	if (map < 0) {
 4615		ret = -errno;
 4616		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
 4617		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
 4618			__func__, cp, -ret);
 4619		return ret;
 4620	}
 4621
 4622	prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
 4623	if (prog < 0) {
 4624		close(map);
 4625		return 0;
 4626	}
 4627
 4628	ret = bpf_prog_bind_map(prog, map, NULL);
 4629
 4630	close(map);
 4631	close(prog);
 4632
 4633	return ret >= 0;
 4634}
 4635
 4636static int probe_module_btf(void)
 4637{
 4638	static const char strs[] = "\0int";
 4639	__u32 types[] = {
 4640		/* int */
 4641		BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
 4642	};
 4643	struct bpf_btf_info info;
 4644	__u32 len = sizeof(info);
 4645	char name[16];
 4646	int fd, err;
 4647
 4648	fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
 4649	if (fd < 0)
 4650		return 0; /* BTF not supported at all */
 4651
 4652	memset(&info, 0, sizeof(info));
 4653	info.name = ptr_to_u64(name);
 4654	info.name_len = sizeof(name);
 4655
 4656	/* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
 4657	 * kernel's module BTF support coincides with support for
 4658	 * name/name_len fields in struct bpf_btf_info.
 4659	 */
 4660	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 4661	close(fd);
 4662	return !err;
 4663}
 4664
 4665static int probe_perf_link(void)
 4666{
 4667	struct bpf_insn insns[] = {
 4668		BPF_MOV64_IMM(BPF_REG_0, 0),
 4669		BPF_EXIT_INSN(),
 4670	};
 4671	int prog_fd, link_fd, err;
 4672
 4673	prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
 4674				insns, ARRAY_SIZE(insns), NULL);
 4675	if (prog_fd < 0)
 4676		return -errno;
 4677
 4678	/* use invalid perf_event FD to get EBADF, if link is supported;
 4679	 * otherwise EINVAL should be returned
 4680	 */
 4681	link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
 4682	err = -errno; /* close() can clobber errno */
 4683
 4684	if (link_fd >= 0)
 4685		close(link_fd);
 4686	close(prog_fd);
 4687
 4688	return link_fd < 0 && err == -EBADF;
 4689}
 4690
 4691static int probe_kern_bpf_cookie(void)
 4692{
 4693	struct bpf_insn insns[] = {
 4694		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
 4695		BPF_EXIT_INSN(),
 4696	};
 4697	int ret, insn_cnt = ARRAY_SIZE(insns);
 4698
 4699	ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
 4700	return probe_fd(ret);
 4701}
 4702
 4703static int probe_kern_btf_enum64(void)
 4704{
 4705	static const char strs[] = "\0enum64";
 4706	__u32 types[] = {
 4707		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
 4708	};
 4709
 4710	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
 4711					     strs, sizeof(strs)));
 4712}
 4713
/* Forward declaration: the definition is not in this part of the file, but
 * the probe is referenced by the feature_probes[] table below.
 */
static int probe_kern_syscall_wrapper(void);

/* Cached state of a single kernel feature probe. */
enum kern_feature_result {
	FEAT_UNKNOWN = 0,	/* not probed yet; also the zero-initialized state */
	FEAT_SUPPORTED = 1,	/* probe reported the feature as present */
	FEAT_MISSING = 2,	/* probe reported the feature as absent, or failed */
};

/* Probe callback; kernel_supports() treats > 0 as supported, 0 as missing
 * and < 0 as a probing error.
 */
typedef int (*feature_probe_fn)(void);
 4723
 4724static struct kern_feature_desc {
 4725	const char *desc;
 4726	feature_probe_fn probe;
 4727	enum kern_feature_result res;
 4728} feature_probes[__FEAT_CNT] = {
 4729	[FEAT_PROG_NAME] = {
 4730		"BPF program name", probe_kern_prog_name,
 4731	},
 4732	[FEAT_GLOBAL_DATA] = {
 4733		"global variables", probe_kern_global_data,
 4734	},
 4735	[FEAT_BTF] = {
 4736		"minimal BTF", probe_kern_btf,
 4737	},
 4738	[FEAT_BTF_FUNC] = {
 4739		"BTF functions", probe_kern_btf_func,
 4740	},
 4741	[FEAT_BTF_GLOBAL_FUNC] = {
 4742		"BTF global function", probe_kern_btf_func_global,
 4743	},
 4744	[FEAT_BTF_DATASEC] = {
 4745		"BTF data section and variable", probe_kern_btf_datasec,
 4746	},
 4747	[FEAT_ARRAY_MMAP] = {
 4748		"ARRAY map mmap()", probe_kern_array_mmap,
 4749	},
 4750	[FEAT_EXP_ATTACH_TYPE] = {
 4751		"BPF_PROG_LOAD expected_attach_type attribute",
 4752		probe_kern_exp_attach_type,
 4753	},
 4754	[FEAT_PROBE_READ_KERN] = {
 4755		"bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
 4756	},
 4757	[FEAT_PROG_BIND_MAP] = {
 4758		"BPF_PROG_BIND_MAP support", probe_prog_bind_map,
 4759	},
 4760	[FEAT_MODULE_BTF] = {
 4761		"module BTF support", probe_module_btf,
 4762	},
 4763	[FEAT_BTF_FLOAT] = {
 4764		"BTF_KIND_FLOAT support", probe_kern_btf_float,
 4765	},
 4766	[FEAT_PERF_LINK] = {
 4767		"BPF perf link support", probe_perf_link,
 4768	},
 4769	[FEAT_BTF_DECL_TAG] = {
 4770		"BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
 4771	},
 4772	[FEAT_BTF_TYPE_TAG] = {
 4773		"BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
 4774	},
 4775	[FEAT_MEMCG_ACCOUNT] = {
 4776		"memcg-based memory accounting", probe_memcg_account,
 4777	},
 4778	[FEAT_BPF_COOKIE] = {
 4779		"BPF cookie support", probe_kern_bpf_cookie,
 4780	},
 4781	[FEAT_BTF_ENUM64] = {
 4782		"BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
 4783	},
 4784	[FEAT_SYSCALL_WRAPPER] = {
 4785		"Kernel using syscall wrapper", probe_kern_syscall_wrapper,
 4786	},
 4787};
 4788
 4789bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
 4790{
 4791	struct kern_feature_desc *feat = &feature_probes[feat_id];
 4792	int ret;
 4793
 4794	if (obj && obj->gen_loader)
 4795		/* To generate loader program assume the latest kernel
 4796		 * to avoid doing extra prog_load, map_create syscalls.
 4797		 */
 4798		return true;
 4799
 4800	if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
 4801		ret = feat->probe();
 4802		if (ret > 0) {
 4803			WRITE_ONCE(feat->res, FEAT_SUPPORTED);
 4804		} else if (ret == 0) {
 4805			WRITE_ONCE(feat->res, FEAT_MISSING);
 4806		} else {
 4807			pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
 4808			WRITE_ONCE(feat->res, FEAT_MISSING);
 4809		}
 4810	}
 4811
 4812	return READ_ONCE(feat->res) == FEAT_SUPPORTED;
 4813}
 4814
 4815static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
 4816{
 4817	struct bpf_map_info map_info = {};
 4818	char msg[STRERR_BUFSIZE];
 4819	__u32 map_info_len;
 4820	int err;
 4821
 4822	map_info_len = sizeof(map_info);
 4823
 4824	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
 4825	if (err && errno == EINVAL)
 4826		err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
 4827	if (err) {
 4828		pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
 4829			libbpf_strerror_r(errno, msg, sizeof(msg)));
 4830		return false;
 4831	}
 4832
 4833	return (map_info.type == map->def.type &&
 4834		map_info.key_size == map->def.key_size &&
 4835		map_info.value_size == map->def.value_size &&
 4836		map_info.max_entries == map->def.max_entries &&
 4837		map_info.map_flags == map->def.map_flags &&
 4838		map_info.map_extra == map->map_extra);
 4839}
 4840
 4841static int
 4842bpf_object__reuse_map(struct bpf_map *map)
 4843{
 4844	char *cp, errmsg[STRERR_BUFSIZE];
 4845	int err, pin_fd;
 4846
 4847	pin_fd = bpf_obj_get(map->pin_path);
 4848	if (pin_fd < 0) {
 4849		err = -errno;
 4850		if (err == -ENOENT) {
 4851			pr_debug("found no pinned map to reuse at '%s'\n",
 4852				 map->pin_path);
 4853			return 0;
 4854		}
 4855
 4856		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
 4857		pr_warn("couldn't retrieve pinned map '%s': %s\n",
 4858			map->pin_path, cp);
 4859		return err;
 4860	}
 4861
 4862	if (!map_is_reuse_compat(map, pin_fd)) {
 4863		pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
 4864			map->pin_path);
 4865		close(pin_fd);
 4866		return -EINVAL;
 4867	}
 4868
 4869	err = bpf_map__reuse_fd(map, pin_fd);
 4870	close(pin_fd);
 4871	if (err) {
 4872		return err;
 4873	}
 4874	map->pinned = true;
 4875	pr_debug("reused pinned map at '%s'\n", map->pin_path);
 4876
 4877	return 0;
 4878}
 4879
 4880static int
 4881bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 4882{
 4883	enum libbpf_map_type map_type = map->libbpf_type;
 4884	char *cp, errmsg[STRERR_BUFSIZE];
 4885	int err, zero = 0;
 4886
 4887	if (obj->gen_loader) {
 4888		bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
 4889					 map->mmaped, map->def.value_size);
 4890		if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
 4891			bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
 4892		return 0;
 4893	}
 4894	err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
 4895	if (err) {
 4896		err = -errno;
 4897		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 4898		pr_warn("Error setting initial map(%s) contents: %s\n",
 4899			map->name, cp);
 4900		return err;
 4901	}
 4902
 4903	/* Freeze .rodata and .kconfig map as read-only from syscall side. */
 4904	if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
 4905		err = bpf_map_freeze(map->fd);
 4906		if (err) {
 4907			err = -errno;
 4908			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 4909			pr_warn("Error freezing map(%s) as read-only: %s\n",
 4910				map->name, cp);
 4911			return err;
 4912		}
 4913	}
 4914	return 0;
 4915}
 4916
/* Forward declaration: used below to dispose of a temporary inner map. */
static void bpf_map__destroy(struct bpf_map *map);

/* Create the kernel map (or emit the gen_loader instructions) for @map.
 *
 * @obj:      object owning @map
 * @map:      map to create; map->fd is set by this function
 * @is_inner: true when @map is the inner-map prototype of a map-in-map;
 *            only affects the index passed to the gen_loader
 *
 * If creation with BTF key/value type IDs fails, it is retried once
 * without BTF. A map-in-map's inner_map is created first and destroyed
 * again before returning.
 *
 * Returns 0 on success, the error from creating the inner map, or the
 * negative errno of the final failed bpf_map_create().
 */
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
{
	LIBBPF_OPTS(bpf_map_create_opts, create_attr);
	struct bpf_map_def *def = &map->def;
	const char *map_name = NULL;
	int err = 0;

	/* the map name is passed on only if the FEAT_PROG_NAME probe succeeded */
	if (kernel_supports(obj, FEAT_PROG_NAME))
		map_name = map->name;
	create_attr.map_ifindex = map->map_ifindex;
	create_attr.map_flags = def->map_flags;
	create_attr.numa_node = map->numa_node;
	create_attr.map_extra = map->map_extra;

	if (bpf_map__is_struct_ops(map))
		create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;

	/* attach BTF type info only when the object's BTF has a valid FD */
	if (obj->btf && btf__fd(obj->btf) >= 0) {
		create_attr.btf_fd = btf__fd(obj->btf);
		create_attr.btf_key_type_id = map->btf_key_type_id;
		create_attr.btf_value_type_id = map->btf_value_type_id;
	}

	if (bpf_map_type__is_map_in_map(def->type)) {
		if (map->inner_map) {
			/* create the inner map first so its FD can be handed
			 * over when creating the outer map
			 */
			err = bpf_object__create_map(obj, map->inner_map, true);
			if (err) {
				pr_warn("map '%s': failed to create inner map: %d\n",
					map->name, err);
				return err;
			}
			map->inner_map_fd = bpf_map__fd(map->inner_map);
		}
		if (map->inner_map_fd >= 0)
			create_attr.inner_map_fd = map->inner_map_fd;
	}

	/* For the map types listed here BTF info is dropped before creation
	 * (presumably the kernel does not accept BTF key/value type IDs for
	 * them -- TODO confirm).
	 */
	switch (def->type) {
	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
	case BPF_MAP_TYPE_CGROUP_ARRAY:
	case BPF_MAP_TYPE_STACK_TRACE:
	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
	case BPF_MAP_TYPE_HASH_OF_MAPS:
	case BPF_MAP_TYPE_DEVMAP:
	case BPF_MAP_TYPE_DEVMAP_HASH:
	case BPF_MAP_TYPE_CPUMAP:
	case BPF_MAP_TYPE_XSKMAP:
	case BPF_MAP_TYPE_SOCKMAP:
	case BPF_MAP_TYPE_SOCKHASH:
	case BPF_MAP_TYPE_QUEUE:
	case BPF_MAP_TYPE_STACK:
		create_attr.btf_fd = 0;
		create_attr.btf_key_type_id = 0;
		create_attr.btf_value_type_id = 0;
		map->btf_key_type_id = 0;
		map->btf_value_type_id = 0;
		/* falls through into default, which only breaks */
	default:
		break;
	}

	if (obj->gen_loader) {
		bpf_gen__map_create(obj->gen_loader, def->type, map_name,
				    def->key_size, def->value_size, def->max_entries,
				    &create_attr, is_inner ? -1 : map - obj->maps);
		/* Pretend to have valid FD to pass various fd >= 0 checks.
		 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
		 */
		map->fd = 0;
	} else {
		map->fd = bpf_map_create(def->type, map_name,
					 def->key_size, def->value_size,
					 def->max_entries, &create_attr);
	}
	/* creation failed while BTF type IDs were supplied: retry once
	 * with all BTF info cleared
	 */
	if (map->fd < 0 && (create_attr.btf_key_type_id ||
			    create_attr.btf_value_type_id)) {
		char *cp, errmsg[STRERR_BUFSIZE];

		err = -errno;
		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
		pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
			map->name, cp, err);
		create_attr.btf_fd = 0;
		create_attr.btf_key_type_id = 0;
		create_attr.btf_value_type_id = 0;
		map->btf_key_type_id = 0;
		map->btf_value_type_id = 0;
		map->fd = bpf_map_create(def->type, map_name,
					 def->key_size, def->value_size,
					 def->max_entries, &create_attr);
	}

	/* errno here belongs to the most recent bpf_map_create() attempt */
	err = map->fd < 0 ? -errno : 0;

	/* the inner map is destroyed here whether or not the outer map was
	 * created
	 */
	if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
		/* with a gen_loader the inner map's fd is the fake 0 set
		 * above; reset it to -1 before destroying
		 */
		if (obj->gen_loader)
			map->inner_map->fd = -1;
		bpf_map__destroy(map->inner_map);
		zfree(&map->inner_map);
	}

	return err;
}
 5021
 5022static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
 5023{
 5024	const struct bpf_map *targ_map;
 5025	unsigned int i;
 5026	int fd, err = 0;
 5027
 5028	for (i = 0; i < map->init_slots_sz; i++) {
 5029		if (!map->init_slots[i])
 5030			continue;
 5031
 5032		targ_map = map->init_slots[i];
 5033		fd = bpf_map__fd(targ_map);
 5034
 5035		if (obj->gen_loader) {
 5036			bpf_gen__populate_outer_map(obj->gen_loader,
 5037						    map - obj->maps, i,
 5038						    targ_map - obj->maps);
 5039		} else {
 5040			err = bpf_map_update_elem(map->fd, &i, &fd, 0);
 5041		}
 5042		if (err) {
 5043			err = -errno;
 5044			pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
 5045				map->name, i, targ_map->name, fd, err);
 5046			return err;
 5047		}
 5048		pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
 5049			 map->name, i, targ_map->name, fd);
 5050	}
 5051
 5052	zfree(&map->init_slots);
 5053	map->init_slots_sz = 0;
 5054
 5055	return 0;
 5056}
 5057
 5058static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
 5059{
 5060	const struct bpf_program *targ_prog;
 5061	unsigned int i;
 5062	int fd, err;
 5063
 5064	if (obj->gen_loader)
 5065		return -ENOTSUP;
 5066
 5067	for (i = 0; i < map->init_slots_sz; i++) {
 5068		if (!map->init_slots[i])
 5069			continue;
 5070
 5071		targ_prog = map->init_slots[i];
 5072		fd = bpf_program__fd(targ_prog);
 5073
 5074		err = bpf_map_update_elem(map->fd, &i, &fd, 0);
 5075		if (err) {
 5076			err = -errno;
 5077			pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
 5078				map->name, i, targ_prog->name, fd, err);
 5079			return err;
 5080		}
 5081		pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
 5082			 map->name, i, targ_prog->name, fd);
 5083	}
 5084
 5085	zfree(&map->init_slots);
 5086	map->init_slots_sz = 0;
 5087
 5088	return 0;
 5089}
 5090
 5091static int bpf_object_init_prog_arrays(struct bpf_object *obj)
 5092{
 5093	struct bpf_map *map;
 5094	int i, err;
 5095
 5096	for (i = 0; i < obj->nr_maps; i++) {
 5097		map = &obj->maps[i];
 5098
 5099		if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
 5100			continue;
 5101
 5102		err = init_prog_array_slots(obj, map);
 5103		if (err < 0) {
 5104			zclose(map->fd);
 5105			return err;
 5106		}
 5107	}
 5108	return 0;
 5109}
 5110
 5111static int map_set_def_max_entries(struct bpf_map *map)
 5112{
 5113	if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
 5114		int nr_cpus;
 5115
 5116		nr_cpus = libbpf_num_possible_cpus();
 5117		if (nr_cpus < 0) {
 5118			pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
 5119				map->name, nr_cpus);
 5120			return nr_cpus;
 5121		}
 5122		pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
 5123		map->def.max_entries = nr_cpus;
 5124	}
 5125
 5126	return 0;
 5127}
 5128
 5129static int
 5130bpf_object__create_maps(struct bpf_object *obj)
 5131{
 5132	struct bpf_map *map;
 5133	char *cp, errmsg[STRERR_BUFSIZE];
 5134	unsigned int i, j;
 5135	int err;
 5136	bool retried;
 5137
 5138	for (i = 0; i < obj->nr_maps; i++) {
 5139		map = &obj->maps[i];
 5140
 5141		/* To support old kernels, we skip creating global data maps
 5142		 * (.rodata, .data, .kconfig, etc); later on, during program
 5143		 * loading, if we detect that at least one of the to-be-loaded
 5144		 * programs is referencing any global data map, we'll error
 5145		 * out with program name and relocation index logged.
 5146		 * This approach allows to accommodate Clang emitting
 5147		 * unnecessary .rodata.str1.1 sections for string literals,
 5148		 * but also it allows to have CO-RE applications that use
 5149		 * global variables in some of BPF programs, but not others.
 5150		 * If those global variable-using programs are not loaded at
 5151		 * runtime due to bpf_program__set_autoload(prog, false),
 5152		 * bpf_object loading will succeed just fine even on old
 5153		 * kernels.
 5154		 */
 5155		if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
 5156			map->autocreate = false;
 5157
 5158		if (!map->autocreate) {
 5159			pr_debug("map '%s': skipped auto-creating...\n", map->name);
 5160			continue;
 5161		}
 5162
 5163		err = map_set_def_max_entries(map);
 5164		if (err)
 5165			goto err_out;
 5166
 5167		retried = false;
 5168retry:
 5169		if (map->pin_path) {
 5170			err = bpf_object__reuse_map(map);
 5171			if (err) {
 5172				pr_warn("map '%s': error reusing pinned map\n",
 5173					map->name);
 5174				goto err_out;
 5175			}
 5176			if (retried && map->fd < 0) {
 5177				pr_warn("map '%s': cannot find pinned map\n",
 5178					map->name);
 5179				err = -ENOENT;
 5180				goto err_out;
 5181			}
 5182		}
 5183
 5184		if (map->fd >= 0) {
 5185			pr_debug("map '%s': skipping creation (preset fd=%d)\n",
 5186				 map->name, map->fd);
 5187		} else {
 5188			err = bpf_object__create_map(obj, map, false);
 5189			if (err)
 5190				goto err_out;
 5191
 5192			pr_debug("map '%s': created successfully, fd=%d\n",
 5193				 map->name, map->fd);
 5194
 5195			if (bpf_map__is_internal(map)) {
 5196				err = bpf_object__populate_internal_map(obj, map);
 5197				if (err < 0) {
 5198					zclose(map->fd);
 5199					goto err_out;
 5200				}
 5201			}
 5202
 5203			if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
 5204				err = init_map_in_map_slots(obj, map);
 5205				if (err < 0) {
 5206					zclose(map->fd);
 5207					goto err_out;
 5208				}
 5209			}
 5210		}
 5211
 5212		if (map->pin_path && !map->pinned) {
 5213			err = bpf_map__pin(map, NULL);
 5214			if (err) {
 5215				zclose(map->fd);
 5216				if (!retried && err == -EEXIST) {
 5217					retried = true;
 5218					goto retry;
 5219				}
 5220				pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
 5221					map->name, map->pin_path, err);
 5222				goto err_out;
 5223			}
 5224		}
 5225	}
 5226
 5227	return 0;
 5228
 5229err_out:
 5230	cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 5231	pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
 5232	pr_perm_msg(err);
 5233	for (j = 0; j < i; j++)
 5234		zclose(obj->maps[j].fd);
 5235	return err;
 5236}
 5237
 5238static bool bpf_core_is_flavor_sep(const char *s)
 5239{
 5240	/* check X___Y name pattern, where X and Y are not underscores */
 5241	return s[0] != '_' &&				      /* X */
 5242	       s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
 5243	       s[4] != '_';				      /* Y */
 5244}
 5245
 5246/* Given 'some_struct_name___with_flavor' return the length of a name prefix
 5247 * before last triple underscore. Struct name part after last triple
 5248 * underscore is ignored by BPF CO-RE relocation during relocation matching.
 5249 */
 5250size_t bpf_core_essential_name_len(const char *name)
 5251{
 5252	size_t n = strlen(name);
 5253	int i;
 5254
 5255	for (i = n - 5; i >= 0; i--) {
 5256		if (bpf_core_is_flavor_sep(name + i))
 5257			return i + 1;
 5258	}
 5259	return n;
 5260}
 5261
 5262void bpf_core_free_cands(struct bpf_core_cand_list *cands)
 5263{
 5264	if (!cands)
 5265		return;
 5266
 5267	free(cands->cands);
 5268	free(cands);
 5269}
 5270
 5271int bpf_core_add_cands(struct bpf_core_cand *local_cand,
 5272		       size_t local_essent_len,
 5273		       const struct btf *targ_btf,
 5274		       const char *targ_btf_name,
 5275		       int targ_start_id,
 5276		       struct bpf_core_cand_list *cands)
 5277{
 5278	struct bpf_core_cand *new_cands, *cand;
 5279	const struct btf_type *t, *local_t;
 5280	const char *targ_name, *local_name;
 5281	size_t targ_essent_len;
 5282	int n, i;
 5283
 5284	local_t = btf__type_by_id(local_cand->btf, local_cand->id);
 5285	local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
 5286
 5287	n = btf__type_cnt(targ_btf);
 5288	for (i = targ_start_id; i < n; i++) {
 5289		t = btf__type_by_id(targ_btf, i);
 5290		if (!btf_kind_core_compat(t, local_t))
 5291			continue;
 5292
 5293		targ_name = btf__name_by_offset(targ_btf, t->name_off);
 5294		if (str_is_empty(targ_name))
 5295			continue;
 5296
 5297		targ_essent_len = bpf_core_essential_name_len(targ_name);
 5298		if (targ_essent_len != local_essent_len)
 5299			continue;
 5300
 5301		if (strncmp(local_name, targ_name, local_essent_len) != 0)
 5302			continue;
 5303
 5304		pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
 5305			 local_cand->id, btf_kind_str(local_t),
 5306			 local_name, i, btf_kind_str(t), targ_name,
 5307			 targ_btf_name);
 5308		new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
 5309					      sizeof(*cands->cands));
 5310		if (!new_cands)
 5311			return -ENOMEM;
 5312
 5313		cand = &new_cands[cands->len];
 5314		cand->btf = targ_btf;
 5315		cand->id = i;
 5316
 5317		cands->cands = new_cands;
 5318		cands->len++;
 5319	}
 5320	return 0;
 5321}
 5322
 5323static int load_module_btfs(struct bpf_object *obj)
 5324{
 5325	struct bpf_btf_info info;
 5326	struct module_btf *mod_btf;
 5327	struct btf *btf;
 5328	char name[64];
 5329	__u32 id = 0, len;
 5330	int err, fd;
 5331
 5332	if (obj->btf_modules_loaded)
 5333		return 0;
 5334
 5335	if (obj->gen_loader)
 5336		return 0;
 5337
 5338	/* don't do this again, even if we find no module BTFs */
 5339	obj->btf_modules_loaded = true;
 5340
 5341	/* kernel too old to support module BTFs */
 5342	if (!kernel_supports(obj, FEAT_MODULE_BTF))
 5343		return 0;
 5344
 5345	while (true) {
 5346		err = bpf_btf_get_next_id(id, &id);
 5347		if (err && errno == ENOENT)
 5348			return 0;
 5349		if (err) {
 5350			err = -errno;
 5351			pr_warn("failed to iterate BTF objects: %d\n", err);
 5352			return err;
 5353		}
 5354
 5355		fd = bpf_btf_get_fd_by_id(id);
 5356		if (fd < 0) {
 5357			if (errno == ENOENT)
 5358				continue; /* expected race: BTF was unloaded */
 5359			err = -errno;
 5360			pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
 5361			return err;
 5362		}
 5363
 5364		len = sizeof(info);
 5365		memset(&info, 0, sizeof(info));
 5366		info.name = ptr_to_u64(name);
 5367		info.name_len = sizeof(name);
 5368
 5369		err = bpf_obj_get_info_by_fd(fd, &info, &len);
 5370		if (err) {
 5371			err = -errno;
 5372			pr_warn("failed to get BTF object #%d info: %d\n", id, err);
 5373			goto err_out;
 5374		}
 5375
 5376		/* ignore non-module BTFs */
 5377		if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
 5378			close(fd);
 5379			continue;
 5380		}
 5381
 5382		btf = btf_get_from_fd(fd, obj->btf_vmlinux);
 5383		err = libbpf_get_error(btf);
 5384		if (err) {
 5385			pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
 5386				name, id, err);
 5387			goto err_out;
 5388		}
 5389
 5390		err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
 5391				        sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
 5392		if (err)
 5393			goto err_out;
 5394
 5395		mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
 5396
 5397		mod_btf->btf = btf;
 5398		mod_btf->id = id;
 5399		mod_btf->fd = fd;
 5400		mod_btf->name = strdup(name);
 5401		if (!mod_btf->name) {
 5402			err = -ENOMEM;
 5403			goto err_out;
 5404		}
 5405		continue;
 5406
 5407err_out:
 5408		close(fd);
 5409		return err;
 5410	}
 5411
 5412	return 0;
 5413}
 5414
 5415static struct bpf_core_cand_list *
 5416bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
 5417{
 5418	struct bpf_core_cand local_cand = {};
 5419	struct bpf_core_cand_list *cands;
 5420	const struct btf *main_btf;
 5421	const struct btf_type *local_t;
 5422	const char *local_name;
 5423	size_t local_essent_len;
 5424	int err, i;
 5425
 5426	local_cand.btf = local_btf;
 5427	local_cand.id = local_type_id;
 5428	local_t = btf__type_by_id(local_btf, local_type_id);
 5429	if (!local_t)
 5430		return ERR_PTR(-EINVAL);
 5431
 5432	local_name = btf__name_by_offset(local_btf, local_t->name_off);
 5433	if (str_is_empty(local_name))
 5434		return ERR_PTR(-EINVAL);
 5435	local_essent_len = bpf_core_essential_name_len(local_name);
 5436
 5437	cands = calloc(1, sizeof(*cands));
 5438	if (!cands)
 5439		return ERR_PTR(-ENOMEM);
 5440
 5441	/* Attempt to find target candidates in vmlinux BTF first */
 5442	main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
 5443	err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
 5444	if (err)
 5445		goto err_out;
 5446
 5447	/* if vmlinux BTF has any candidate, don't got for module BTFs */
 5448	if (cands->len)
 5449		return cands;
 5450
 5451	/* if vmlinux BTF was overridden, don't attempt to load module BTFs */
 5452	if (obj->btf_vmlinux_override)
 5453		return cands;
 5454
 5455	/* now look through module BTFs, trying to still find candidates */
 5456	err = load_module_btfs(obj);
 5457	if (err)
 5458		goto err_out;
 5459
 5460	for (i = 0; i < obj->btf_module_cnt; i++) {
 5461		err = bpf_core_add_cands(&local_cand, local_essent_len,
 5462					 obj->btf_modules[i].btf,
 5463					 obj->btf_modules[i].name,
 5464					 btf__type_cnt(obj->btf_vmlinux),
 5465					 cands);
 5466		if (err)
 5467			goto err_out;
 5468	}
 5469
 5470	return cands;
 5471err_out:
 5472	bpf_core_free_cands(cands);
 5473	return ERR_PTR(err);
 5474}
 5475
 5476/* Check local and target types for compatibility. This check is used for
 5477 * type-based CO-RE relocations and follow slightly different rules than
 5478 * field-based relocations. This function assumes that root types were already
 5479 * checked for name match. Beyond that initial root-level name check, names
 5480 * are completely ignored. Compatibility rules are as follows:
 5481 *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
 5482 *     kind should match for local and target types (i.e., STRUCT is not
 5483 *     compatible with UNION);
 5484 *   - for ENUMs, the size is ignored;
 5485 *   - for INT, size and signedness are ignored;
 5486 *   - for ARRAY, dimensionality is ignored, element types are checked for
 5487 *     compatibility recursively;
 5488 *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
 5489 *   - TYPEDEFs/PTRs are compatible if types they pointing to are compatible;
 5490 *   - FUNC_PROTOs are compatible if they have compatible signature: same
 5491 *     number of input args and compatible return and argument types.
 5492 * These rules are not set in stone and probably will be adjusted as we get
 5493 * more experience with using BPF CO-RE relocations.
 5494 */
 5495int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
 5496			      const struct btf *targ_btf, __u32 targ_id)
 5497{
 5498	return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
 5499}
 5500
 5501int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
 5502			 const struct btf *targ_btf, __u32 targ_id)
 5503{
 5504	return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
 5505}
 5506
 5507static size_t bpf_core_hash_fn(const void *key, void *ctx)
 5508{
 5509	return (size_t)key;
 5510}
 5511
 5512static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
 5513{
 5514	return k1 == k2;
 5515}
 5516
 5517static void *u32_as_hash_key(__u32 x)
 5518{
 5519	return (void *)(uintptr_t)x;
 5520}
 5521
 5522static int record_relo_core(struct bpf_program *prog,
 5523			    const struct bpf_core_relo *core_relo, int insn_idx)
 5524{
 5525	struct reloc_desc *relos, *relo;
 5526
 5527	relos = libbpf_reallocarray(prog->reloc_desc,
 5528				    prog->nr_reloc + 1, sizeof(*relos));
 5529	if (!relos)
 5530		return -ENOMEM;
 5531	relo = &relos[prog->nr_reloc];
 5532	relo->type = RELO_CORE;
 5533	relo->insn_idx = insn_idx;
 5534	relo->core_relo = core_relo;
 5535	prog->reloc_desc = relos;
 5536	prog->nr_reloc++;
 5537	return 0;
 5538}
 5539
 5540static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
 5541{
 5542	struct reloc_desc *relo;
 5543	int i;
 5544
 5545	for (i = 0; i < prog->nr_reloc; i++) {
 5546		relo = &prog->reloc_desc[i];
 5547		if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
 5548			continue;
 5549
 5550		return relo->core_relo;
 5551	}
 5552
 5553	return NULL;
 5554}
 5555
 5556static int bpf_core_resolve_relo(struct bpf_program *prog,
 5557				 const struct bpf_core_relo *relo,
 5558				 int relo_idx,
 5559				 const struct btf *local_btf,
 5560				 struct hashmap *cand_cache,
 5561				 struct bpf_core_relo_res *targ_res)
 5562{
 5563	struct bpf_core_spec specs_scratch[3] = {};
 5564	const void *type_key = u32_as_hash_key(relo->type_id);
 5565	struct bpf_core_cand_list *cands = NULL;
 5566	const char *prog_name = prog->name;
 5567	const struct btf_type *local_type;
 5568	const char *local_name;
 5569	__u32 local_id = relo->type_id;
 5570	int err;
 5571
 5572	local_type = btf__type_by_id(local_btf, local_id);
 5573	if (!local_type)
 5574		return -EINVAL;
 5575
 5576	local_name = btf__name_by_offset(local_btf, local_type->name_off);
 5577	if (!local_name)
 5578		return -EINVAL;
 5579
 5580	if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
 5581	    !hashmap__find(cand_cache, type_key, (void **)&cands)) {
 5582		cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
 5583		if (IS_ERR(cands)) {
 5584			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
 5585				prog_name, relo_idx, local_id, btf_kind_str(local_type),
 5586				local_name, PTR_ERR(cands));
 5587			return PTR_ERR(cands);
 5588		}
 5589		err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
 5590		if (err) {
 5591			bpf_core_free_cands(cands);
 5592			return err;
 5593		}
 5594	}
 5595
 5596	return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
 5597				       targ_res);
 5598}
 5599
 5600static int
 5601bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 5602{
 5603	const struct btf_ext_info_sec *sec;
 5604	struct bpf_core_relo_res targ_res;
 5605	const struct bpf_core_relo *rec;
 5606	const struct btf_ext_info *seg;
 5607	struct hashmap_entry *entry;
 5608	struct hashmap *cand_cache = NULL;
 5609	struct bpf_program *prog;
 5610	struct bpf_insn *insn;
 5611	const char *sec_name;
 5612	int i, err = 0, insn_idx, sec_idx, sec_num;
 5613
 5614	if (obj->btf_ext->core_relo_info.len == 0)
 5615		return 0;
 5616
 5617	if (targ_btf_path) {
 5618		obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
 5619		err = libbpf_get_error(obj->btf_vmlinux_override);
 5620		if (err) {
 5621			pr_warn("failed to parse target BTF: %d\n", err);
 5622			return err;
 5623		}
 5624	}
 5625
 5626	cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
 5627	if (IS_ERR(cand_cache)) {
 5628		err = PTR_ERR(cand_cache);
 5629		goto out;
 5630	}
 5631
 5632	seg = &obj->btf_ext->core_relo_info;
 5633	sec_num = 0;
 5634	for_each_btf_ext_sec(seg, sec) {
 5635		sec_idx = seg->sec_idxs[sec_num];
 5636		sec_num++;
 5637
 5638		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
 5639		if (str_is_empty(sec_name)) {
 5640			err = -EINVAL;
 5641			goto out;
 5642		}
 5643
 5644		pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
 5645
 5646		for_each_btf_ext_rec(seg, sec, i, rec) {
 5647			if (rec->insn_off % BPF_INSN_SZ)
 5648				return -EINVAL;
 5649			insn_idx = rec->insn_off / BPF_INSN_SZ;
 5650			prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
 5651			if (!prog) {
 5652				/* When __weak subprog is "overridden" by another instance
 5653				 * of the subprog from a different object file, linker still
 5654				 * appends all the .BTF.ext info that used to belong to that
 5655				 * eliminated subprogram.
 5656				 * This is similar to what x86-64 linker does for relocations.
 5657				 * So just ignore such relocations just like we ignore
 5658				 * subprog instructions when discovering subprograms.
 5659				 */
 5660				pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
 5661					 sec_name, i, insn_idx);
 5662				continue;
 5663			}
 5664			/* no need to apply CO-RE relocation if the program is
 5665			 * not going to be loaded
 5666			 */
 5667			if (!prog->autoload)
 5668				continue;
 5669
 5670			/* adjust insn_idx from section frame of reference to the local
 5671			 * program's frame of reference; (sub-)program code is not yet
 5672			 * relocated, so it's enough to just subtract in-section offset
 5673			 */
 5674			insn_idx = insn_idx - prog->sec_insn_off;
 5675			if (insn_idx >= prog->insns_cnt)
 5676				return -EINVAL;
 5677			insn = &prog->insns[insn_idx];
 5678
 5679			err = record_relo_core(prog, rec, insn_idx);
 5680			if (err) {
 5681				pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
 5682					prog->name, i, err);
 5683				goto out;
 5684			}
 5685
 5686			if (prog->obj->gen_loader)
 5687				continue;
 5688
 5689			err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
 5690			if (err) {
 5691				pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
 5692					prog->name, i, err);
 5693				goto out;
 5694			}
 5695
 5696			err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
 5697			if (err) {
 5698				pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
 5699					prog->name, i, insn_idx, err);
 5700				goto out;
 5701			}
 5702		}
 5703	}
 5704
 5705out:
 5706	/* obj->btf_vmlinux and module BTFs are freed after object load */
 5707	btf__free(obj->btf_vmlinux_override);
 5708	obj->btf_vmlinux_override = NULL;
 5709
 5710	if (!IS_ERR_OR_NULL(cand_cache)) {
 5711		hashmap__for_each_entry(cand_cache, entry, i) {
 5712			bpf_core_free_cands(entry->value);
 5713		}
 5714		hashmap__free(cand_cache);
 5715	}
 5716	return err;
 5717}
 5718
 5719/* base map load ldimm64 special constant, used also for log fixup logic */
 5720#define MAP_LDIMM64_POISON_BASE 2001000000
 5721#define MAP_LDIMM64_POISON_PFX "200100"
 5722
 5723static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
 5724			       int insn_idx, struct bpf_insn *insn,
 5725			       int map_idx, const struct bpf_map *map)
 5726{
 5727	int i;
 5728
 5729	pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
 5730		 prog->name, relo_idx, insn_idx, map_idx, map->name);
 5731
 5732	/* we turn single ldimm64 into two identical invalid calls */
 5733	for (i = 0; i < 2; i++) {
 5734		insn->code = BPF_JMP | BPF_CALL;
 5735		insn->dst_reg = 0;
 5736		insn->src_reg = 0;
 5737		insn->off = 0;
 5738		/* if this instruction is reachable (not a dead code),
 5739		 * verifier will complain with something like:
 5740		 * invalid func unknown#2001000123
 5741		 * where lower 123 is map index into obj->maps[] array
 5742		 */
 5743		insn->imm = MAP_LDIMM64_POISON_BASE + map_idx;
 5744
 5745		insn++;
 5746	}
 5747}
 5748
 5749/* Relocate data references within program code:
 5750 *  - map references;
 5751 *  - global variable references;
 5752 *  - extern references.
 5753 */
 5754static int
 5755bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
 5756{
 5757	int i;
 5758
 5759	for (i = 0; i < prog->nr_reloc; i++) {
 5760		struct reloc_desc *relo = &prog->reloc_desc[i];
 5761		struct bpf_insn *insn = &prog->insns[relo->insn_idx];
 5762		const struct bpf_map *map;
 5763		struct extern_desc *ext;
 5764
 5765		switch (relo->type) {
 5766		case RELO_LD64:
 5767			map = &obj->maps[relo->map_idx];
 5768			if (obj->gen_loader) {
 5769				insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
 5770				insn[0].imm = relo->map_idx;
 5771			} else if (map->autocreate) {
 5772				insn[0].src_reg = BPF_PSEUDO_MAP_FD;
 5773				insn[0].imm = map->fd;
 5774			} else {
 5775				poison_map_ldimm64(prog, i, relo->insn_idx, insn,
 5776						   relo->map_idx, map);
 5777			}
 5778			break;
 5779		case RELO_DATA:
 5780			map = &obj->maps[relo->map_idx];
 5781			insn[1].imm = insn[0].imm + relo->sym_off;
 5782			if (obj->gen_loader) {
 5783				insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
 5784				insn[0].imm = relo->map_idx;
 5785			} else if (map->autocreate) {
 5786				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 5787				insn[0].imm = map->fd;
 5788			} else {
 5789				poison_map_ldimm64(prog, i, relo->insn_idx, insn,
 5790						   relo->map_idx, map);
 5791			}
 5792			break;
 5793		case RELO_EXTERN_VAR:
 5794			ext = &obj->externs[relo->sym_off];
 5795			if (ext->type == EXT_KCFG) {
 5796				if (obj->gen_loader) {
 5797					insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
 5798					insn[0].imm = obj->kconfig_map_idx;
 5799				} else {
 5800					insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 5801					insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
 5802				}
 5803				insn[1].imm = ext->kcfg.data_off;
 5804			} else /* EXT_KSYM */ {
 5805				if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
 5806					insn[0].src_reg = BPF_PSEUDO_BTF_ID;
 5807					insn[0].imm = ext->ksym.kernel_btf_id;
 5808					insn[1].imm = ext->ksym.kernel_btf_obj_fd;
 5809				} else { /* typeless ksyms or unresolved typed ksyms */
 5810					insn[0].imm = (__u32)ext->ksym.addr;
 5811					insn[1].imm = ext->ksym.addr >> 32;
 5812				}
 5813			}
 5814			break;
 5815		case RELO_EXTERN_FUNC:
 5816			ext = &obj->externs[relo->sym_off];
 5817			insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
 5818			if (ext->is_set) {
 5819				insn[0].imm = ext->ksym.kernel_btf_id;
 5820				insn[0].off = ext->ksym.btf_fd_idx;
 5821			} else { /* unresolved weak kfunc */
 5822				insn[0].imm = 0;
 5823				insn[0].off = 0;
 5824			}
 5825			break;
 5826		case RELO_SUBPROG_ADDR:
 5827			if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
 5828				pr_warn("prog '%s': relo #%d: bad insn\n",
 5829					prog->name, i);
 5830				return -EINVAL;
 5831			}
 5832			/* handled already */
 5833			break;
 5834		case RELO_CALL:
 5835			/* handled already */
 5836			break;
 5837		case RELO_CORE:
 5838			/* will be handled by bpf_program_record_relos() */
 5839			break;
 5840		default:
 5841			pr_warn("prog '%s': relo #%d: bad relo type %d\n",
 5842				prog->name, i, relo->type);
 5843			return -EINVAL;
 5844		}
 5845	}
 5846
 5847	return 0;
 5848}
 5849
 5850static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
 5851				    const struct bpf_program *prog,
 5852				    const struct btf_ext_info *ext_info,
 5853				    void **prog_info, __u32 *prog_rec_cnt,
 5854				    __u32 *prog_rec_sz)
 5855{
 5856	void *copy_start = NULL, *copy_end = NULL;
 5857	void *rec, *rec_end, *new_prog_info;
 5858	const struct btf_ext_info_sec *sec;
 5859	size_t old_sz, new_sz;
 5860	int i, sec_num, sec_idx, off_adj;
 5861
 5862	sec_num = 0;
 5863	for_each_btf_ext_sec(ext_info, sec) {
 5864		sec_idx = ext_info->sec_idxs[sec_num];
 5865		sec_num++;
 5866		if (prog->sec_idx != sec_idx)
 5867			continue;
 5868
 5869		for_each_btf_ext_rec(ext_info, sec, i, rec) {
 5870			__u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
 5871
 5872			if (insn_off < prog->sec_insn_off)
 5873				continue;
 5874			if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
 5875				break;
 5876
 5877			if (!copy_start)
 5878				copy_start = rec;
 5879			copy_end = rec + ext_info->rec_size;
 5880		}
 5881
 5882		if (!copy_start)
 5883			return -ENOENT;
 5884
 5885		/* append func/line info of a given (sub-)program to the main
 5886		 * program func/line info
 5887		 */
 5888		old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
 5889		new_sz = old_sz + (copy_end - copy_start);
 5890		new_prog_info = realloc(*prog_info, new_sz);
 5891		if (!new_prog_info)
 5892			return -ENOMEM;
 5893		*prog_info = new_prog_info;
 5894		*prog_rec_cnt = new_sz / ext_info->rec_size;
 5895		memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
 5896
 5897		/* Kernel instruction offsets are in units of 8-byte
 5898		 * instructions, while .BTF.ext instruction offsets generated
 5899		 * by Clang are in units of bytes. So convert Clang offsets
 5900		 * into kernel offsets and adjust offset according to program
 5901		 * relocated position.
 5902		 */
 5903		off_adj = prog->sub_insn_off - prog->sec_insn_off;
 5904		rec = new_prog_info + old_sz;
 5905		rec_end = new_prog_info + new_sz;
 5906		for (; rec < rec_end; rec += ext_info->rec_size) {
 5907			__u32 *insn_off = rec;
 5908
 5909			*insn_off = *insn_off / BPF_INSN_SZ + off_adj;
 5910		}
 5911		*prog_rec_sz = ext_info->rec_size;
 5912		return 0;
 5913	}
 5914
 5915	return -ENOENT;
 5916}
 5917
 5918static int
 5919reloc_prog_func_and_line_info(const struct bpf_object *obj,
 5920			      struct bpf_program *main_prog,
 5921			      const struct bpf_program *prog)
 5922{
 5923	int err;
 5924
 5925	/* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
 5926	 * supprot func/line info
 5927	 */
 5928	if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
 5929		return 0;
 5930
 5931	/* only attempt func info relocation if main program's func_info
 5932	 * relocation was successful
 5933	 */
 5934	if (main_prog != prog && !main_prog->func_info)
 5935		goto line_info;
 5936
 5937	err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
 5938				       &main_prog->func_info,
 5939				       &main_prog->func_info_cnt,
 5940				       &main_prog->func_info_rec_size);
 5941	if (err) {
 5942		if (err != -ENOENT) {
 5943			pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
 5944				prog->name, err);
 5945			return err;
 5946		}
 5947		if (main_prog->func_info) {
 5948			/*
 5949			 * Some info has already been found but has problem
 5950			 * in the last btf_ext reloc. Must have to error out.
 5951			 */
 5952			pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
 5953			return err;
 5954		}
 5955		/* Have problem loading the very first info. Ignore the rest. */
 5956		pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
 5957			prog->name);
 5958	}
 5959
 5960line_info:
 5961	/* don't relocate line info if main program's relocation failed */
 5962	if (main_prog != prog && !main_prog->line_info)
 5963		return 0;
 5964
 5965	err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
 5966				       &main_prog->line_info,
 5967				       &main_prog->line_info_cnt,
 5968				       &main_prog->line_info_rec_size);
 5969	if (err) {
 5970		if (err != -ENOENT) {
 5971			pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
 5972				prog->name, err);
 5973			return err;
 5974		}
 5975		if (main_prog->line_info) {
 5976			/*
 5977			 * Some info has already been found but has problem
 5978			 * in the last btf_ext reloc. Must have to error out.
 5979			 */
 5980			pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
 5981			return err;
 5982		}
 5983		/* Have problem loading the very first info. Ignore the rest. */
 5984		pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
 5985			prog->name);
 5986	}
 5987	return 0;
 5988}
 5989
 5990static int cmp_relo_by_insn_idx(const void *key, const void *elem)
 5991{
 5992	size_t insn_idx = *(const size_t *)key;
 5993	const struct reloc_desc *relo = elem;
 5994
 5995	if (insn_idx == relo->insn_idx)
 5996		return 0;
 5997	return insn_idx < relo->insn_idx ? -1 : 1;
 5998}
 5999
 6000static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
 6001{
 6002	if (!prog->nr_reloc)
 6003		return NULL;
 6004	return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
 6005		       sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
 6006}
 6007
 6008static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
 6009{
 6010	int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
 6011	struct reloc_desc *relos;
 6012	int i;
 6013
 6014	if (main_prog == subprog)
 6015		return 0;
 6016	relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
 6017	if (!relos)
 6018		return -ENOMEM;
 6019	if (subprog->nr_reloc)
 6020		memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
 6021		       sizeof(*relos) * subprog->nr_reloc);
 6022
 6023	for (i = main_prog->nr_reloc; i < new_cnt; i++)
 6024		relos[i].insn_idx += subprog->sub_insn_off;
 6025	/* After insn_idx adjustment the 'relos' array is still sorted
 6026	 * by insn_idx and doesn't break bsearch.
 6027	 */
 6028	main_prog->reloc_desc = relos;
 6029	main_prog->nr_reloc = new_cnt;
 6030	return 0;
 6031}
 6032
/* Relocate subprog calls and subprog address loads found in 'prog', which is
 * either 'main_prog' itself or a subprog whose instructions were already
 * copied into main_prog->insns at prog->sub_insn_off.
 *
 * Every referenced subprog that isn't yet part of main_prog (its sub_insn_off
 * is still zero) is appended to main_prog->insns together with its relocation
 * descriptors and then processed recursively. Afterwards the referencing
 * instruction's imm is rewritten to the relative offset of that subprog's
 * copy within main_prog.
 *
 * Returns 0 on success, error code otherwise.
 */
static int
bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
		       struct bpf_program *prog)
{
	size_t sub_insn_idx, insn_idx, new_cnt;
	struct bpf_program *subprog;
	struct bpf_insn *insns, *insn;
	struct reloc_desc *relo;
	int err;

	err = reloc_prog_func_and_line_info(obj, main_prog, prog);
	if (err)
		return err;

	for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
		/* prog's instructions are addressed through their copy
		 * inside main_prog
		 */
		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
		if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
			continue;

		relo = find_prog_insn_relo(prog, insn_idx);
		if (relo && relo->type == RELO_EXTERN_FUNC)
			/* kfunc relocations will be handled later
			 * in bpf_object__relocate_data()
			 */
			continue;
		if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
			pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
				prog->name, insn_idx, relo->type);
			return -LIBBPF_ERRNO__RELOC;
		}
		if (relo) {
			/* sub-program instruction index is a combination of
			 * an offset of a symbol pointed to by relocation and
			 * call instruction's imm field; for global functions,
			 * call always has imm = -1, but for static functions
			 * relocation is against STT_SECTION and insn->imm
			 * points to a start of a static function
			 *
			 * for subprog addr relocation, the relo->sym_off + insn->imm is
			 * the byte offset in the corresponding section.
			 */
			if (relo->type == RELO_CALL)
				sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
			else
				sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
		} else if (insn_is_pseudo_func(insn)) {
			/*
			 * RELO_SUBPROG_ADDR relo is always emitted even if both
			 * functions are in the same section, so it shouldn't reach here.
			 */
			pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
				prog->name, insn_idx);
			return -LIBBPF_ERRNO__RELOC;
		} else {
			/* if subprogram call is to a static function within
			 * the same ELF section, there won't be any relocation
			 * emitted, but it also means there is no additional
			 * offset necessary, insns->imm is relative to
			 * instruction's original position within the section
			 */
			sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
		}

		/* we enforce that sub-programs should be in .text section */
		subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
		if (!subprog) {
			pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
				prog->name);
			return -LIBBPF_ERRNO__RELOC;
		}

		/* if it's the first call instruction calling into this
		 * subprogram (meaning this subprog hasn't been processed
		 * yet) within the context of current main program:
		 *   - append it at the end of main program's instructions blob;
		 *   - process it recursively, while current program is put on hold;
		 *   - if that subprogram calls some other not yet processed
		 *   subprogram, same thing will happen recursively until
		 *   there are no more unprocessed subprograms left to append
		 *   and relocate.
		 */
		if (subprog->sub_insn_off == 0) {
			/* subprog's copy starts right after main_prog's current last insn */
			subprog->sub_insn_off = main_prog->insns_cnt;

			new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
			insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
			if (!insns) {
				pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
				return -ENOMEM;
			}
			main_prog->insns = insns;
			main_prog->insns_cnt = new_cnt;

			memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
			       subprog->insns_cnt * sizeof(*insns));

			pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
				 main_prog->name, subprog->insns_cnt, subprog->name);

			/* The subprog insns are now appended. Append its relos too. */
			err = append_subprog_relos(main_prog, subprog);
			if (err)
				return err;
			err = bpf_object__reloc_code(obj, main_prog, subprog);
			if (err)
				return err;
		}

		/* main_prog->insns memory could have been re-allocated, so
		 * calculate pointer again
		 */
		insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
		/* calculate correct instruction position within current main
		 * prog; each main prog can have a different set of
		 * subprograms appended (potentially in different order as
		 * well), so position of any subprog can be different for
		 * different main programs
		 */
		insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;

		pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
			 prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
	}

	return 0;
}
 6158
 6159/*
 6160 * Relocate sub-program calls.
 6161 *
 6162 * Algorithm operates as follows. Each entry-point BPF program (referred to as
 6163 * main prog) is processed separately. For each subprog (non-entry functions,
 6164 * that can be called from either entry progs or other subprogs) gets their
 6165 * sub_insn_off reset to zero. This serves as indicator that this subprogram
 * hasn't been yet appended and relocated within current main prog. Once it's
 6167 * relocated, sub_insn_off will point at the position within current main prog
 6168 * where given subprog was appended. This will further be used to relocate all
 6169 * the call instructions jumping into this subprog.
 6170 *
 6171 * We start with main program and process all call instructions. If the call
 6172 * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
 6173 * is zero), subprog instructions are appended at the end of main program's
 6174 * instruction array. Then main program is "put on hold" while we recursively
 6175 * process newly appended subprogram. If that subprogram calls into another
 6176 * subprogram that hasn't been appended, new subprogram is appended again to
 6177 * the *main* prog's instructions (subprog's instructions are always left
 6178 * untouched, as they need to be in unmodified state for subsequent main progs
 6179 * and subprog instructions are always sent only as part of a main prog) and
 6180 * the process continues recursively. Once all the subprogs called from a main
 6181 * prog or any of its subprogs are appended (and relocated), all their
 6182 * positions within finalized instructions array are known, so it's easy to
 6183 * rewrite call instructions with correct relative offsets, corresponding to
 6184 * desired target subprog.
 6185 *
 * It's important to realize that some subprogs might not be called from some
 6187 * main prog and any of its called/used subprogs. Those will keep their
 6188 * subprog->sub_insn_off as zero at all times and won't be appended to current
 6189 * main prog and won't be relocated within the context of current main prog.
 6190 * They might still be used from other main progs later.
 6191 *
 6192 * Visually this process can be shown as below. Suppose we have two main
 6193 * programs mainA and mainB and BPF object contains three subprogs: subA,
 6194 * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
 6195 * subC both call subB:
 6196 *
 6197 *        +--------+ +-------+
 6198 *        |        v v       |
 6199 *     +--+---+ +--+-+-+ +---+--+
 6200 *     | subA | | subB | | subC |
 6201 *     +--+---+ +------+ +---+--+
 6202 *        ^                  ^
 6203 *        |                  |
 6204 *    +---+-------+   +------+----+
 6205 *    |   mainA   |   |   mainB   |
 6206 *    +-----------+   +-----------+
 6207 *
 6208 * We'll start relocating mainA, will find subA, append it and start
 6209 * processing sub A recursively:
 6210 *
 6211 *    +-----------+------+
 6212 *    |   mainA   | subA |
 6213 *    +-----------+------+
 6214 *
 6215 * At this point we notice that subB is used from subA, so we append it and
 6216 * relocate (there are no further subcalls from subB):
 6217 *
 6218 *    +-----------+------+------+
 6219 *    |   mainA   | subA | subB |
 6220 *    +-----------+------+------+
 6221 *
 6222 * At this point, we relocate subA calls, then go one level up and finish with
 * relocating mainA calls. mainA is done.
 6224 *
 6225 * For mainB process is similar but results in different order. We start with
 6226 * mainB and skip subA and subB, as mainB never calls them (at least
 6227 * directly), but we see subC is needed, so we append and start processing it:
 6228 *
 6229 *    +-----------+------+
 6230 *    |   mainB   | subC |
 6231 *    +-----------+------+
 6232 * Now we see subC needs subB, so we go back to it, append and relocate it:
 6233 *
 6234 *    +-----------+------+------+
 6235 *    |   mainB   | subC | subB |
 6236 *    +-----------+------+------+
 6237 *
 6238 * At this point we unwind recursion, relocate calls in subC, then in mainB.
 6239 */
 6240static int
 6241bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
 6242{
 6243	struct bpf_program *subprog;
 6244	int i, err;
 6245
 6246	/* mark all subprogs as not relocated (yet) within the context of
 6247	 * current main program
 6248	 */
 6249	for (i = 0; i < obj->nr_programs; i++) {
 6250		subprog = &obj->programs[i];
 6251		if (!prog_is_subprog(obj, subprog))
 6252			continue;
 6253
 6254		subprog->sub_insn_off = 0;
 6255	}
 6256
 6257	err = bpf_object__reloc_code(obj, prog, prog);
 6258	if (err)
 6259		return err;
 6260
 6261	return 0;
 6262}
 6263
 6264static void
 6265bpf_object__free_relocs(struct bpf_object *obj)
 6266{
 6267	struct bpf_program *prog;
 6268	int i;
 6269
 6270	/* free up relocation descriptors */
 6271	for (i = 0; i < obj->nr_programs; i++) {
 6272		prog = &obj->programs[i];
 6273		zfree(&prog->reloc_desc);
 6274		prog->nr_reloc = 0;
 6275	}
 6276}
 6277
 6278static int cmp_relocs(const void *_a, const void *_b)
 6279{
 6280	const struct reloc_desc *a = _a;
 6281	const struct reloc_desc *b = _b;
 6282
 6283	if (a->insn_idx != b->insn_idx)
 6284		return a->insn_idx < b->insn_idx ? -1 : 1;
 6285
 6286	/* no two relocations should have the same insn_idx, but ... */
 6287	if (a->type != b->type)
 6288		return a->type < b->type ? -1 : 1;
 6289
 6290	return 0;
 6291}
 6292
 6293static void bpf_object__sort_relos(struct bpf_object *obj)
 6294{
 6295	int i;
 6296
 6297	for (i = 0; i < obj->nr_programs; i++) {
 6298		struct bpf_program *p = &obj->programs[i];
 6299
 6300		if (!p->nr_reloc)
 6301			continue;
 6302
 6303		qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
 6304	}
 6305}
 6306
 6307static int
 6308bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 6309{
 6310	struct bpf_program *prog;
 6311	size_t i, j;
 6312	int err;
 6313
 6314	if (obj->btf_ext) {
 6315		err = bpf_object__relocate_core(obj, targ_btf_path);
 6316		if (err) {
 6317			pr_warn("failed to perform CO-RE relocations: %d\n",
 6318				err);
 6319			return err;
 6320		}
 6321		bpf_object__sort_relos(obj);
 6322	}
 6323
 6324	/* Before relocating calls pre-process relocations and mark
 6325	 * few ld_imm64 instructions that points to subprogs.
 6326	 * Otherwise bpf_object__reloc_code() later would have to consider
 6327	 * all ld_imm64 insns as relocation candidates. That would
 6328	 * reduce relocation speed, since amount of find_prog_insn_relo()
 6329	 * would increase and most of them will fail to find a relo.
 6330	 */
 6331	for (i = 0; i < obj->nr_programs; i++) {
 6332		prog = &obj->programs[i];
 6333		for (j = 0; j < prog->nr_reloc; j++) {
 6334			struct reloc_desc *relo = &prog->reloc_desc[j];
 6335			struct bpf_insn *insn = &prog->insns[relo->insn_idx];
 6336
 6337			/* mark the insn, so it's recognized by insn_is_pseudo_func() */
 6338			if (relo->type == RELO_SUBPROG_ADDR)
 6339				insn[0].src_reg = BPF_PSEUDO_FUNC;
 6340		}
 6341	}
 6342
 6343	/* relocate subprogram calls and append used subprograms to main
 6344	 * programs; each copy of subprogram code needs to be relocated
 6345	 * differently for each main program, because its code location might
 6346	 * have changed.
 6347	 * Append subprog relos to main programs to allow data relos to be
 6348	 * processed after text is completely relocated.
 6349	 */
 6350	for (i = 0; i < obj->nr_programs; i++) {
 6351		prog = &obj->programs[i];
 6352		/* sub-program's sub-calls are relocated within the context of
 6353		 * its main program only
 6354		 */
 6355		if (prog_is_subprog(obj, prog))
 6356			continue;
 6357		if (!prog->autoload)
 6358			continue;
 6359
 6360		err = bpf_object__relocate_calls(obj, prog);
 6361		if (err) {
 6362			pr_warn("prog '%s': failed to relocate calls: %d\n",
 6363				prog->name, err);
 6364			return err;
 6365		}
 6366	}
 6367	/* Process data relos for main programs */
 6368	for (i = 0; i < obj->nr_programs; i++) {
 6369		prog = &obj->programs[i];
 6370		if (prog_is_subprog(obj, prog))
 6371			continue;
 6372		if (!prog->autoload)
 6373			continue;
 6374		err = bpf_object__relocate_data(obj, prog);
 6375		if (err) {
 6376			pr_warn("prog '%s': failed to relocate data references: %d\n",
 6377				prog->name, err);
 6378			return err;
 6379		}
 6380	}
 6381
 6382	return 0;
 6383}
 6384
 6385static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
 6386					    Elf64_Shdr *shdr, Elf_Data *data);
 6387
 6388static int bpf_object__collect_map_relos(struct bpf_object *obj,
 6389					 Elf64_Shdr *shdr, Elf_Data *data)
 6390{
 6391	const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
 6392	int i, j, nrels, new_sz;
 6393	const struct btf_var_secinfo *vi = NULL;
 6394	const struct btf_type *sec, *var, *def;
 6395	struct bpf_map *map = NULL, *targ_map = NULL;
 6396	struct bpf_program *targ_prog = NULL;
 6397	bool is_prog_array, is_map_in_map;
 6398	const struct btf_member *member;
 6399	const char *name, *mname, *type;
 6400	unsigned int moff;
 6401	Elf64_Sym *sym;
 6402	Elf64_Rel *rel;
 6403	void *tmp;
 6404
 6405	if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
 6406		return -EINVAL;
 6407	sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
 6408	if (!sec)
 6409		return -EINVAL;
 6410
 6411	nrels = shdr->sh_size / shdr->sh_entsize;
 6412	for (i = 0; i < nrels; i++) {
 6413		rel = elf_rel_by_idx(data, i);
 6414		if (!rel) {
 6415			pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
 6416			return -LIBBPF_ERRNO__FORMAT;
 6417		}
 6418
 6419		sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
 6420		if (!sym) {
 6421			pr_warn(".maps relo #%d: symbol %zx not found\n",
 6422				i, (size_t)ELF64_R_SYM(rel->r_info));
 6423			return -LIBBPF_ERRNO__FORMAT;
 6424		}
 6425		name = elf_sym_str(obj, sym->st_name) ?: "<?>";
 6426
 6427		pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
 6428			 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
 6429			 (size_t)rel->r_offset, sym->st_name, name);
 6430
 6431		for (j = 0; j < obj->nr_maps; j++) {
 6432			map = &obj->maps[j];
 6433			if (map->sec_idx != obj->efile.btf_maps_shndx)
 6434				continue;
 6435
 6436			vi = btf_var_secinfos(sec) + map->btf_var_idx;
 6437			if (vi->offset <= rel->r_offset &&
 6438			    rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
 6439				break;
 6440		}
 6441		if (j == obj->nr_maps) {
 6442			pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
 6443				i, name, (size_t)rel->r_offset);
 6444			return -EINVAL;
 6445		}
 6446
 6447		is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
 6448		is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
 6449		type = is_map_in_map ? "map" : "prog";
 6450		if (is_map_in_map) {
 6451			if (sym->st_shndx != obj->efile.btf_maps_shndx) {
 6452				pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
 6453					i, name);
 6454				return -LIBBPF_ERRNO__RELOC;
 6455			}
 6456			if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
 6457			    map->def.key_size != sizeof(int)) {
 6458				pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
 6459					i, map->name, sizeof(int));
 6460				return -EINVAL;
 6461			}
 6462			targ_map = bpf_object__find_map_by_name(obj, name);
 6463			if (!targ_map) {
 6464				pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
 6465					i, name);
 6466				return -ESRCH;
 6467			}
 6468		} else if (is_prog_array) {
 6469			targ_prog = bpf_object__find_program_by_name(obj, name);
 6470			if (!targ_prog) {
 6471				pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
 6472					i, name);
 6473				return -ESRCH;
 6474			}
 6475			if (targ_prog->sec_idx != sym->st_shndx ||
 6476			    targ_prog->sec_insn_off * 8 != sym->st_value ||
 6477			    prog_is_subprog(obj, targ_prog)) {
 6478				pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
 6479					i, name);
 6480				return -LIBBPF_ERRNO__RELOC;
 6481			}
 6482		} else {
 6483			return -EINVAL;
 6484		}
 6485
 6486		var = btf__type_by_id(obj->btf, vi->type);
 6487		def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
 6488		if (btf_vlen(def) == 0)
 6489			return -EINVAL;
 6490		member = btf_members(def) + btf_vlen(def) - 1;
 6491		mname = btf__name_by_offset(obj->btf, member->name_off);
 6492		if (strcmp(mname, "values"))
 6493			return -EINVAL;
 6494
 6495		moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
 6496		if (rel->r_offset - vi->offset < moff)
 6497			return -EINVAL;
 6498
 6499		moff = rel->r_offset - vi->offset - moff;
 6500		/* here we use BPF pointer size, which is always 64 bit, as we
 6501		 * are parsing ELF that was built for BPF target
 6502		 */
 6503		if (moff % bpf_ptr_sz)
 6504			return -EINVAL;
 6505		moff /= bpf_ptr_sz;
 6506		if (moff >= map->init_slots_sz) {
 6507			new_sz = moff + 1;
 6508			tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
 6509			if (!tmp)
 6510				return -ENOMEM;
 6511			map->init_slots = tmp;
 6512			memset(map->init_slots + map->init_slots_sz, 0,
 6513			       (new_sz - map->init_slots_sz) * host_ptr_sz);
 6514			map->init_slots_sz = new_sz;
 6515		}
 6516		map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
 6517
 6518		pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
 6519			 i, map->name, moff, type, name);
 6520	}
 6521
 6522	return 0;
 6523}
 6524
 6525static int bpf_object__collect_relos(struct bpf_object *obj)
 6526{
 6527	int i, err;
 6528
 6529	for (i = 0; i < obj->efile.sec_cnt; i++) {
 6530		struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
 6531		Elf64_Shdr *shdr;
 6532		Elf_Data *data;
 6533		int idx;
 6534
 6535		if (sec_desc->sec_type != SEC_RELO)
 6536			continue;
 6537
 6538		shdr = sec_desc->shdr;
 6539		data = sec_desc->data;
 6540		idx = shdr->sh_info;
 6541
 6542		if (shdr->sh_type != SHT_REL) {
 6543			pr_warn("internal error at %d\n", __LINE__);
 6544			return -LIBBPF_ERRNO__INTERNAL;
 6545		}
 6546
 6547		if (idx == obj->efile.st_ops_shndx)
 6548			err = bpf_object__collect_st_ops_relos(obj, shdr, data);
 6549		else if (idx == obj->efile.btf_maps_shndx)
 6550			err = bpf_object__collect_map_relos(obj, shdr, data);
 6551		else
 6552			err = bpf_object__collect_prog_relos(obj, shdr, data);
 6553		if (err)
 6554			return err;
 6555	}
 6556
 6557	bpf_object__sort_relos(obj);
 6558	return 0;
 6559}
 6560
 6561static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
 6562{
 6563	if (BPF_CLASS(insn->code) == BPF_JMP &&
 6564	    BPF_OP(insn->code) == BPF_CALL &&
 6565	    BPF_SRC(insn->code) == BPF_K &&
 6566	    insn->src_reg == 0 &&
 6567	    insn->dst_reg == 0) {
 6568		    *func_id = insn->imm;
 6569		    return true;
 6570	}
 6571	return false;
 6572}
 6573
 6574static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
 6575{
 6576	struct bpf_insn *insn = prog->insns;
 6577	enum bpf_func_id func_id;
 6578	int i;
 6579
 6580	if (obj->gen_loader)
 6581		return 0;
 6582
 6583	for (i = 0; i < prog->insns_cnt; i++, insn++) {
 6584		if (!insn_is_helper_call(insn, &func_id))
 6585			continue;
 6586
 6587		/* on kernels that don't yet support
 6588		 * bpf_probe_read_{kernel,user}[_str] helpers, fall back
 6589		 * to bpf_probe_read() which works well for old kernels
 6590		 */
 6591		switch (func_id) {
 6592		case BPF_FUNC_probe_read_kernel:
 6593		case BPF_FUNC_probe_read_user:
 6594			if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
 6595				insn->imm = BPF_FUNC_probe_read;
 6596			break;
 6597		case BPF_FUNC_probe_read_kernel_str:
 6598		case BPF_FUNC_probe_read_user_str:
 6599			if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
 6600				insn->imm = BPF_FUNC_probe_read_str;
 6601			break;
 6602		default:
 6603			break;
 6604		}
 6605	}
 6606	return 0;
 6607}
 6608
 6609static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
 6610				     int *btf_obj_fd, int *btf_type_id);
 6611
 6612/* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
 6613static int libbpf_prepare_prog_load(struct bpf_program *prog,
 6614				    struct bpf_prog_load_opts *opts, long cookie)
 6615{
 6616	enum sec_def_flags def = cookie;
 6617
 6618	/* old kernels might not support specifying expected_attach_type */
 6619	if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
 6620		opts->expected_attach_type = 0;
 6621
 6622	if (def & SEC_SLEEPABLE)
 6623		opts->prog_flags |= BPF_F_SLEEPABLE;
 6624
 6625	if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
 6626		opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
 6627
 6628	if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
 6629		int btf_obj_fd = 0, btf_type_id = 0, err;
 6630		const char *attach_name;
 6631
 6632		attach_name = strchr(prog->sec_name, '/');
 6633		if (!attach_name) {
 6634			/* if BPF program is annotated with just SEC("fentry")
 6635			 * (or similar) without declaratively specifying
 6636			 * target, then it is expected that target will be
 6637			 * specified with bpf_program__set_attach_target() at
 6638			 * runtime before BPF object load step. If not, then
 6639			 * there is nothing to load into the kernel as BPF
 6640			 * verifier won't be able to validate BPF program
 6641			 * correctness anyways.
 6642			 */
 6643			pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
 6644				prog->name);
 6645			return -EINVAL;
 6646		}
 6647		attach_name++; /* skip over / */
 6648
 6649		err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
 6650		if (err)
 6651			return err;
 6652
 6653		/* cache resolved BTF FD and BTF type ID in the prog */
 6654		prog->attach_btf_obj_fd = btf_obj_fd;
 6655		prog->attach_btf_id = btf_type_id;
 6656
 6657		/* but by now libbpf common logic is not utilizing
 6658		 * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because
 6659		 * this callback is called after opts were populated by
 6660		 * libbpf, so this callback has to update opts explicitly here
 6661		 */
 6662		opts->attach_btf_obj_fd = btf_obj_fd;
 6663		opts->attach_btf_id = btf_type_id;
 6664	}
 6665	return 0;
 6666}
 6667
 6668static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
 6669
 6670static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
 6671				struct bpf_insn *insns, int insns_cnt,
 6672				const char *license, __u32 kern_version, int *prog_fd)
 6673{
 6674	LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
 6675	const char *prog_name = NULL;
 6676	char *cp, errmsg[STRERR_BUFSIZE];
 6677	size_t log_buf_size = 0;
 6678	char *log_buf = NULL, *tmp;
 6679	int btf_fd, ret, err;
 6680	bool own_log_buf = true;
 6681	__u32 log_level = prog->log_level;
 6682
 6683	if (prog->type == BPF_PROG_TYPE_UNSPEC) {
 6684		/*
 6685		 * The program type must be set.  Most likely we couldn't find a proper
 6686		 * section definition at load time, and thus we didn't infer the type.
 6687		 */
 6688		pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
 6689			prog->name, prog->sec_name);
 6690		return -EINVAL;
 6691	}
 6692
 6693	if (!insns || !insns_cnt)
 6694		return -EINVAL;
 6695
 6696	load_attr.expected_attach_type = prog->expected_attach_type;
 6697	if (kernel_supports(obj, FEAT_PROG_NAME))
 6698		prog_name = prog->name;
 6699	load_attr.attach_prog_fd = prog->attach_prog_fd;
 6700	load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
 6701	load_attr.attach_btf_id = prog->attach_btf_id;
 6702	load_attr.kern_version = kern_version;
 6703	load_attr.prog_ifindex = prog->prog_ifindex;
 6704
 6705	/* specify func_info/line_info only if kernel supports them */
 6706	btf_fd = bpf_object__btf_fd(obj);
 6707	if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
 6708		load_attr.prog_btf_fd = btf_fd;
 6709		load_attr.func_info = prog->func_info;
 6710		load_attr.func_info_rec_size = prog->func_info_rec_size;
 6711		load_attr.func_info_cnt = prog->func_info_cnt;
 6712		load_attr.line_info = prog->line_info;
 6713		load_attr.line_info_rec_size = prog->line_info_rec_size;
 6714		load_attr.line_info_cnt = prog->line_info_cnt;
 6715	}
 6716	load_attr.log_level = log_level;
 6717	load_attr.prog_flags = prog->prog_flags;
 6718	load_attr.fd_array = obj->fd_array;
 6719
 6720	/* adjust load_attr if sec_def provides custom preload callback */
 6721	if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
 6722		err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
 6723		if (err < 0) {
 6724			pr_warn("prog '%s': failed to prepare load attributes: %d\n",
 6725				prog->name, err);
 6726			return err;
 6727		}
 6728		insns = prog->insns;
 6729		insns_cnt = prog->insns_cnt;
 6730	}
 6731
 6732	if (obj->gen_loader) {
 6733		bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
 6734				   license, insns, insns_cnt, &load_attr,
 6735				   prog - obj->programs);
 6736		*prog_fd = -1;
 6737		return 0;
 6738	}
 6739
 6740retry_load:
 6741	/* if log_level is zero, we don't request logs initially even if
 6742	 * custom log_buf is specified; if the program load fails, then we'll
 6743	 * bump log_level to 1 and use either custom log_buf or we'll allocate
 6744	 * our own and retry the load to get details on what failed
 6745	 */
 6746	if (log_level) {
 6747		if (prog->log_buf) {
 6748			log_buf = prog->log_buf;
 6749			log_buf_size = prog->log_size;
 6750			own_log_buf = false;
 6751		} else if (obj->log_buf) {
 6752			log_buf = obj->log_buf;
 6753			log_buf_size = obj->log_size;
 6754			own_log_buf = false;
 6755		} else {
 6756			log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
 6757			tmp = realloc(log_buf, log_buf_size);
 6758			if (!tmp) {
 6759				ret = -ENOMEM;
 6760				goto out;
 6761			}
 6762			log_buf = tmp;
 6763			log_buf[0] = '\0';
 6764			own_log_buf = true;
 6765		}
 6766	}
 6767
 6768	load_attr.log_buf = log_buf;
 6769	load_attr.log_size = log_buf_size;
 6770	load_attr.log_level = log_level;
 6771
 6772	ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
 6773	if (ret >= 0) {
 6774		if (log_level && own_log_buf) {
 6775			pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
 6776				 prog->name, log_buf);
 6777		}
 6778
 6779		if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
 6780			struct bpf_map *map;
 6781			int i;
 6782
 6783			for (i = 0; i < obj->nr_maps; i++) {
 6784				map = &prog->obj->maps[i];
 6785				if (map->libbpf_type != LIBBPF_MAP_RODATA)
 6786					continue;
 6787
 6788				if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) {
 6789					cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 6790					pr_warn("prog '%s': failed to bind map '%s': %s\n",
 6791						prog->name, map->real_name, cp);
 6792					/* Don't fail hard if can't bind rodata. */
 6793				}
 6794			}
 6795		}
 6796
 6797		*prog_fd = ret;
 6798		ret = 0;
 6799		goto out;
 6800	}
 6801
 6802	if (log_level == 0) {
 6803		log_level = 1;
 6804		goto retry_load;
 6805	}
 6806	/* On ENOSPC, increase log buffer size and retry, unless custom
 6807	 * log_buf is specified.
 6808	 * Be careful to not overflow u32, though. Kernel's log buf size limit
 6809	 * isn't part of UAPI so it can always be bumped to full 4GB. So don't
 6810	 * multiply by 2 unless we are sure we'll fit within 32 bits.
 6811	 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
 6812	 */
 6813	if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
 6814		goto retry_load;
 6815
 6816	ret = -errno;
 6817
 6818	/* post-process verifier log to improve error descriptions */
 6819	fixup_verifier_log(prog, log_buf, log_buf_size);
 6820
 6821	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 6822	pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
 6823	pr_perm_msg(ret);
 6824
 6825	if (own_log_buf && log_buf && log_buf[0] != '\0') {
 6826		pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
 6827			prog->name, log_buf);
 6828	}
 6829
 6830out:
 6831	if (own_log_buf)
 6832		free(log_buf);
 6833	return ret;
 6834}
 6835
 6836static char *find_prev_line(char *buf, char *cur)
 6837{
 6838	char *p;
 6839
 6840	if (cur == buf) /* end of a log buf */
 6841		return NULL;
 6842
 6843	p = cur - 1;
 6844	while (p - 1 >= buf && *(p - 1) != '\n')
 6845		p--;
 6846
 6847	return p;
 6848}
 6849
 6850static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
 6851		      char *orig, size_t orig_sz, const char *patch)
 6852{
 6853	/* size of the remaining log content to the right from the to-be-replaced part */
 6854	size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
 6855	size_t patch_sz = strlen(patch);
 6856
 6857	if (patch_sz != orig_sz) {
 6858		/* If patch line(s) are longer than original piece of verifier log,
 6859		 * shift log contents by (patch_sz - orig_sz) bytes to the right
 6860		 * starting from after to-be-replaced part of the log.
 6861		 *
 6862		 * If patch line(s) are shorter than original piece of verifier log,
 6863		 * shift log contents by (orig_sz - patch_sz) bytes to the left
 6864		 * starting from after to-be-replaced part of the log
 6865		 *
 6866		 * We need to be careful about not overflowing available
 6867		 * buf_sz capacity. If that's the case, we'll truncate the end
 6868		 * of the original log, as necessary.
 6869		 */
 6870		if (patch_sz > orig_sz) {
 6871			if (orig + patch_sz >= buf + buf_sz) {
 6872				/* patch is big enough to cover remaining space completely */
 6873				patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
 6874				rem_sz = 0;
 6875			} else if (patch_sz - orig_sz > buf_sz - log_sz) {
 6876				/* patch causes part of remaining log to be truncated */
 6877				rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
 6878			}
 6879		}
 6880		/* shift remaining log to the right by calculated amount */
 6881		memmove(orig + patch_sz, orig + orig_sz, rem_sz);
 6882	}
 6883
 6884	memcpy(orig, patch, patch_sz);
 6885}
 6886
 6887static void fixup_log_failed_core_relo(struct bpf_program *prog,
 6888				       char *buf, size_t buf_sz, size_t log_sz,
 6889				       char *line1, char *line2, char *line3)
 6890{
 6891	/* Expected log for failed and not properly guarded CO-RE relocation:
 6892	 * line1 -> 123: (85) call unknown#195896080
 6893	 * line2 -> invalid func unknown#195896080
 6894	 * line3 -> <anything else or end of buffer>
 6895	 *
 6896	 * "123" is the index of the instruction that was poisoned. We extract
 6897	 * instruction index to find corresponding CO-RE relocation and
 6898	 * replace this part of the log with more relevant information about
 6899	 * failed CO-RE relocation.
 6900	 */
 6901	const struct bpf_core_relo *relo;
 6902	struct bpf_core_spec spec;
 6903	char patch[512], spec_buf[256];
 6904	int insn_idx, err, spec_len;
 6905
 6906	if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
 6907		return;
 6908
 6909	relo = find_relo_core(prog, insn_idx);
 6910	if (!relo)
 6911		return;
 6912
 6913	err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
 6914	if (err)
 6915		return;
 6916
 6917	spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
 6918	snprintf(patch, sizeof(patch),
 6919		 "%d: <invalid CO-RE relocation>\n"
 6920		 "failed to resolve CO-RE relocation %s%s\n",
 6921		 insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
 6922
 6923	patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
 6924}
 6925
 6926static void fixup_log_missing_map_load(struct bpf_program *prog,
 6927				       char *buf, size_t buf_sz, size_t log_sz,
 6928				       char *line1, char *line2, char *line3)
 6929{
 6930	/* Expected log for failed and not properly guarded CO-RE relocation:
 6931	 * line1 -> 123: (85) call unknown#2001000345
 6932	 * line2 -> invalid func unknown#2001000345
 6933	 * line3 -> <anything else or end of buffer>
 6934	 *
 6935	 * "123" is the index of the instruction that was poisoned.
 6936	 * "345" in "2001000345" are map index in obj->maps to fetch map name.
 6937	 */
 6938	struct bpf_object *obj = prog->obj;
 6939	const struct bpf_map *map;
 6940	int insn_idx, map_idx;
 6941	char patch[128];
 6942
 6943	if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
 6944		return;
 6945
 6946	map_idx -= MAP_LDIMM64_POISON_BASE;
 6947	if (map_idx < 0 || map_idx >= obj->nr_maps)
 6948		return;
 6949	map = &obj->maps[map_idx];
 6950
 6951	snprintf(patch, sizeof(patch),
 6952		 "%d: <invalid BPF map reference>\n"
 6953		 "BPF map '%s' is referenced but wasn't created\n",
 6954		 insn_idx, map->name);
 6955
 6956	patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
 6957}
 6958
 6959static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
 6960{
 6961	/* look for familiar error patterns in last N lines of the log */
 6962	const size_t max_last_line_cnt = 10;
 6963	char *prev_line, *cur_line, *next_line;
 6964	size_t log_sz;
 6965	int i;
 6966
 6967	if (!buf)
 6968		return;
 6969
 6970	log_sz = strlen(buf) + 1;
 6971	next_line = buf + log_sz - 1;
 6972
 6973	for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
 6974		cur_line = find_prev_line(buf, next_line);
 6975		if (!cur_line)
 6976			return;
 6977
 6978		/* failed CO-RE relocation case */
 6979		if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
 6980			prev_line = find_prev_line(buf, cur_line);
 6981			if (!prev_line)
 6982				continue;
 6983
 6984			fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
 6985						   prev_line, cur_line, next_line);
 6986			return;
 6987		} else if (str_has_pfx(cur_line, "invalid func unknown#"MAP_LDIMM64_POISON_PFX)) {
 6988			prev_line = find_prev_line(buf, cur_line);
 6989			if (!prev_line)
 6990				continue;
 6991
 6992			fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
 6993						   prev_line, cur_line, next_line);
 6994			return;
 6995		}
 6996	}
 6997}
 6998
 6999static int bpf_program_record_relos(struct bpf_program *prog)
 7000{
 7001	struct bpf_object *obj = prog->obj;
 7002	int i;
 7003
 7004	for (i = 0; i < prog->nr_reloc; i++) {
 7005		struct reloc_desc *relo = &prog->reloc_desc[i];
 7006		struct extern_desc *ext = &obj->externs[relo->sym_off];
 7007
 7008		switch (relo->type) {
 7009		case RELO_EXTERN_VAR:
 7010			if (ext->type != EXT_KSYM)
 7011				continue;
 7012			bpf_gen__record_extern(obj->gen_loader, ext->name,
 7013					       ext->is_weak, !ext->ksym.type_id,
 7014					       BTF_KIND_VAR, relo->insn_idx);
 7015			break;
 7016		case RELO_EXTERN_FUNC:
 7017			bpf_gen__record_extern(obj->gen_loader, ext->name,
 7018					       ext->is_weak, false, BTF_KIND_FUNC,
 7019					       relo->insn_idx);
 7020			break;
 7021		case RELO_CORE: {
 7022			struct bpf_core_relo cr = {
 7023				.insn_off = relo->insn_idx * 8,
 7024				.type_id = relo->core_relo->type_id,
 7025				.access_str_off = relo->core_relo->access_str_off,
 7026				.kind = relo->core_relo->kind,
 7027			};
 7028
 7029			bpf_gen__record_relo_core(obj->gen_loader, &cr);
 7030			break;
 7031		}
 7032		default:
 7033			continue;
 7034		}
 7035	}
 7036	return 0;
 7037}
 7038
 7039static int
 7040bpf_object__load_progs(struct bpf_object *obj, int log_level)
 7041{
 7042	struct bpf_program *prog;
 7043	size_t i;
 7044	int err;
 7045
 7046	for (i = 0; i < obj->nr_programs; i++) {
 7047		prog = &obj->programs[i];
 7048		err = bpf_object__sanitize_prog(obj, prog);
 7049		if (err)
 7050			return err;
 7051	}
 7052
 7053	for (i = 0; i < obj->nr_programs; i++) {
 7054		prog = &obj->programs[i];
 7055		if (prog_is_subprog(obj, prog))
 7056			continue;
 7057		if (!prog->autoload) {
 7058			pr_debug("prog '%s': skipped loading\n", prog->name);
 7059			continue;
 7060		}
 7061		prog->log_level |= log_level;
 7062
 7063		if (obj->gen_loader)
 7064			bpf_program_record_relos(prog);
 7065
 7066		err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
 7067					   obj->license, obj->kern_version, &prog->fd);
 7068		if (err) {
 7069			pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
 7070			return err;
 7071		}
 7072	}
 7073
 7074	bpf_object__free_relocs(obj);
 7075	return 0;
 7076}
 7077
 7078static const struct bpf_sec_def *find_sec_def(const char *sec_name);
 7079
 7080static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
 7081{
 7082	struct bpf_program *prog;
 7083	int err;
 7084
 7085	bpf_object__for_each_program(prog, obj) {
 7086		prog->sec_def = find_sec_def(prog->sec_name);
 7087		if (!prog->sec_def) {
 7088			/* couldn't guess, but user might manually specify */
 7089			pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
 7090				prog->name, prog->sec_name);
 7091			continue;
 7092		}
 7093
 7094		prog->type = prog->sec_def->prog_type;
 7095		prog->expected_attach_type = prog->sec_def->expected_attach_type;
 7096
 7097		/* sec_def can have custom callback which should be called
 7098		 * after bpf_program is initialized to adjust its properties
 7099		 */
 7100		if (prog->sec_def->prog_setup_fn) {
 7101			err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
 7102			if (err < 0) {
 7103				pr_warn("prog '%s': failed to initialize: %d\n",
 7104					prog->name, err);
 7105				return err;
 7106			}
 7107		}
 7108	}
 7109
 7110	return 0;
 7111}
 7112
 7113static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
 7114					  const struct bpf_object_open_opts *opts)
 7115{
 7116	const char *obj_name, *kconfig, *btf_tmp_path;
 7117	struct bpf_object *obj;
 7118	char tmp_name[64];
 7119	int err;
 7120	char *log_buf;
 7121	size_t log_size;
 7122	__u32 log_level;
 7123
 7124	if (elf_version(EV_CURRENT) == EV_NONE) {
 7125		pr_warn("failed to init libelf for %s\n",
 7126			path ? : "(mem buf)");
 7127		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
 7128	}
 7129
 7130	if (!OPTS_VALID(opts, bpf_object_open_opts))
 7131		return ERR_PTR(-EINVAL);
 7132
 7133	obj_name = OPTS_GET(opts, object_name, NULL);
 7134	if (obj_buf) {
 7135		if (!obj_name) {
 7136			snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
 7137				 (unsigned long)obj_buf,
 7138				 (unsigned long)obj_buf_sz);
 7139			obj_name = tmp_name;
 7140		}
 7141		path = obj_name;
 7142		pr_debug("loading object '%s' from buffer\n", obj_name);
 7143	}
 7144
 7145	log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
 7146	log_size = OPTS_GET(opts, kernel_log_size, 0);
 7147	log_level = OPTS_GET(opts, kernel_log_level, 0);
 7148	if (log_size > UINT_MAX)
 7149		return ERR_PTR(-EINVAL);
 7150	if (log_size && !log_buf)
 7151		return ERR_PTR(-EINVAL);
 7152
 7153	obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
 7154	if (IS_ERR(obj))
 7155		return obj;
 7156
 7157	obj->log_buf = log_buf;
 7158	obj->log_size = log_size;
 7159	obj->log_level = log_level;
 7160
 7161	btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
 7162	if (btf_tmp_path) {
 7163		if (strlen(btf_tmp_path) >= PATH_MAX) {
 7164			err = -ENAMETOOLONG;
 7165			goto out;
 7166		}
 7167		obj->btf_custom_path = strdup(btf_tmp_path);
 7168		if (!obj->btf_custom_path) {
 7169			err = -ENOMEM;
 7170			goto out;
 7171		}
 7172	}
 7173
 7174	kconfig = OPTS_GET(opts, kconfig, NULL);
 7175	if (kconfig) {
 7176		obj->kconfig = strdup(kconfig);
 7177		if (!obj->kconfig) {
 7178			err = -ENOMEM;
 7179			goto out;
 7180		}
 7181	}
 7182
 7183	err = bpf_object__elf_init(obj);
 7184	err = err ? : bpf_object__check_endianness(obj);
 7185	err = err ? : bpf_object__elf_collect(obj);
 7186	err = err ? : bpf_object__collect_externs(obj);
 7187	err = err ? : bpf_object__finalize_btf(obj);
 7188	err = err ? : bpf_object__init_maps(obj, opts);
 7189	err = err ? : bpf_object_init_progs(obj, opts);
 7190	err = err ? : bpf_object__collect_relos(obj);
 7191	if (err)
 7192		goto out;
 7193
 7194	bpf_object__elf_finish(obj);
 7195
 7196	return obj;
 7197out:
 7198	bpf_object__close(obj);
 7199	return ERR_PTR(err);
 7200}
 7201
 7202struct bpf_object *
 7203bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
 7204{
 7205	if (!path)
 7206		return libbpf_err_ptr(-EINVAL);
 7207
 7208	pr_debug("loading %s\n", path);
 7209
 7210	return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
 7211}
 7212
 7213struct bpf_object *bpf_object__open(const char *path)
 7214{
 7215	return bpf_object__open_file(path, NULL);
 7216}
 7217
 7218struct bpf_object *
 7219bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
 7220		     const struct bpf_object_open_opts *opts)
 7221{
 7222	if (!obj_buf || obj_buf_sz == 0)
 7223		return libbpf_err_ptr(-EINVAL);
 7224
 7225	return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
 7226}
 7227
 7228static int bpf_object_unload(struct bpf_object *obj)
 7229{
 7230	size_t i;
 7231
 7232	if (!obj)
 7233		return libbpf_err(-EINVAL);
 7234
 7235	for (i = 0; i < obj->nr_maps; i++) {
 7236		zclose(obj->maps[i].fd);
 7237		if (obj->maps[i].st_ops)
 7238			zfree(&obj->maps[i].st_ops->kern_vdata);
 7239	}
 7240
 7241	for (i = 0; i < obj->nr_programs; i++)
 7242		bpf_program__unload(&obj->programs[i]);
 7243
 7244	return 0;
 7245}
 7246
 7247int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload")));
 7248
 7249static int bpf_object__sanitize_maps(struct bpf_object *obj)
 7250{
 7251	struct bpf_map *m;
 7252
 7253	bpf_object__for_each_map(m, obj) {
 7254		if (!bpf_map__is_internal(m))
 7255			continue;
 7256		if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
 7257			m->def.map_flags ^= BPF_F_MMAPABLE;
 7258	}
 7259
 7260	return 0;
 7261}
 7262
 7263int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
 7264{
 7265	char sym_type, sym_name[500];
 7266	unsigned long long sym_addr;
 7267	int ret, err = 0;
 7268	FILE *f;
 7269
 7270	f = fopen("/proc/kallsyms", "r");
 7271	if (!f) {
 7272		err = -errno;
 7273		pr_warn("failed to open /proc/kallsyms: %d\n", err);
 7274		return err;
 7275	}
 7276
 7277	while (true) {
 7278		ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
 7279			     &sym_addr, &sym_type, sym_name);
 7280		if (ret == EOF && feof(f))
 7281			break;
 7282		if (ret != 3) {
 7283			pr_warn("failed to read kallsyms entry: %d\n", ret);
 7284			err = -EINVAL;
 7285			break;
 7286		}
 7287
 7288		err = cb(sym_addr, sym_type, sym_name, ctx);
 7289		if (err)
 7290			break;
 7291	}
 7292
 7293	fclose(f);
 7294	return err;
 7295}
 7296
 7297static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
 7298		       const char *sym_name, void *ctx)
 7299{
 7300	struct bpf_object *obj = ctx;
 7301	const struct btf_type *t;
 7302	struct extern_desc *ext;
 7303
 7304	ext = find_extern_by_name(obj, sym_name);
 7305	if (!ext || ext->type != EXT_KSYM)
 7306		return 0;
 7307
 7308	t = btf__type_by_id(obj->btf, ext->btf_id);
 7309	if (!btf_is_var(t))
 7310		return 0;
 7311
 7312	if (ext->is_set && ext->ksym.addr != sym_addr) {
 7313		pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
 7314			sym_name, ext->ksym.addr, sym_addr);
 7315		return -EINVAL;
 7316	}
 7317	if (!ext->is_set) {
 7318		ext->is_set = true;
 7319		ext->ksym.addr = sym_addr;
 7320		pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
 7321	}
 7322	return 0;
 7323}
 7324
 7325static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
 7326{
 7327	return libbpf_kallsyms_parse(kallsyms_cb, obj);
 7328}
 7329
 7330static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
 7331			    __u16 kind, struct btf **res_btf,
 7332			    struct module_btf **res_mod_btf)
 7333{
 7334	struct module_btf *mod_btf;
 7335	struct btf *btf;
 7336	int i, id, err;
 7337
 7338	btf = obj->btf_vmlinux;
 7339	mod_btf = NULL;
 7340	id = btf__find_by_name_kind(btf, ksym_name, kind);
 7341
 7342	if (id == -ENOENT) {
 7343		err = load_module_btfs(obj);
 7344		if (err)
 7345			return err;
 7346
 7347		for (i = 0; i < obj->btf_module_cnt; i++) {
 7348			/* we assume module_btf's BTF FD is always >0 */
 7349			mod_btf = &obj->btf_modules[i];
 7350			btf = mod_btf->btf;
 7351			id = btf__find_by_name_kind_own(btf, ksym_name, kind);
 7352			if (id != -ENOENT)
 7353				break;
 7354		}
 7355	}
 7356	if (id <= 0)
 7357		return -ESRCH;
 7358
 7359	*res_btf = btf;
 7360	*res_mod_btf = mod_btf;
 7361	return id;
 7362}
 7363
 7364static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
 7365					       struct extern_desc *ext)
 7366{
 7367	const struct btf_type *targ_var, *targ_type;
 7368	__u32 targ_type_id, local_type_id;
 7369	struct module_btf *mod_btf = NULL;
 7370	const char *targ_var_name;
 7371	struct btf *btf = NULL;
 7372	int id, err;
 7373
 7374	id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
 7375	if (id < 0) {
 7376		if (id == -ESRCH && ext->is_weak)
 7377			return 0;
 7378		pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
 7379			ext->name);
 7380		return id;
 7381	}
 7382
 7383	/* find local type_id */
 7384	local_type_id = ext->ksym.type_id;
 7385
 7386	/* find target type_id */
 7387	targ_var = btf__type_by_id(btf, id);
 7388	targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
 7389	targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
 7390
 7391	err = bpf_core_types_are_compat(obj->btf, local_type_id,
 7392					btf, targ_type_id);
 7393	if (err <= 0) {
 7394		const struct btf_type *local_type;
 7395		const char *targ_name, *local_name;
 7396
 7397		local_type = btf__type_by_id(obj->btf, local_type_id);
 7398		local_name = btf__name_by_offset(obj->btf, local_type->name_off);
 7399		targ_name = btf__name_by_offset(btf, targ_type->name_off);
 7400
 7401		pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
 7402			ext->name, local_type_id,
 7403			btf_kind_str(local_type), local_name, targ_type_id,
 7404			btf_kind_str(targ_type), targ_name);
 7405		return -EINVAL;
 7406	}
 7407
 7408	ext->is_set = true;
 7409	ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
 7410	ext->ksym.kernel_btf_id = id;
 7411	pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
 7412		 ext->name, id, btf_kind_str(targ_var), targ_var_name);
 7413
 7414	return 0;
 7415}
 7416
 7417static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
 7418						struct extern_desc *ext)
 7419{
 7420	int local_func_proto_id, kfunc_proto_id, kfunc_id;
 7421	struct module_btf *mod_btf = NULL;
 7422	const struct btf_type *kern_func;
 7423	struct btf *kern_btf = NULL;
 7424	int ret;
 7425
 7426	local_func_proto_id = ext->ksym.type_id;
 7427
 7428	kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf);
 7429	if (kfunc_id < 0) {
 7430		if (kfunc_id == -ESRCH && ext->is_weak)
 7431			return 0;
 7432		pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
 7433			ext->name);
 7434		return kfunc_id;
 7435	}
 7436
 7437	kern_func = btf__type_by_id(kern_btf, kfunc_id);
 7438	kfunc_proto_id = kern_func->type;
 7439
 7440	ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
 7441					kern_btf, kfunc_proto_id);
 7442	if (ret <= 0) {
 7443		pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n",
 7444			ext->name, local_func_proto_id, kfunc_proto_id);
 7445		return -EINVAL;
 7446	}
 7447
 7448	/* set index for module BTF fd in fd_array, if unset */
 7449	if (mod_btf && !mod_btf->fd_array_idx) {
 7450		/* insn->off is s16 */
 7451		if (obj->fd_array_cnt == INT16_MAX) {
 7452			pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
 7453				ext->name, mod_btf->fd_array_idx);
 7454			return -E2BIG;
 7455		}
 7456		/* Cannot use index 0 for module BTF fd */
 7457		if (!obj->fd_array_cnt)
 7458			obj->fd_array_cnt = 1;
 7459
 7460		ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
 7461					obj->fd_array_cnt + 1);
 7462		if (ret)
 7463			return ret;
 7464		mod_btf->fd_array_idx = obj->fd_array_cnt;
 7465		/* we assume module BTF FD is always >0 */
 7466		obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
 7467	}
 7468
 7469	ext->is_set = true;
 7470	ext->ksym.kernel_btf_id = kfunc_id;
 7471	ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
 7472	pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n",
 7473		 ext->name, kfunc_id);
 7474
 7475	return 0;
 7476}
 7477
 7478static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
 7479{
 7480	const struct btf_type *t;
 7481	struct extern_desc *ext;
 7482	int i, err;
 7483
 7484	for (i = 0; i < obj->nr_extern; i++) {
 7485		ext = &obj->externs[i];
 7486		if (ext->type != EXT_KSYM || !ext->ksym.type_id)
 7487			continue;
 7488
 7489		if (obj->gen_loader) {
 7490			ext->is_set = true;
 7491			ext->ksym.kernel_btf_obj_fd = 0;
 7492			ext->ksym.kernel_btf_id = 0;
 7493			continue;
 7494		}
 7495		t = btf__type_by_id(obj->btf, ext->btf_id);
 7496		if (btf_is_var(t))
 7497			err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
 7498		else
 7499			err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
 7500		if (err)
 7501			return err;
 7502	}
 7503	return 0;
 7504}
 7505
 7506static int bpf_object__resolve_externs(struct bpf_object *obj,
 7507				       const char *extra_kconfig)
 7508{
 7509	bool need_config = false, need_kallsyms = false;
 7510	bool need_vmlinux_btf = false;
 7511	struct extern_desc *ext;
 7512	void *kcfg_data = NULL;
 7513	int err, i;
 7514
 7515	if (obj->nr_extern == 0)
 7516		return 0;
 7517
 7518	if (obj->kconfig_map_idx >= 0)
 7519		kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
 7520
 7521	for (i = 0; i < obj->nr_extern; i++) {
 7522		ext = &obj->externs[i];
 7523
 7524		if (ext->type == EXT_KSYM) {
 7525			if (ext->ksym.type_id)
 7526				need_vmlinux_btf = true;
 7527			else
 7528				need_kallsyms = true;
 7529			continue;
 7530		} else if (ext->type == EXT_KCFG) {
 7531			void *ext_ptr = kcfg_data + ext->kcfg.data_off;
 7532			__u64 value = 0;
 7533
 7534			/* Kconfig externs need actual /proc/config.gz */
 7535			if (str_has_pfx(ext->name, "CONFIG_")) {
 7536				need_config = true;
 7537				continue;
 7538			}
 7539
 7540			/* Virtual kcfg externs are customly handled by libbpf */
 7541			if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
 7542				value = get_kernel_version();
 7543				if (!value) {
 7544					pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
 7545					return -EINVAL;
 7546				}
 7547			} else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
 7548				value = kernel_supports(obj, FEAT_BPF_COOKIE);
 7549			} else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
 7550				value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
 7551			} else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
 7552				/* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
 7553				 * __kconfig externs, where LINUX_ ones are virtual and filled out
 7554				 * customly by libbpf (their values don't come from Kconfig).
 7555				 * If LINUX_xxx variable is not recognized by libbpf, but is marked
 7556				 * __weak, it defaults to zero value, just like for CONFIG_xxx
 7557				 * externs.
 7558				 */
 7559				pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
 7560				return -EINVAL;
 7561			}
 7562
 7563			err = set_kcfg_value_num(ext, ext_ptr, value);
 7564			if (err)
 7565				return err;
 7566			pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
 7567				 ext->name, (long long)value);
 7568		} else {
 7569			pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
 7570			return -EINVAL;
 7571		}
 7572	}
 7573	if (need_config && extra_kconfig) {
 7574		err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
 7575		if (err)
 7576			return -EINVAL;
 7577		need_config = false;
 7578		for (i = 0; i < obj->nr_extern; i++) {
 7579			ext = &obj->externs[i];
 7580			if (ext->type == EXT_KCFG && !ext->is_set) {
 7581				need_config = true;
 7582				break;
 7583			}
 7584		}
 7585	}
 7586	if (need_config) {
 7587		err = bpf_object__read_kconfig_file(obj, kcfg_data);
 7588		if (err)
 7589			return -EINVAL;
 7590	}
 7591	if (need_kallsyms) {
 7592		err = bpf_object__read_kallsyms_file(obj);
 7593		if (err)
 7594			return -EINVAL;
 7595	}
 7596	if (need_vmlinux_btf) {
 7597		err = bpf_object__resolve_ksyms_btf_id(obj);
 7598		if (err)
 7599			return -EINVAL;
 7600	}
 7601	for (i = 0; i < obj->nr_extern; i++) {
 7602		ext = &obj->externs[i];
 7603
 7604		if (!ext->is_set && !ext->is_weak) {
 7605			pr_warn("extern '%s' (strong): not resolved\n", ext->name);
 7606			return -ESRCH;
 7607		} else if (!ext->is_set) {
 7608			pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
 7609				 ext->name);
 7610		}
 7611	}
 7612
 7613	return 0;
 7614}
 7615
 7616static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
 7617{
 7618	int err, i;
 7619
 7620	if (!obj)
 7621		return libbpf_err(-EINVAL);
 7622
 7623	if (obj->loaded) {
 7624		pr_warn("object '%s': load can't be attempted twice\n", obj->name);
 7625		return libbpf_err(-EINVAL);
 7626	}
 7627
 7628	if (obj->gen_loader)
 7629		bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
 7630
 7631	err = bpf_object__probe_loading(obj);
 7632	err = err ? : bpf_object__load_vmlinux_btf(obj, false);
 7633	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
 7634	err = err ? : bpf_object__sanitize_and_load_btf(obj);
 7635	err = err ? : bpf_object__sanitize_maps(obj);
 7636	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
 7637	err = err ? : bpf_object__create_maps(obj);
 7638	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
 7639	err = err ? : bpf_object__load_progs(obj, extra_log_level);
 7640	err = err ? : bpf_object_init_prog_arrays(obj);
 7641
 7642	if (obj->gen_loader) {
 7643		/* reset FDs */
 7644		if (obj->btf)
 7645			btf__set_fd(obj->btf, -1);
 7646		for (i = 0; i < obj->nr_maps; i++)
 7647			obj->maps[i].fd = -1;
 7648		if (!err)
 7649			err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
 7650	}
 7651
 7652	/* clean up fd_array */
 7653	zfree(&obj->fd_array);
 7654
 7655	/* clean up module BTFs */
 7656	for (i = 0; i < obj->btf_module_cnt; i++) {
 7657		close(obj->btf_modules[i].fd);
 7658		btf__free(obj->btf_modules[i].btf);
 7659		free(obj->btf_modules[i].name);
 7660	}
 7661	free(obj->btf_modules);
 7662
 7663	/* clean up vmlinux BTF */
 7664	btf__free(obj->btf_vmlinux);
 7665	obj->btf_vmlinux = NULL;
 7666
 7667	obj->loaded = true; /* doesn't matter if successfully or not */
 7668
 7669	if (err)
 7670		goto out;
 7671
 7672	return 0;
 7673out:
 7674	/* unpin any maps that were auto-pinned during load */
 7675	for (i = 0; i < obj->nr_maps; i++)
 7676		if (obj->maps[i].pinned && !obj->maps[i].reused)
 7677			bpf_map__unpin(&obj->maps[i], NULL);
 7678
 7679	bpf_object_unload(obj);
 7680	pr_warn("failed to load object '%s'\n", obj->path);
 7681	return libbpf_err(err);
 7682}
 7683
 7684int bpf_object__load(struct bpf_object *obj)
 7685{
 7686	return bpf_object_load(obj, 0, NULL);
 7687}
 7688
 7689static int make_parent_dir(const char *path)
 7690{
 7691	char *cp, errmsg[STRERR_BUFSIZE];
 7692	char *dname, *dir;
 7693	int err = 0;
 7694
 7695	dname = strdup(path);
 7696	if (dname == NULL)
 7697		return -ENOMEM;
 7698
 7699	dir = dirname(dname);
 7700	if (mkdir(dir, 0700) && errno != EEXIST)
 7701		err = -errno;
 7702
 7703	free(dname);
 7704	if (err) {
 7705		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
 7706		pr_warn("failed to mkdir %s: %s\n", path, cp);
 7707	}
 7708	return err;
 7709}
 7710
 7711static int check_path(const char *path)
 7712{
 7713	char *cp, errmsg[STRERR_BUFSIZE];
 7714	struct statfs st_fs;
 7715	char *dname, *dir;
 7716	int err = 0;
 7717
 7718	if (path == NULL)
 7719		return -EINVAL;
 7720
 7721	dname = strdup(path);
 7722	if (dname == NULL)
 7723		return -ENOMEM;
 7724
 7725	dir = dirname(dname);
 7726	if (statfs(dir, &st_fs)) {
 7727		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 7728		pr_warn("failed to statfs %s: %s\n", dir, cp);
 7729		err = -errno;
 7730	}
 7731	free(dname);
 7732
 7733	if (!err && st_fs.f_type != BPF_FS_MAGIC) {
 7734		pr_warn("specified path %s is not on BPF FS\n", path);
 7735		err = -EINVAL;
 7736	}
 7737
 7738	return err;
 7739}
 7740
 7741int bpf_program__pin(struct bpf_program *prog, const char *path)
 7742{
 7743	char *cp, errmsg[STRERR_BUFSIZE];
 7744	int err;
 7745
 7746	if (prog->fd < 0) {
 7747		pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
 7748		return libbpf_err(-EINVAL);
 7749	}
 7750
 7751	err = make_parent_dir(path);
 7752	if (err)
 7753		return libbpf_err(err);
 7754
 7755	err = check_path(path);
 7756	if (err)
 7757		return libbpf_err(err);
 7758
 7759	if (bpf_obj_pin(prog->fd, path)) {
 7760		err = -errno;
 7761		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 7762		pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
 7763		return libbpf_err(err);
 7764	}
 7765
 7766	pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
 7767	return 0;
 7768}
 7769
 7770int bpf_program__unpin(struct bpf_program *prog, const char *path)
 7771{
 7772	int err;
 7773
 7774	if (prog->fd < 0) {
 7775		pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
 7776		return libbpf_err(-EINVAL);
 7777	}
 7778
 7779	err = check_path(path);
 7780	if (err)
 7781		return libbpf_err(err);
 7782
 7783	err = unlink(path);
 7784	if (err)
 7785		return libbpf_err(-errno);
 7786
 7787	pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
 7788	return 0;
 7789}
 7790
 7791int bpf_map__pin(struct bpf_map *map, const char *path)
 7792{
 7793	char *cp, errmsg[STRERR_BUFSIZE];
 7794	int err;
 7795
 7796	if (map == NULL) {
 7797		pr_warn("invalid map pointer\n");
 7798		return libbpf_err(-EINVAL);
 7799	}
 7800
 7801	if (map->pin_path) {
 7802		if (path && strcmp(path, map->pin_path)) {
 7803			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
 7804				bpf_map__name(map), map->pin_path, path);
 7805			return libbpf_err(-EINVAL);
 7806		} else if (map->pinned) {
 7807			pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
 7808				 bpf_map__name(map), map->pin_path);
 7809			return 0;
 7810		}
 7811	} else {
 7812		if (!path) {
 7813			pr_warn("missing a path to pin map '%s' at\n",
 7814				bpf_map__name(map));
 7815			return libbpf_err(-EINVAL);
 7816		} else if (map->pinned) {
 7817			pr_warn("map '%s' already pinned\n", bpf_map__name(map));
 7818			return libbpf_err(-EEXIST);
 7819		}
 7820
 7821		map->pin_path = strdup(path);
 7822		if (!map->pin_path) {
 7823			err = -errno;
 7824			goto out_err;
 7825		}
 7826	}
 7827
 7828	err = make_parent_dir(map->pin_path);
 7829	if (err)
 7830		return libbpf_err(err);
 7831
 7832	err = check_path(map->pin_path);
 7833	if (err)
 7834		return libbpf_err(err);
 7835
 7836	if (bpf_obj_pin(map->fd, map->pin_path)) {
 7837		err = -errno;
 7838		goto out_err;
 7839	}
 7840
 7841	map->pinned = true;
 7842	pr_debug("pinned map '%s'\n", map->pin_path);
 7843
 7844	return 0;
 7845
 7846out_err:
 7847	cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
 7848	pr_warn("failed to pin map: %s\n", cp);
 7849	return libbpf_err(err);
 7850}
 7851
 7852int bpf_map__unpin(struct bpf_map *map, const char *path)
 7853{
 7854	int err;
 7855
 7856	if (map == NULL) {
 7857		pr_warn("invalid map pointer\n");
 7858		return libbpf_err(-EINVAL);
 7859	}
 7860
 7861	if (map->pin_path) {
 7862		if (path && strcmp(path, map->pin_path)) {
 7863			pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
 7864				bpf_map__name(map), map->pin_path, path);
 7865			return libbpf_err(-EINVAL);
 7866		}
 7867		path = map->pin_path;
 7868	} else if (!path) {
 7869		pr_warn("no path to unpin map '%s' from\n",
 7870			bpf_map__name(map));
 7871		return libbpf_err(-EINVAL);
 7872	}
 7873
 7874	err = check_path(path);
 7875	if (err)
 7876		return libbpf_err(err);
 7877
 7878	err = unlink(path);
 7879	if (err != 0)
 7880		return libbpf_err(-errno);
 7881
 7882	map->pinned = false;
 7883	pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
 7884
 7885	return 0;
 7886}
 7887
 7888int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
 7889{
 7890	char *new = NULL;
 7891
 7892	if (path) {
 7893		new = strdup(path);
 7894		if (!new)
 7895			return libbpf_err(-errno);
 7896	}
 7897
 7898	free(map->pin_path);
 7899	map->pin_path = new;
 7900	return 0;
 7901}
 7902
 7903__alias(bpf_map__pin_path)
 7904const char *bpf_map__get_pin_path(const struct bpf_map *map);
 7905
 7906const char *bpf_map__pin_path(const struct bpf_map *map)
 7907{
 7908	return map->pin_path;
 7909}
 7910
 7911bool bpf_map__is_pinned(const struct bpf_map *map)
 7912{
 7913	return map->pinned;
 7914}
 7915
 7916static void sanitize_pin_path(char *s)
 7917{
 7918	/* bpffs disallows periods in path names */
 7919	while (*s) {
 7920		if (*s == '.')
 7921			*s = '_';
 7922		s++;
 7923	}
 7924}
 7925
 7926int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 7927{
 7928	struct bpf_map *map;
 7929	int err;
 7930
 7931	if (!obj)
 7932		return libbpf_err(-ENOENT);
 7933
 7934	if (!obj->loaded) {
 7935		pr_warn("object not yet loaded; load it first\n");
 7936		return libbpf_err(-ENOENT);
 7937	}
 7938
 7939	bpf_object__for_each_map(map, obj) {
 7940		char *pin_path = NULL;
 7941		char buf[PATH_MAX];
 7942
 7943		if (!map->autocreate)
 7944			continue;
 7945
 7946		if (path) {
 7947			int len;
 7948
 7949			len = snprintf(buf, PATH_MAX, "%s/%s", path,
 7950				       bpf_map__name(map));
 7951			if (len < 0) {
 7952				err = -EINVAL;
 7953				goto err_unpin_maps;
 7954			} else if (len >= PATH_MAX) {
 7955				err = -ENAMETOOLONG;
 7956				goto err_unpin_maps;
 7957			}
 7958			sanitize_pin_path(buf);
 7959			pin_path = buf;
 7960		} else if (!map->pin_path) {
 7961			continue;
 7962		}
 7963
 7964		err = bpf_map__pin(map, pin_path);
 7965		if (err)
 7966			goto err_unpin_maps;
 7967	}
 7968
 7969	return 0;
 7970
 7971err_unpin_maps:
 7972	while ((map = bpf_object__prev_map(obj, map))) {
 7973		if (!map->pin_path)
 7974			continue;
 7975
 7976		bpf_map__unpin(map, NULL);
 7977	}
 7978
 7979	return libbpf_err(err);
 7980}
 7981
 7982int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 7983{
 7984	struct bpf_map *map;
 7985	int err;
 7986
 7987	if (!obj)
 7988		return libbpf_err(-ENOENT);
 7989
 7990	bpf_object__for_each_map(map, obj) {
 7991		char *pin_path = NULL;
 7992		char buf[PATH_MAX];
 7993
 7994		if (path) {
 7995			int len;
 7996
 7997			len = snprintf(buf, PATH_MAX, "%s/%s", path,
 7998				       bpf_map__name(map));
 7999			if (len < 0)
 8000				return libbpf_err(-EINVAL);
 8001			else if (len >= PATH_MAX)
 8002				return libbpf_err(-ENAMETOOLONG);
 8003			sanitize_pin_path(buf);
 8004			pin_path = buf;
 8005		} else if (!map->pin_path) {
 8006			continue;
 8007		}
 8008
 8009		err = bpf_map__unpin(map, pin_path);
 8010		if (err)
 8011			return libbpf_err(err);
 8012	}
 8013
 8014	return 0;
 8015}
 8016
 8017int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
 8018{
 8019	struct bpf_program *prog;
 8020	int err;
 8021
 8022	if (!obj)
 8023		return libbpf_err(-ENOENT);
 8024
 8025	if (!obj->loaded) {
 8026		pr_warn("object not yet loaded; load it first\n");
 8027		return libbpf_err(-ENOENT);
 8028	}
 8029
 8030	bpf_object__for_each_program(prog, obj) {
 8031		char buf[PATH_MAX];
 8032		int len;
 8033
 8034		len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
 8035		if (len < 0) {
 8036			err = -EINVAL;
 8037			goto err_unpin_programs;
 8038		} else if (len >= PATH_MAX) {
 8039			err = -ENAMETOOLONG;
 8040			goto err_unpin_programs;
 8041		}
 8042
 8043		err = bpf_program__pin(prog, buf);
 8044		if (err)
 8045			goto err_unpin_programs;
 8046	}
 8047
 8048	return 0;
 8049
 8050err_unpin_programs:
 8051	while ((prog = bpf_object__prev_program(obj, prog))) {
 8052		char buf[PATH_MAX];
 8053		int len;
 8054
 8055		len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
 8056		if (len < 0)
 8057			continue;
 8058		else if (len >= PATH_MAX)
 8059			continue;
 8060
 8061		bpf_program__unpin(prog, buf);
 8062	}
 8063
 8064	return libbpf_err(err);
 8065}
 8066
 8067int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
 8068{
 8069	struct bpf_program *prog;
 8070	int err;
 8071
 8072	if (!obj)
 8073		return libbpf_err(-ENOENT);
 8074
 8075	bpf_object__for_each_program(prog, obj) {
 8076		char buf[PATH_MAX];
 8077		int len;
 8078
 8079		len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
 8080		if (len < 0)
 8081			return libbpf_err(-EINVAL);
 8082		else if (len >= PATH_MAX)
 8083			return libbpf_err(-ENAMETOOLONG);
 8084
 8085		err = bpf_program__unpin(prog, buf);
 8086		if (err)
 8087			return libbpf_err(err);
 8088	}
 8089
 8090	return 0;
 8091}
 8092
 8093int bpf_object__pin(struct bpf_object *obj, const char *path)
 8094{
 8095	int err;
 8096
 8097	err = bpf_object__pin_maps(obj, path);
 8098	if (err)
 8099		return libbpf_err(err);
 8100
 8101	err = bpf_object__pin_programs(obj, path);
 8102	if (err) {
 8103		bpf_object__unpin_maps(obj, path);
 8104		return libbpf_err(err);
 8105	}
 8106
 8107	return 0;
 8108}
 8109
 8110static void bpf_map__destroy(struct bpf_map *map)
 8111{
 8112	if (map->inner_map) {
 8113		bpf_map__destroy(map->inner_map);
 8114		zfree(&map->inner_map);
 8115	}
 8116
 8117	zfree(&map->init_slots);
 8118	map->init_slots_sz = 0;
 8119
 8120	if (map->mmaped) {
 8121		munmap(map->mmaped, bpf_map_mmap_sz(map));
 8122		map->mmaped = NULL;
 8123	}
 8124
 8125	if (map->st_ops) {
 8126		zfree(&map->st_ops->data);
 8127		zfree(&map->st_ops->progs);
 8128		zfree(&map->st_ops->kern_func_off);
 8129		zfree(&map->st_ops);
 8130	}
 8131
 8132	zfree(&map->name);
 8133	zfree(&map->real_name);
 8134	zfree(&map->pin_path);
 8135
 8136	if (map->fd >= 0)
 8137		zclose(map->fd);
 8138}
 8139
 8140void bpf_object__close(struct bpf_object *obj)
 8141{
 8142	size_t i;
 8143
 8144	if (IS_ERR_OR_NULL(obj))
 8145		return;
 8146
 8147	usdt_manager_free(obj->usdt_man);
 8148	obj->usdt_man = NULL;
 8149
 8150	bpf_gen__free(obj->gen_loader);
 8151	bpf_object__elf_finish(obj);
 8152	bpf_object_unload(obj);
 8153	btf__free(obj->btf);
 8154	btf_ext__free(obj->btf_ext);
 8155
 8156	for (i = 0; i < obj->nr_maps; i++)
 8157		bpf_map__destroy(&obj->maps[i]);
 8158
 8159	zfree(&obj->btf_custom_path);
 8160	zfree(&obj->kconfig);
 8161	zfree(&obj->externs);
 8162	obj->nr_extern = 0;
 8163
 8164	zfree(&obj->maps);
 8165	obj->nr_maps = 0;
 8166
 8167	if (obj->programs && obj->nr_programs) {
 8168		for (i = 0; i < obj->nr_programs; i++)
 8169			bpf_program__exit(&obj->programs[i]);
 8170	}
 8171	zfree(&obj->programs);
 8172
 8173	free(obj);
 8174}
 8175
 8176const char *bpf_object__name(const struct bpf_object *obj)
 8177{
 8178	return obj ? obj->name : libbpf_err_ptr(-EINVAL);
 8179}
 8180
 8181unsigned int bpf_object__kversion(const struct bpf_object *obj)
 8182{
 8183	return obj ? obj->kern_version : 0;
 8184}
 8185
 8186struct btf *bpf_object__btf(const struct bpf_object *obj)
 8187{
 8188	return obj ? obj->btf : NULL;
 8189}
 8190
 8191int bpf_object__btf_fd(const struct bpf_object *obj)
 8192{
 8193	return obj->btf ? btf__fd(obj->btf) : -1;
 8194}
 8195
 8196int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
 8197{
 8198	if (obj->loaded)
 8199		return libbpf_err(-EINVAL);
 8200
 8201	obj->kern_version = kern_version;
 8202
 8203	return 0;
 8204}
 8205
 8206int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
 8207{
 8208	struct bpf_gen *gen;
 8209
 8210	if (!opts)
 8211		return -EFAULT;
 8212	if (!OPTS_VALID(opts, gen_loader_opts))
 8213		return -EINVAL;
 8214	gen = calloc(sizeof(*gen), 1);
 8215	if (!gen)
 8216		return -ENOMEM;
 8217	gen->opts = opts;
 8218	obj->gen_loader = gen;
 8219	return 0;
 8220}
 8221
 8222static struct bpf_program *
 8223__bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
 8224		    bool forward)
 8225{
 8226	size_t nr_programs = obj->nr_programs;
 8227	ssize_t idx;
 8228
 8229	if (!nr_programs)
 8230		return NULL;
 8231
 8232	if (!p)
 8233		/* Iter from the beginning */
 8234		return forward ? &obj->programs[0] :
 8235			&obj->programs[nr_programs - 1];
 8236
 8237	if (p->obj != obj) {
 8238		pr_warn("error: program handler doesn't match object\n");
 8239		return errno = EINVAL, NULL;
 8240	}
 8241
 8242	idx = (p - obj->programs) + (forward ? 1 : -1);
 8243	if (idx >= obj->nr_programs || idx < 0)
 8244		return NULL;
 8245	return &obj->programs[idx];
 8246}
 8247
 8248struct bpf_program *
 8249bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
 8250{
 8251	struct bpf_program *prog = prev;
 8252
 8253	do {
 8254		prog = __bpf_program__iter(prog, obj, true);
 8255	} while (prog && prog_is_subprog(obj, prog));
 8256
 8257	return prog;
 8258}
 8259
 8260struct bpf_program *
 8261bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
 8262{
 8263	struct bpf_program *prog = next;
 8264
 8265	do {
 8266		prog = __bpf_program__iter(prog, obj, false);
 8267	} while (prog && prog_is_subprog(obj, prog));
 8268
 8269	return prog;
 8270}
 8271
 8272void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
 8273{
 8274	prog->prog_ifindex = ifindex;
 8275}
 8276
 8277const char *bpf_program__name(const struct bpf_program *prog)
 8278{
 8279	return prog->name;
 8280}
 8281
 8282const char *bpf_program__section_name(const struct bpf_program *prog)
 8283{
 8284	return prog->sec_name;
 8285}
 8286
 8287bool bpf_program__autoload(const struct bpf_program *prog)
 8288{
 8289	return prog->autoload;
 8290}
 8291
 8292int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
 8293{
 8294	if (prog->obj->loaded)
 8295		return libbpf_err(-EINVAL);
 8296
 8297	prog->autoload = autoload;
 8298	return 0;
 8299}
 8300
 8301const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
 8302{
 8303	return prog->insns;
 8304}
 8305
 8306size_t bpf_program__insn_cnt(const struct bpf_program *prog)
 8307{
 8308	return prog->insns_cnt;
 8309}
 8310
 8311int bpf_program__set_insns(struct bpf_program *prog,
 8312			   struct bpf_insn *new_insns, size_t new_insn_cnt)
 8313{
 8314	struct bpf_insn *insns;
 8315
 8316	if (prog->obj->loaded)
 8317		return -EBUSY;
 8318
 8319	insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
 8320	if (!insns) {
 8321		pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
 8322		return -ENOMEM;
 8323	}
 8324	memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
 8325
 8326	prog->insns = insns;
 8327	prog->insns_cnt = new_insn_cnt;
 8328	return 0;
 8329}
 8330
 8331int bpf_program__fd(const struct bpf_program *prog)
 8332{
 8333	if (!prog)
 8334		return libbpf_err(-EINVAL);
 8335
 8336	if (prog->fd < 0)
 8337		return libbpf_err(-ENOENT);
 8338
 8339	return prog->fd;
 8340}
 8341
 8342__alias(bpf_program__type)
 8343enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
 8344
 8345enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
 8346{
 8347	return prog->type;
 8348}
 8349
 8350int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
 8351{
 8352	if (prog->obj->loaded)
 8353		return libbpf_err(-EBUSY);
 8354
 8355	prog->type = type;
 8356	return 0;
 8357}
 8358
 8359__alias(bpf_program__expected_attach_type)
 8360enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
 8361
 8362enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
 8363{
 8364	return prog->expected_attach_type;
 8365}
 8366
 8367int bpf_program__set_expected_attach_type(struct bpf_program *prog,
 8368					   enum bpf_attach_type type)
 8369{
 8370	if (prog->obj->loaded)
 8371		return libbpf_err(-EBUSY);
 8372
 8373	prog->expected_attach_type = type;
 8374	return 0;
 8375}
 8376
 8377__u32 bpf_program__flags(const struct bpf_program *prog)
 8378{
 8379	return prog->prog_flags;
 8380}
 8381
 8382int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
 8383{
 8384	if (prog->obj->loaded)
 8385		return libbpf_err(-EBUSY);
 8386
 8387	prog->prog_flags = flags;
 8388	return 0;
 8389}
 8390
 8391__u32 bpf_program__log_level(const struct bpf_program *prog)
 8392{
 8393	return prog->log_level;
 8394}
 8395
 8396int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
 8397{
 8398	if (prog->obj->loaded)
 8399		return libbpf_err(-EBUSY);
 8400
 8401	prog->log_level = log_level;
 8402	return 0;
 8403}
 8404
 8405const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
 8406{
 8407	*log_size = prog->log_size;
 8408	return prog->log_buf;
 8409}
 8410
 8411int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
 8412{
 8413	if (log_size && !log_buf)
 8414		return -EINVAL;
 8415	if (prog->log_size > UINT_MAX)
 8416		return -EINVAL;
 8417	if (prog->obj->loaded)
 8418		return -EBUSY;
 8419
 8420	prog->log_buf = log_buf;
 8421	prog->log_size = log_size;
 8422	return 0;
 8423}
 8424
/* Build one struct bpf_sec_def initializer.
 *
 * @sec_pfx: section name pattern, stored in .sec
 * @ptype:   program type suffix, pasted onto BPF_PROG_TYPE_
 * @atype:   value for .expected_attach_type
 * @flags:   SEC_* flags, stored in .cookie as a long
 *
 * .prog_prepare_load_fn is always libbpf_prepare_prog_load. Any extra
 * arguments are appended as further positional initializers right after it;
 * the table in this file uses that to pass an attach callback (presumably
 * landing in the member that follows prog_prepare_load_fn - confirm against
 * the struct bpf_sec_def definition).
 */
#define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {			    \
	.sec = (char *)sec_pfx,						    \
	.prog_type = BPF_PROG_TYPE_##ptype,				    \
	.expected_attach_type = atype,					    \
	.cookie = (long)(flags),					    \
	.prog_prepare_load_fn = libbpf_prepare_prog_load,		    \
	__VA_ARGS__							    \
}
 8433
/* Forward declarations of the attach callbacks referenced by section_defs[].
 * All share one signature: the program, the entry's cookie, and an output
 * pointer for the resulting link.
 */
static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
 8444
/* Built-in table mapping ELF section name patterns to a program type, an
 * expected attach type, SEC_* flags (stored in ->cookie by SEC_DEF()) and,
 * where given, an attach callback.
 *
 * Pattern syntax is the one implemented by sec_def_matches(): a trailing '+'
 * accepts either the bare name or "name/extras", a trailing '/' requires a
 * prefix match, anything else must match exactly. find_sec_def() scans this
 * array front to back and the first matching entry wins.
 */
static const struct bpf_sec_def section_defs[] = {
	SEC_DEF("socket",		SOCKET_FILTER, 0, SEC_NONE),
	SEC_DEF("sk_reuseport/migrate",	SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
	SEC_DEF("sk_reuseport",		SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
	SEC_DEF("kprobe+",		KPROBE,	0, SEC_NONE, attach_kprobe),
	SEC_DEF("uprobe+",		KPROBE,	0, SEC_NONE, attach_uprobe),
	SEC_DEF("uprobe.s+",		KPROBE,	0, SEC_SLEEPABLE, attach_uprobe),
	SEC_DEF("kretprobe+",		KPROBE, 0, SEC_NONE, attach_kprobe),
	SEC_DEF("uretprobe+",		KPROBE, 0, SEC_NONE, attach_uprobe),
	SEC_DEF("uretprobe.s+",		KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
	SEC_DEF("kprobe.multi+",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
	SEC_DEF("kretprobe.multi+",	KPROBE,	BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
	SEC_DEF("ksyscall+",		KPROBE,	0, SEC_NONE, attach_ksyscall),
	SEC_DEF("kretsyscall+",		KPROBE, 0, SEC_NONE, attach_ksyscall),
	SEC_DEF("usdt+",		KPROBE,	0, SEC_NONE, attach_usdt),
	SEC_DEF("tc",			SCHED_CLS, 0, SEC_NONE),
	SEC_DEF("classifier",		SCHED_CLS, 0, SEC_NONE),
	SEC_DEF("action",		SCHED_ACT, 0, SEC_NONE),
	SEC_DEF("tracepoint+",		TRACEPOINT, 0, SEC_NONE, attach_tp),
	SEC_DEF("tp+",			TRACEPOINT, 0, SEC_NONE, attach_tp),
	SEC_DEF("raw_tracepoint+",	RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
	SEC_DEF("raw_tp+",		RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
	SEC_DEF("raw_tracepoint.w+",	RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
	SEC_DEF("raw_tp.w+",		RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
	SEC_DEF("tp_btf+",		TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("fentry+",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("fmod_ret+",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("fexit+",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("fentry.s+",		TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
	SEC_DEF("fmod_ret.s+",		TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
	SEC_DEF("fexit.s+",		TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
	SEC_DEF("freplace+",		EXT, 0, SEC_ATTACH_BTF, attach_trace),
	SEC_DEF("lsm+",			LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
	SEC_DEF("lsm.s+",		LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
	SEC_DEF("lsm_cgroup+",		LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
	SEC_DEF("iter+",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
	SEC_DEF("iter.s+",		TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
	SEC_DEF("syscall",		SYSCALL, 0, SEC_SLEEPABLE),
	SEC_DEF("xdp.frags/devmap",	XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
	SEC_DEF("xdp/devmap",		XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
	SEC_DEF("xdp.frags/cpumap",	XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
	SEC_DEF("xdp/cpumap",		XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
	SEC_DEF("xdp.frags",		XDP, BPF_XDP, SEC_XDP_FRAGS),
	SEC_DEF("xdp",			XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
	SEC_DEF("perf_event",		PERF_EVENT, 0, SEC_NONE),
	SEC_DEF("lwt_in",		LWT_IN, 0, SEC_NONE),
	SEC_DEF("lwt_out",		LWT_OUT, 0, SEC_NONE),
	SEC_DEF("lwt_xmit",		LWT_XMIT, 0, SEC_NONE),
	SEC_DEF("lwt_seg6local",	LWT_SEG6LOCAL, 0, SEC_NONE),
	SEC_DEF("sockops",		SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
	SEC_DEF("sk_skb/stream_parser",	SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
	SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
	SEC_DEF("sk_skb",		SK_SKB, 0, SEC_NONE),
	SEC_DEF("sk_msg",		SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
	SEC_DEF("lirc_mode2",		LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
	SEC_DEF("flow_dissector",	FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
	SEC_DEF("cgroup_skb/ingress",	CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
	SEC_DEF("cgroup_skb/egress",	CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
	SEC_DEF("cgroup/skb",		CGROUP_SKB, 0, SEC_NONE),
	SEC_DEF("cgroup/sock_create",	CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
	SEC_DEF("cgroup/sock_release",	CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
	SEC_DEF("cgroup/sock",		CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
	SEC_DEF("cgroup/post_bind4",	CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
	SEC_DEF("cgroup/post_bind6",	CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
	SEC_DEF("cgroup/bind4",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
	SEC_DEF("cgroup/bind6",		CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
	SEC_DEF("cgroup/connect4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
	SEC_DEF("cgroup/connect6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
	SEC_DEF("cgroup/sendmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
	SEC_DEF("cgroup/sendmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
	SEC_DEF("cgroup/recvmsg4",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
	SEC_DEF("cgroup/recvmsg6",	CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
	SEC_DEF("cgroup/getpeername4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
	SEC_DEF("cgroup/getpeername6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
	SEC_DEF("cgroup/getsockname4",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
	SEC_DEF("cgroup/getsockname6",	CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
	SEC_DEF("cgroup/sysctl",	CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
	SEC_DEF("cgroup/getsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
	SEC_DEF("cgroup/setsockopt",	CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
	SEC_DEF("cgroup/dev",		CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
	SEC_DEF("struct_ops+",		STRUCT_OPS, 0, SEC_NONE),
	SEC_DEF("sk_lookup",		SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
};
 8528
/* State for handlers added through libbpf_register_prog_handler() */

/* number of valid entries in custom_sec_defs[] */
static size_t custom_sec_def_cnt;
/* heap array of handlers registered with a non-NULL section name */
static struct bpf_sec_def *custom_sec_defs;
/* handler registered with a NULL section name; find_sec_def() returns it
 * when nothing else matched
 */
static struct bpf_sec_def custom_fallback_def;
/* true while custom_fallback_def is in use */
static bool has_custom_fallback_def;

/* id handed out to the most recently registered handler; the next
 * registration uses this value plus one
 */
static int last_custom_sec_def_handler_id;
 8535
 8536int libbpf_register_prog_handler(const char *sec,
 8537				 enum bpf_prog_type prog_type,
 8538				 enum bpf_attach_type exp_attach_type,
 8539				 const struct libbpf_prog_handler_opts *opts)
 8540{
 8541	struct bpf_sec_def *sec_def;
 8542
 8543	if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
 8544		return libbpf_err(-EINVAL);
 8545
 8546	if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
 8547		return libbpf_err(-E2BIG);
 8548
 8549	if (sec) {
 8550		sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
 8551					      sizeof(*sec_def));
 8552		if (!sec_def)
 8553			return libbpf_err(-ENOMEM);
 8554
 8555		custom_sec_defs = sec_def;
 8556		sec_def = &custom_sec_defs[custom_sec_def_cnt];
 8557	} else {
 8558		if (has_custom_fallback_def)
 8559			return libbpf_err(-EBUSY);
 8560
 8561		sec_def = &custom_fallback_def;
 8562	}
 8563
 8564	sec_def->sec = sec ? strdup(sec) : NULL;
 8565	if (sec && !sec_def->sec)
 8566		return libbpf_err(-ENOMEM);
 8567
 8568	sec_def->prog_type = prog_type;
 8569	sec_def->expected_attach_type = exp_attach_type;
 8570	sec_def->cookie = OPTS_GET(opts, cookie, 0);
 8571
 8572	sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
 8573	sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
 8574	sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
 8575
 8576	sec_def->handler_id = ++last_custom_sec_def_handler_id;
 8577
 8578	if (sec)
 8579		custom_sec_def_cnt++;
 8580	else
 8581		has_custom_fallback_def = true;
 8582
 8583	return sec_def->handler_id;
 8584}
 8585
 8586int libbpf_unregister_prog_handler(int handler_id)
 8587{
 8588	struct bpf_sec_def *sec_defs;
 8589	int i;
 8590
 8591	if (handler_id <= 0)
 8592		return libbpf_err(-EINVAL);
 8593
 8594	if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
 8595		memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
 8596		has_custom_fallback_def = false;
 8597		return 0;
 8598	}
 8599
 8600	for (i = 0; i < custom_sec_def_cnt; i++) {
 8601		if (custom_sec_defs[i].handler_id == handler_id)
 8602			break;
 8603	}
 8604
 8605	if (i == custom_sec_def_cnt)
 8606		return libbpf_err(-ENOENT);
 8607
 8608	free(custom_sec_defs[i].sec);
 8609	for (i = i + 1; i < custom_sec_def_cnt; i++)
 8610		custom_sec_defs[i - 1] = custom_sec_defs[i];
 8611	custom_sec_def_cnt--;
 8612
 8613	/* try to shrink the array, but it's ok if we couldn't */
 8614	sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
 8615	if (sec_defs)
 8616		custom_sec_defs = sec_defs;
 8617
 8618	return 0;
 8619}
 8620
 8621static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
 8622{
 8623	size_t len = strlen(sec_def->sec);
 8624
 8625	/* "type/" always has to have proper SEC("type/extras") form */
 8626	if (sec_def->sec[len - 1] == '/') {
 8627		if (str_has_pfx(sec_name, sec_def->sec))
 8628			return true;
 8629		return false;
 8630	}
 8631
 8632	/* "type+" means it can be either exact SEC("type") or
 8633	 * well-formed SEC("type/extras") with proper '/' separator
 8634	 */
 8635	if (sec_def->sec[len - 1] == '+') {
 8636		len--;
 8637		/* not even a prefix */
 8638		if (strncmp(sec_name, sec_def->sec, len) != 0)
 8639			return false;
 8640		/* exact match or has '/' separator */
 8641		if (sec_name[len] == '\0' || sec_name[len] == '/')
 8642			return true;
 8643		return false;
 8644	}
 8645
 8646	return strcmp(sec_name, sec_def->sec) == 0;
 8647}
 8648
 8649static const struct bpf_sec_def *find_sec_def(const char *sec_name)
 8650{
 8651	const struct bpf_sec_def *sec_def;
 8652	int i, n;
 8653
 8654	n = custom_sec_def_cnt;
 8655	for (i = 0; i < n; i++) {
 8656		sec_def = &custom_sec_defs[i];
 8657		if (sec_def_matches(sec_def, sec_name))
 8658			return sec_def;
 8659	}
 8660
 8661	n = ARRAY_SIZE(section_defs);
 8662	for (i = 0; i < n; i++) {
 8663		sec_def = &section_defs[i];
 8664		if (sec_def_matches(sec_def, sec_name))
 8665			return sec_def;
 8666	}
 8667
 8668	if (has_custom_fallback_def)
 8669		return &custom_fallback_def;
 8670
 8671	return NULL;
 8672}
 8673
 8674#define MAX_TYPE_NAME_SIZE 32
 8675
 8676static char *libbpf_get_type_names(bool attach_type)
 8677{
 8678	int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
 8679	char *buf;
 8680
 8681	buf = malloc(len);
 8682	if (!buf)
 8683		return NULL;
 8684
 8685	buf[0] = '\0';
 8686	/* Forge string buf with all available names */
 8687	for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
 8688		const struct bpf_sec_def *sec_def = &section_defs[i];
 8689
 8690		if (attach_type) {
 8691			if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
 8692				continue;
 8693
 8694			if (!(sec_def->cookie & SEC_ATTACHABLE))
 8695				continue;
 8696		}
 8697
 8698		if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
 8699			free(buf);
 8700			return NULL;
 8701		}
 8702		strcat(buf, " ");
 8703		strcat(buf, section_defs[i].sec);
 8704	}
 8705
 8706	return buf;
 8707}
 8708
 8709int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
 8710			     enum bpf_attach_type *expected_attach_type)
 8711{
 8712	const struct bpf_sec_def *sec_def;
 8713	char *type_names;
 8714
 8715	if (!name)
 8716		return libbpf_err(-EINVAL);
 8717
 8718	sec_def = find_sec_def(name);
 8719	if (sec_def) {
 8720		*prog_type = sec_def->prog_type;
 8721		*expected_attach_type = sec_def->expected_attach_type;
 8722		return 0;
 8723	}
 8724
 8725	pr_debug("failed to guess program type from ELF section '%s'\n", name);
 8726	type_names = libbpf_get_type_names(false);
 8727	if (type_names != NULL) {
 8728		pr_debug("supported section(type) names are:%s\n", type_names);
 8729		free(type_names);
 8730	}
 8731
 8732	return libbpf_err(-ESRCH);
 8733}
 8734
 8735const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
 8736{
 8737	if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
 8738		return NULL;
 8739
 8740	return attach_type_name[t];
 8741}
 8742
 8743const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
 8744{
 8745	if (t < 0 || t >= ARRAY_SIZE(link_type_name))
 8746		return NULL;
 8747
 8748	return link_type_name[t];
 8749}
 8750
 8751const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
 8752{
 8753	if (t < 0 || t >= ARRAY_SIZE(map_type_name))
 8754		return NULL;
 8755
 8756	return map_type_name[t];
 8757}
 8758
 8759const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
 8760{
 8761	if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
 8762		return NULL;
 8763
 8764	return prog_type_name[t];
 8765}
 8766
 8767static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
 8768						     size_t offset)
 8769{
 8770	struct bpf_map *map;
 8771	size_t i;
 8772
 8773	for (i = 0; i < obj->nr_maps; i++) {
 8774		map = &obj->maps[i];
 8775		if (!bpf_map__is_struct_ops(map))
 8776			continue;
 8777		if (map->sec_offset <= offset &&
 8778		    offset - map->sec_offset < map->def.value_size)
 8779			return map;
 8780	}
 8781
 8782	return NULL;
 8783}
 8784
/* Collect the reloc from ELF and populate the st_ops->progs[]
 *
 * For every relocation entry in @data (sized by @shdr) this finds the
 * struct_ops map covering rel->r_offset, the struct member located at that
 * offset inside the map, and the BPF program the relocation's symbol points
 * to, and then stores that program in st_ops->progs[member_idx].
 *
 * Returns 0 on success. Processing stops at the first entry that cannot be
 * handled, returning -LIBBPF_ERRNO__FORMAT, -LIBBPF_ERRNO__RELOC or -EINVAL.
 */
static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
					    Elf64_Shdr *shdr, Elf_Data *data)
{
	const struct btf_member *member;
	struct bpf_struct_ops *st_ops;
	struct bpf_program *prog;
	unsigned int shdr_idx;
	const struct btf *btf;
	struct bpf_map *map;
	unsigned int moff, insn_idx;
	const char *name;
	__u32 member_idx;
	Elf64_Sym *sym;
	Elf64_Rel *rel;
	int i, nrels;

	btf = obj->btf;
	/* number of relocation entries in the section */
	nrels = shdr->sh_size / shdr->sh_entsize;
	for (i = 0; i < nrels; i++) {
		rel = elf_rel_by_idx(data, i);
		if (!rel) {
			pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
			return -LIBBPF_ERRNO__FORMAT;
		}

		sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
		if (!sym) {
			pr_warn("struct_ops reloc: symbol %zx not found\n",
				(size_t)ELF64_R_SYM(rel->r_info));
			return -LIBBPF_ERRNO__FORMAT;
		}

		/* symbol name, used for the debug message below only */
		name = elf_sym_str(obj, sym->st_name) ?: "<?>";
		map = find_struct_ops_map_by_offset(obj, rel->r_offset);
		if (!map) {
			pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
				(size_t)rel->r_offset);
			return -EINVAL;
		}

		/* offset of the relocated field relative to the map's start */
		moff = rel->r_offset - map->sec_offset;
		shdr_idx = sym->st_shndx;
		st_ops = map->st_ops;
		pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
			 map->name,
			 (long long)(rel->r_info >> 32),
			 (long long)sym->st_value,
			 shdr_idx, (size_t)rel->r_offset,
			 map->sec_offset, sym->st_name, name);

		if (shdr_idx >= SHN_LORESERVE) {
			pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
				map->name, (size_t)rel->r_offset, shdr_idx);
			return -LIBBPF_ERRNO__RELOC;
		}
		/* the symbol value must fall on an instruction boundary */
		if (sym->st_value % BPF_INSN_SZ) {
			pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
				map->name, (unsigned long long)sym->st_value);
			return -LIBBPF_ERRNO__FORMAT;
		}
		/* convert the byte offset into an instruction index */
		insn_idx = sym->st_value / BPF_INSN_SZ;

		/* NOTE(review): moff * 8 presumably converts bytes to the bit
		 * offset find_member_by_offset() expects - confirm
		 */
		member = find_member_by_offset(st_ops->type, moff * 8);
		if (!member) {
			pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
				map->name, moff);
			return -EINVAL;
		}
		member_idx = member - btf_members(st_ops->type);
		/* from here on 'name' is the struct member's name */
		name = btf__name_by_offset(btf, member->name_off);

		if (!resolve_func_ptr(btf, member->type, NULL)) {
			pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
				map->name, name);
			return -EINVAL;
		}

		prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
		if (!prog) {
			pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
				map->name, shdr_idx, name);
			return -EINVAL;
		}

		/* prevent the use of BPF prog with invalid type */
		if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
			pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
				map->name, prog->name);
			return -EINVAL;
		}

		/* if we haven't yet processed this BPF program, record proper
		 * attach_btf_id and member_idx
		 */
		if (!prog->attach_btf_id) {
			prog->attach_btf_id = st_ops->type_id;
			prog->expected_attach_type = member_idx;
		}

		/* struct_ops BPF prog can be re-used between multiple
		 * .struct_ops as long as it's the same struct_ops struct
		 * definition and the same function pointer field
		 */
		if (prog->attach_btf_id != st_ops->type_id ||
		    prog->expected_attach_type != member_idx) {
			pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
				map->name, prog->name, prog->sec_name, prog->type,
				prog->attach_btf_id, prog->expected_attach_type, name);
			return -EINVAL;
		}

		st_ops->progs[member_idx] = prog;
	}

	return 0;
}
 8902
 8903#define BTF_TRACE_PREFIX "btf_trace_"
 8904#define BTF_LSM_PREFIX "bpf_lsm_"
 8905#define BTF_ITER_PREFIX "bpf_iter_"
 8906#define BTF_MAX_NAME_SIZE 128
 8907
 8908void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
 8909				const char **prefix, int *kind)
 8910{
 8911	switch (attach_type) {
 8912	case BPF_TRACE_RAW_TP:
 8913		*prefix = BTF_TRACE_PREFIX;
 8914		*kind = BTF_KIND_TYPEDEF;
 8915		break;
 8916	case BPF_LSM_MAC:
 8917	case BPF_LSM_CGROUP:
 8918		*prefix = BTF_LSM_PREFIX;
 8919		*kind = BTF_KIND_FUNC;
 8920		break;
 8921	case BPF_TRACE_ITER:
 8922		*prefix = BTF_ITER_PREFIX;
 8923		*kind = BTF_KIND_FUNC;
 8924		break;
 8925	default:
 8926		*prefix = "";
 8927		*kind = BTF_KIND_FUNC;
 8928	}
 8929}
 8930
 8931static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
 8932				   const char *name, __u32 kind)
 8933{
 8934	char btf_type_name[BTF_MAX_NAME_SIZE];
 8935	int ret;
 8936
 8937	ret = snprintf(btf_type_name, sizeof(btf_type_name),
 8938		       "%s%s", prefix, name);
 8939	/* snprintf returns the number of characters written excluding the
 8940	 * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it
 8941	 * indicates truncation.
 8942	 */
 8943	if (ret < 0 || ret >= sizeof(btf_type_name))
 8944		return -ENAMETOOLONG;
 8945	return btf__find_by_name_kind(btf, btf_type_name, kind);
 8946}
 8947
 8948static inline int find_attach_btf_id(struct btf *btf, const char *name,
 8949				     enum bpf_attach_type attach_type)
 8950{
 8951	const char *prefix;
 8952	int kind;
 8953
 8954	btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
 8955	return find_btf_by_prefix_kind(btf, prefix, name, kind);
 8956}
 8957
 8958int libbpf_find_vmlinux_btf_id(const char *name,
 8959			       enum bpf_attach_type attach_type)
 8960{
 8961	struct btf *btf;
 8962	int err;
 8963
 8964	btf = btf__load_vmlinux_btf();
 8965	err = libbpf_get_error(btf);
 8966	if (err) {
 8967		pr_warn("vmlinux BTF is not found\n");
 8968		return libbpf_err(err);
 8969	}
 8970
 8971	err = find_attach_btf_id(btf, name, attach_type);
 8972	if (err <= 0)
 8973		pr_warn("%s is not found in vmlinux BTF\n", name);
 8974
 8975	btf__free(btf);
 8976	return libbpf_err(err);
 8977}
 8978
 8979static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 8980{
 8981	struct bpf_prog_info info = {};
 8982	__u32 info_len = sizeof(info);
 8983	struct btf *btf;
 8984	int err;
 8985
 8986	err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len);
 8987	if (err) {
 8988		pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n",
 8989			attach_prog_fd, err);
 8990		return err;
 8991	}
 8992
 8993	err = -EINVAL;
 8994	if (!info.btf_id) {
 8995		pr_warn("The target program doesn't have BTF\n");
 8996		goto out;
 8997	}
 8998	btf = btf__load_from_kernel_by_id(info.btf_id);
 8999	err = libbpf_get_error(btf);
 9000	if (err) {
 9001		pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
 9002		goto out;
 9003	}
 9004	err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
 9005	btf__free(btf);
 9006	if (err <= 0) {
 9007		pr_warn("%s is not found in prog's BTF\n", name);
 9008		goto out;
 9009	}
 9010out:
 9011	return err;
 9012}
 9013
 9014static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
 9015			      enum bpf_attach_type attach_type,
 9016			      int *btf_obj_fd, int *btf_type_id)
 9017{
 9018	int ret, i;
 9019
 9020	ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
 9021	if (ret > 0) {
 9022		*btf_obj_fd = 0; /* vmlinux BTF */
 9023		*btf_type_id = ret;
 9024		return 0;
 9025	}
 9026	if (ret != -ENOENT)
 9027		return ret;
 9028
 9029	ret = load_module_btfs(obj);
 9030	if (ret)
 9031		return ret;
 9032
 9033	for (i = 0; i < obj->btf_module_cnt; i++) {
 9034		const struct module_btf *mod = &obj->btf_modules[i];
 9035
 9036		ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
 9037		if (ret > 0) {
 9038			*btf_obj_fd = mod->fd;
 9039			*btf_type_id = ret;
 9040			return 0;
 9041		}
 9042		if (ret == -ENOENT)
 9043			continue;
 9044
 9045		return ret;
 9046	}
 9047
 9048	return -ESRCH;
 9049}
 9050
 9051static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
 9052				     int *btf_obj_fd, int *btf_type_id)
 9053{
 9054	enum bpf_attach_type attach_type = prog->expected_attach_type;
 9055	__u32 attach_prog_fd = prog->attach_prog_fd;
 9056	int err = 0;
 9057
 9058	/* BPF program's BTF ID */
 9059	if (attach_prog_fd) {
 9060		err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
 9061		if (err < 0) {
 9062			pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
 9063				 attach_prog_fd, attach_name, err);
 9064			return err;
 9065		}
 9066		*btf_obj_fd = 0;
 9067		*btf_type_id = err;
 9068		return 0;
 9069	}
 9070
 9071	/* kernel/module BTF ID */
 9072	if (prog->obj->gen_loader) {
 9073		bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
 9074		*btf_obj_fd = 0;
 9075		*btf_type_id = 1;
 9076	} else {
 9077		err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
 9078	}
 9079	if (err) {
 9080		pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
 9081		return err;
 9082	}
 9083	return 0;
 9084}
 9085
 9086int libbpf_attach_type_by_name(const char *name,
 9087			       enum bpf_attach_type *attach_type)
 9088{
 9089	char *type_names;
 9090	const struct bpf_sec_def *sec_def;
 9091
 9092	if (!name)
 9093		return libbpf_err(-EINVAL);
 9094
 9095	sec_def = find_sec_def(name);
 9096	if (!sec_def) {
 9097		pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
 9098		type_names = libbpf_get_type_names(true);
 9099		if (type_names != NULL) {
 9100			pr_debug("attachable section(type) names are:%s\n", type_names);
 9101			free(type_names);
 9102		}
 9103
 9104		return libbpf_err(-EINVAL);
 9105	}
 9106
 9107	if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
 9108		return libbpf_err(-EINVAL);
 9109	if (!(sec_def->cookie & SEC_ATTACHABLE))
 9110		return libbpf_err(-EINVAL);
 9111
 9112	*attach_type = sec_def->expected_attach_type;
 9113	return 0;
 9114}
 9115
 9116int bpf_map__fd(const struct bpf_map *map)
 9117{
 9118	return map ? map->fd : libbpf_err(-EINVAL);
 9119}
 9120
 9121static bool map_uses_real_name(const struct bpf_map *map)
 9122{
 9123	/* Since libbpf started to support custom .data.* and .rodata.* maps,
 9124	 * their user-visible name differs from kernel-visible name. Users see
 9125	 * such map's corresponding ELF section name as a map name.
 9126	 * This check distinguishes .data/.rodata from .data.* and .rodata.*
 9127	 * maps to know which name has to be returned to the user.
 9128	 */
 9129	if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
 9130		return true;
 9131	if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
 9132		return true;
 9133	return false;
 9134}
 9135
 9136const char *bpf_map__name(const struct bpf_map *map)
 9137{
 9138	if (!map)
 9139		return NULL;
 9140
 9141	if (map_uses_real_name(map))
 9142		return map->real_name;
 9143
 9144	return map->name;
 9145}
 9146
 9147enum bpf_map_type bpf_map__type(const struct bpf_map *map)
 9148{
 9149	return map->def.type;
 9150}
 9151
 9152int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
 9153{
 9154	if (map->fd >= 0)
 9155		return libbpf_err(-EBUSY);
 9156	map->def.type = type;
 9157	return 0;
 9158}
 9159
 9160__u32 bpf_map__map_flags(const struct bpf_map *map)
 9161{
 9162	return map->def.map_flags;
 9163}
 9164
 9165int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
 9166{
 9167	if (map->fd >= 0)
 9168		return libbpf_err(-EBUSY);
 9169	map->def.map_flags = flags;
 9170	return 0;
 9171}
 9172
 9173__u64 bpf_map__map_extra(const struct bpf_map *map)
 9174{
 9175	return map->map_extra;
 9176}
 9177
 9178int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
 9179{
 9180	if (map->fd >= 0)
 9181		return libbpf_err(-EBUSY);
 9182	map->map_extra = map_extra;
 9183	return 0;
 9184}
 9185
 9186__u32 bpf_map__numa_node(const struct bpf_map *map)
 9187{
 9188	return map->numa_node;
 9189}
 9190
 9191int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
 9192{
 9193	if (map->fd >= 0)
 9194		return libbpf_err(-EBUSY);
 9195	map->numa_node = numa_node;
 9196	return 0;
 9197}
 9198
 9199__u32 bpf_map__key_size(const struct bpf_map *map)
 9200{
 9201	return map->def.key_size;
 9202}
 9203
 9204int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
 9205{
 9206	if (map->fd >= 0)
 9207		return libbpf_err(-EBUSY);
 9208	map->def.key_size = size;
 9209	return 0;
 9210}
 9211
 9212__u32 bpf_map__value_size(const struct bpf_map *map)
 9213{
 9214	return map->def.value_size;
 9215}
 9216
 9217int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
 9218{
 9219	if (map->fd >= 0)
 9220		return libbpf_err(-EBUSY);
 9221	map->def.value_size = size;
 9222	return 0;
 9223}
 9224
 9225__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
 9226{
 9227	return map ? map->btf_key_type_id : 0;
 9228}
 9229
 9230__u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
 9231{
 9232	return map ? map->btf_value_type_id : 0;
 9233}
 9234
 9235int bpf_map__set_initial_value(struct bpf_map *map,
 9236			       const void *data, size_t size)
 9237{
 9238	if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
 9239	    size != map->def.value_size || map->fd >= 0)
 9240		return libbpf_err(-EINVAL);
 9241
 9242	memcpy(map->mmaped, data, size);
 9243	return 0;
 9244}
 9245
 9246const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
 9247{
 9248	if (!map->mmaped)
 9249		return NULL;
 9250	*psize = map->def.value_size;
 9251	return map->mmaped;
 9252}
 9253
 9254bool bpf_map__is_internal(const struct bpf_map *map)
 9255{
 9256	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
 9257}
 9258
 9259__u32 bpf_map__ifindex(const struct bpf_map *map)
 9260{
 9261	return map->map_ifindex;
 9262}
 9263
 9264int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
 9265{
 9266	if (map->fd >= 0)
 9267		return libbpf_err(-EBUSY);
 9268	map->map_ifindex = ifindex;
 9269	return 0;
 9270}
 9271
 9272int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
 9273{
 9274	if (!bpf_map_type__is_map_in_map(map->def.type)) {
 9275		pr_warn("error: unsupported map type\n");
 9276		return libbpf_err(-EINVAL);
 9277	}
 9278	if (map->inner_map_fd != -1) {
 9279		pr_warn("error: inner_map_fd already specified\n");
 9280		return libbpf_err(-EINVAL);
 9281	}
 9282	if (map->inner_map) {
 9283		bpf_map__destroy(map->inner_map);
 9284		zfree(&map->inner_map);
 9285	}
 9286	map->inner_map_fd = fd;
 9287	return 0;
 9288}
 9289
 9290static struct bpf_map *
 9291__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
 9292{
 9293	ssize_t idx;
 9294	struct bpf_map *s, *e;
 9295
 9296	if (!obj || !obj->maps)
 9297		return errno = EINVAL, NULL;
 9298
 9299	s = obj->maps;
 9300	e = obj->maps + obj->nr_maps;
 9301
 9302	if ((m < s) || (m >= e)) {
 9303		pr_warn("error in %s: map handler doesn't belong to object\n",
 9304			 __func__);
 9305		return errno = EINVAL, NULL;
 9306	}
 9307
 9308	idx = (m - obj->maps) + i;
 9309	if (idx >= obj->nr_maps || idx < 0)
 9310		return NULL;
 9311	return &obj->maps[idx];
 9312}
 9313
 9314struct bpf_map *
 9315bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
 9316{
 9317	if (prev == NULL)
 9318		return obj->maps;
 9319
 9320	return __bpf_map__iter(prev, obj, 1);
 9321}
 9322
 9323struct bpf_map *
 9324bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
 9325{
 9326	if (next == NULL) {
 9327		if (!obj->nr_maps)
 9328			return NULL;
 9329		return obj->maps + obj->nr_maps - 1;
 9330	}
 9331
 9332	return __bpf_map__iter(next, obj, -1);
 9333}
 9334
 9335struct bpf_map *
 9336bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
 9337{
 9338	struct bpf_map *pos;
 9339
 9340	bpf_object__for_each_map(pos, obj) {
 9341		/* if it's a special internal map name (which always starts
 9342		 * with dot) then check if that special name matches the
 9343		 * real map name (ELF section name)
 9344		 */
 9345		if (name[0] == '.') {
 9346			if (pos->real_name && strcmp(pos->real_name, name) == 0)
 9347				return pos;
 9348			continue;
 9349		}
 9350		/* otherwise map name has to be an exact match */
 9351		if (map_uses_real_name(pos)) {
 9352			if (strcmp(pos->real_name, name) == 0)
 9353				return pos;
 9354			continue;
 9355		}
 9356		if (strcmp(pos->name, name) == 0)
 9357			return pos;
 9358	}
 9359	return errno = ENOENT, NULL;
 9360}
 9361
 9362int
 9363bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
 9364{
 9365	return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
 9366}
 9367
 9368static int validate_map_op(const struct bpf_map *map, size_t key_sz,
 9369			   size_t value_sz, bool check_value_sz)
 9370{
 9371	if (map->fd <= 0)
 9372		return -ENOENT;
 9373
 9374	if (map->def.key_size != key_sz) {
 9375		pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
 9376			map->name, key_sz, map->def.key_size);
 9377		return -EINVAL;
 9378	}
 9379
 9380	if (!check_value_sz)
 9381		return 0;
 9382
 9383	switch (map->def.type) {
 9384	case BPF_MAP_TYPE_PERCPU_ARRAY:
 9385	case BPF_MAP_TYPE_PERCPU_HASH:
 9386	case BPF_MAP_TYPE_LRU_PERCPU_HASH:
 9387	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
 9388		int num_cpu = libbpf_num_possible_cpus();
 9389		size_t elem_sz = roundup(map->def.value_size, 8);
 9390
 9391		if (value_sz != num_cpu * elem_sz) {
 9392			pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
 9393				map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
 9394			return -EINVAL;
 9395		}
 9396		break;
 9397	}
 9398	default:
 9399		if (map->def.value_size != value_sz) {
 9400			pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
 9401				map->name, value_sz, map->def.value_size);
 9402			return -EINVAL;
 9403		}
 9404		break;
 9405	}
 9406	return 0;
 9407}
 9408
 9409int bpf_map__lookup_elem(const struct bpf_map *map,
 9410			 const void *key, size_t key_sz,
 9411			 void *value, size_t value_sz, __u64 flags)
 9412{
 9413	int err;
 9414
 9415	err = validate_map_op(map, key_sz, value_sz, true);
 9416	if (err)
 9417		return libbpf_err(err);
 9418
 9419	return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
 9420}
 9421
 9422int bpf_map__update_elem(const struct bpf_map *map,
 9423			 const void *key, size_t key_sz,
 9424			 const void *value, size_t value_sz, __u64 flags)
 9425{
 9426	int err;
 9427
 9428	err = validate_map_op(map, key_sz, value_sz, true);
 9429	if (err)
 9430		return libbpf_err(err);
 9431
 9432	return bpf_map_update_elem(map->fd, key, value, flags);
 9433}
 9434
 9435int bpf_map__delete_elem(const struct bpf_map *map,
 9436			 const void *key, size_t key_sz, __u64 flags)
 9437{
 9438	int err;
 9439
 9440	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
 9441	if (err)
 9442		return libbpf_err(err);
 9443
 9444	return bpf_map_delete_elem_flags(map->fd, key, flags);
 9445}
 9446
 9447int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
 9448				    const void *key, size_t key_sz,
 9449				    void *value, size_t value_sz, __u64 flags)
 9450{
 9451	int err;
 9452
 9453	err = validate_map_op(map, key_sz, value_sz, true);
 9454	if (err)
 9455		return libbpf_err(err);
 9456
 9457	return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
 9458}
 9459
 9460int bpf_map__get_next_key(const struct bpf_map *map,
 9461			  const void *cur_key, void *next_key, size_t key_sz)
 9462{
 9463	int err;
 9464
 9465	err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
 9466	if (err)
 9467		return libbpf_err(err);
 9468
 9469	return bpf_map_get_next_key(map->fd, cur_key, next_key);
 9470}
 9471
 9472long libbpf_get_error(const void *ptr)
 9473{
 9474	if (!IS_ERR_OR_NULL(ptr))
 9475		return 0;
 9476
 9477	if (IS_ERR(ptr))
 9478		errno = -PTR_ERR(ptr);
 9479
 9480	/* If ptr == NULL, then errno should be already set by the failing
 9481	 * API, because libbpf never returns NULL on success and it now always
 9482	 * sets errno on error. So no extra errno handling for ptr == NULL
 9483	 * case.
 9484	 */
 9485	return -errno;
 9486}
 9487
 9488/* Replace link's underlying BPF program with the new one */
 9489int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
 9490{
 9491	int ret;
 9492
 9493	ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
 9494	return libbpf_err_errno(ret);
 9495}
 9496
 9497/* Release "ownership" of underlying BPF resource (typically, BPF program
 9498 * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
 9499 * link, when destructed through bpf_link__destroy() call won't attempt to
 9500 * detach/unregisted that BPF resource. This is useful in situations where,
 9501 * say, attached BPF program has to outlive userspace program that attached it
 9502 * in the system. Depending on type of BPF program, though, there might be
 9503 * additional steps (like pinning BPF program in BPF FS) necessary to ensure
 9504 * exit of userspace program doesn't trigger automatic detachment and clean up
 9505 * inside the kernel.
 9506 */
 9507void bpf_link__disconnect(struct bpf_link *link)
 9508{
 9509	link->disconnected = true;
 9510}
 9511
 9512int bpf_link__destroy(struct bpf_link *link)
 9513{
 9514	int err = 0;
 9515
 9516	if (IS_ERR_OR_NULL(link))
 9517		return 0;
 9518
 9519	if (!link->disconnected && link->detach)
 9520		err = link->detach(link);
 9521	if (link->pin_path)
 9522		free(link->pin_path);
 9523	if (link->dealloc)
 9524		link->dealloc(link);
 9525	else
 9526		free(link);
 9527
 9528	return libbpf_err(err);
 9529}
 9530
 9531int bpf_link__fd(const struct bpf_link *link)
 9532{
 9533	return link->fd;
 9534}
 9535
 9536const char *bpf_link__pin_path(const struct bpf_link *link)
 9537{
 9538	return link->pin_path;
 9539}
 9540
 9541static int bpf_link__detach_fd(struct bpf_link *link)
 9542{
 9543	return libbpf_err_errno(close(link->fd));
 9544}
 9545
 9546struct bpf_link *bpf_link__open(const char *path)
 9547{
 9548	struct bpf_link *link;
 9549	int fd;
 9550
 9551	fd = bpf_obj_get(path);
 9552	if (fd < 0) {
 9553		fd = -errno;
 9554		pr_warn("failed to open link at %s: %d\n", path, fd);
 9555		return libbpf_err_ptr(fd);
 9556	}
 9557
 9558	link = calloc(1, sizeof(*link));
 9559	if (!link) {
 9560		close(fd);
 9561		return libbpf_err_ptr(-ENOMEM);
 9562	}
 9563	link->detach = &bpf_link__detach_fd;
 9564	link->fd = fd;
 9565
 9566	link->pin_path = strdup(path);
 9567	if (!link->pin_path) {
 9568		bpf_link__destroy(link);
 9569		return libbpf_err_ptr(-ENOMEM);
 9570	}
 9571
 9572	return link;
 9573}
 9574
 9575int bpf_link__detach(struct bpf_link *link)
 9576{
 9577	return bpf_link_detach(link->fd) ? -errno : 0;
 9578}
 9579
 9580int bpf_link__pin(struct bpf_link *link, const char *path)
 9581{
 9582	int err;
 9583
 9584	if (link->pin_path)
 9585		return libbpf_err(-EBUSY);
 9586	err = make_parent_dir(path);
 9587	if (err)
 9588		return libbpf_err(err);
 9589	err = check_path(path);
 9590	if (err)
 9591		return libbpf_err(err);
 9592
 9593	link->pin_path = strdup(path);
 9594	if (!link->pin_path)
 9595		return libbpf_err(-ENOMEM);
 9596
 9597	if (bpf_obj_pin(link->fd, link->pin_path)) {
 9598		err = -errno;
 9599		zfree(&link->pin_path);
 9600		return libbpf_err(err);
 9601	}
 9602
 9603	pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
 9604	return 0;
 9605}
 9606
 9607int bpf_link__unpin(struct bpf_link *link)
 9608{
 9609	int err;
 9610
 9611	if (!link->pin_path)
 9612		return libbpf_err(-EINVAL);
 9613
 9614	err = unlink(link->pin_path);
 9615	if (err != 0)
 9616		return -errno;
 9617
 9618	pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
 9619	zfree(&link->pin_path);
 9620	return 0;
 9621}
 9622
/* bpf_link flavor used for perf_event-based attachments. The generic link is
 * embedded so that callbacks can get back to this struct with container_of().
 */
struct bpf_link_perf {
	/* embedded generic link */
	struct bpf_link link;
	/* perf event FD the program is attached to; may differ from link.fd */
	int perf_event_fd;
	/* legacy kprobe support: keep track of probe identifier and type */
	/* heap-allocated probe name, NULL when no legacy probe has to be
	 * removed on detach
	 */
	char *legacy_probe_name;
	/* selects kprobe vs uprobe legacy event removal on detach */
	bool legacy_is_kprobe;
	/* passed as the retprobe flag to the legacy event removal helper */
	bool legacy_is_retprobe;
};
 9631
 9632static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
 9633static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
 9634
 9635static int bpf_link_perf_detach(struct bpf_link *link)
 9636{
 9637	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
 9638	int err = 0;
 9639
 9640	if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
 9641		err = -errno;
 9642
 9643	if (perf_link->perf_event_fd != link->fd)
 9644		close(perf_link->perf_event_fd);
 9645	close(link->fd);
 9646
 9647	/* legacy uprobe/kprobe needs to be removed after perf event fd closure */
 9648	if (perf_link->legacy_probe_name) {
 9649		if (perf_link->legacy_is_kprobe) {
 9650			err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
 9651							 perf_link->legacy_is_retprobe);
 9652		} else {
 9653			err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
 9654							 perf_link->legacy_is_retprobe);
 9655		}
 9656	}
 9657
 9658	return err;
 9659}
 9660
 9661static void bpf_link_perf_dealloc(struct bpf_link *link)
 9662{
 9663	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
 9664
 9665	free(perf_link->legacy_probe_name);
 9666	free(perf_link);
 9667}
 9668
 9669struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
 9670						     const struct bpf_perf_event_opts *opts)
 9671{
 9672	char errmsg[STRERR_BUFSIZE];
 9673	struct bpf_link_perf *link;
 9674	int prog_fd, link_fd = -1, err;
 9675
 9676	if (!OPTS_VALID(opts, bpf_perf_event_opts))
 9677		return libbpf_err_ptr(-EINVAL);
 9678
 9679	if (pfd < 0) {
 9680		pr_warn("prog '%s': invalid perf event FD %d\n",
 9681			prog->name, pfd);
 9682		return libbpf_err_ptr(-EINVAL);
 9683	}
 9684	prog_fd = bpf_program__fd(prog);
 9685	if (prog_fd < 0) {
 9686		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
 9687			prog->name);
 9688		return libbpf_err_ptr(-EINVAL);
 9689	}
 9690
 9691	link = calloc(1, sizeof(*link));
 9692	if (!link)
 9693		return libbpf_err_ptr(-ENOMEM);
 9694	link->link.detach = &bpf_link_perf_detach;
 9695	link->link.dealloc = &bpf_link_perf_dealloc;
 9696	link->perf_event_fd = pfd;
 9697
 9698	if (kernel_supports(prog->obj, FEAT_PERF_LINK)) {
 9699		DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
 9700			.perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
 9701
 9702		link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
 9703		if (link_fd < 0) {
 9704			err = -errno;
 9705			pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
 9706				prog->name, pfd,
 9707				err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9708			goto err_out;
 9709		}
 9710		link->link.fd = link_fd;
 9711	} else {
 9712		if (OPTS_GET(opts, bpf_cookie, 0)) {
 9713			pr_warn("prog '%s': user context value is not supported\n", prog->name);
 9714			err = -EOPNOTSUPP;
 9715			goto err_out;
 9716		}
 9717
 9718		if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
 9719			err = -errno;
 9720			pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
 9721				prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9722			if (err == -EPROTO)
 9723				pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
 9724					prog->name, pfd);
 9725			goto err_out;
 9726		}
 9727		link->link.fd = pfd;
 9728	}
 9729	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
 9730		err = -errno;
 9731		pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
 9732			prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9733		goto err_out;
 9734	}
 9735
 9736	return &link->link;
 9737err_out:
 9738	if (link_fd >= 0)
 9739		close(link_fd);
 9740	free(link);
 9741	return libbpf_err_ptr(err);
 9742}
 9743
 9744struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
 9745{
 9746	return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
 9747}
 9748
 9749/*
 9750 * this function is expected to parse integer in the range of [0, 2^31-1] from
 9751 * given file using scanf format string fmt. If actual parsed value is
 9752 * negative, the result might be indistinguishable from error
 9753 */
 9754static int parse_uint_from_file(const char *file, const char *fmt)
 9755{
 9756	char buf[STRERR_BUFSIZE];
 9757	int err, ret;
 9758	FILE *f;
 9759
 9760	f = fopen(file, "r");
 9761	if (!f) {
 9762		err = -errno;
 9763		pr_debug("failed to open '%s': %s\n", file,
 9764			 libbpf_strerror_r(err, buf, sizeof(buf)));
 9765		return err;
 9766	}
 9767	err = fscanf(f, fmt, &ret);
 9768	if (err != 1) {
 9769		err = err == EOF ? -EIO : -errno;
 9770		pr_debug("failed to parse '%s': %s\n", file,
 9771			libbpf_strerror_r(err, buf, sizeof(buf)));
 9772		fclose(f);
 9773		return err;
 9774	}
 9775	fclose(f);
 9776	return ret;
 9777}
 9778
 9779static int determine_kprobe_perf_type(void)
 9780{
 9781	const char *file = "/sys/bus/event_source/devices/kprobe/type";
 9782
 9783	return parse_uint_from_file(file, "%d\n");
 9784}
 9785
 9786static int determine_uprobe_perf_type(void)
 9787{
 9788	const char *file = "/sys/bus/event_source/devices/uprobe/type";
 9789
 9790	return parse_uint_from_file(file, "%d\n");
 9791}
 9792
 9793static int determine_kprobe_retprobe_bit(void)
 9794{
 9795	const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
 9796
 9797	return parse_uint_from_file(file, "config:%d\n");
 9798}
 9799
 9800static int determine_uprobe_retprobe_bit(void)
 9801{
 9802	const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
 9803
 9804	return parse_uint_from_file(file, "config:%d\n");
 9805}
 9806
 9807#define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
 9808#define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
 9809
 9810static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 9811				 uint64_t offset, int pid, size_t ref_ctr_off)
 9812{
 9813	struct perf_event_attr attr = {};
 9814	char errmsg[STRERR_BUFSIZE];
 9815	int type, pfd;
 9816
 9817	if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
 9818		return -EINVAL;
 9819
 9820	type = uprobe ? determine_uprobe_perf_type()
 9821		      : determine_kprobe_perf_type();
 9822	if (type < 0) {
 9823		pr_warn("failed to determine %s perf type: %s\n",
 9824			uprobe ? "uprobe" : "kprobe",
 9825			libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
 9826		return type;
 9827	}
 9828	if (retprobe) {
 9829		int bit = uprobe ? determine_uprobe_retprobe_bit()
 9830				 : determine_kprobe_retprobe_bit();
 9831
 9832		if (bit < 0) {
 9833			pr_warn("failed to determine %s retprobe bit: %s\n",
 9834				uprobe ? "uprobe" : "kprobe",
 9835				libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
 9836			return bit;
 9837		}
 9838		attr.config |= 1 << bit;
 9839	}
 9840	attr.size = sizeof(attr);
 9841	attr.type = type;
 9842	attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
 9843	attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
 9844	attr.config2 = offset;		 /* kprobe_addr or probe_offset */
 9845
 9846	/* pid filter is meaningful only for uprobes */
 9847	pfd = syscall(__NR_perf_event_open, &attr,
 9848		      pid < 0 ? -1 : pid /* pid */,
 9849		      pid == -1 ? 0 : -1 /* cpu */,
 9850		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
 9851	return pfd >= 0 ? pfd : -errno;
 9852}
 9853
 9854static int append_to_file(const char *file, const char *fmt, ...)
 9855{
 9856	int fd, n, err = 0;
 9857	va_list ap;
 9858
 9859	fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
 9860	if (fd < 0)
 9861		return -errno;
 9862
 9863	va_start(ap, fmt);
 9864	n = vdprintf(fd, fmt, ap);
 9865	va_end(ap);
 9866
 9867	if (n < 0)
 9868		err = -errno;
 9869
 9870	close(fd);
 9871	return err;
 9872}
 9873
 9874#define DEBUGFS "/sys/kernel/debug/tracing"
 9875#define TRACEFS "/sys/kernel/tracing"
 9876
 9877static bool use_debugfs(void)
 9878{
 9879	static int has_debugfs = -1;
 9880
 9881	if (has_debugfs < 0)
 9882		has_debugfs = access(DEBUGFS, F_OK) == 0;
 9883
 9884	return has_debugfs == 1;
 9885}
 9886
 9887static const char *tracefs_path(void)
 9888{
 9889	return use_debugfs() ? DEBUGFS : TRACEFS;
 9890}
 9891
 9892static const char *tracefs_kprobe_events(void)
 9893{
 9894	return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
 9895}
 9896
 9897static const char *tracefs_uprobe_events(void)
 9898{
 9899	return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
 9900}
 9901
 9902static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
 9903					 const char *kfunc_name, size_t offset)
 9904{
 9905	static int index = 0;
 9906
 9907	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
 9908		 __sync_fetch_and_add(&index, 1));
 9909}
 9910
 9911static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
 9912				   const char *kfunc_name, size_t offset)
 9913{
 9914	return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
 9915			      retprobe ? 'r' : 'p',
 9916			      retprobe ? "kretprobes" : "kprobes",
 9917			      probe_name, kfunc_name, offset);
 9918}
 9919
 9920static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
 9921{
 9922	return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
 9923			      retprobe ? "kretprobes" : "kprobes", probe_name);
 9924}
 9925
 9926static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
 9927{
 9928	char file[256];
 9929
 9930	snprintf(file, sizeof(file), "%s/events/%s/%s/id",
 9931		 tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
 9932
 9933	return parse_uint_from_file(file, "%d\n");
 9934}
 9935
 9936static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
 9937					 const char *kfunc_name, size_t offset, int pid)
 9938{
 9939	struct perf_event_attr attr = {};
 9940	char errmsg[STRERR_BUFSIZE];
 9941	int type, pfd, err;
 9942
 9943	err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
 9944	if (err < 0) {
 9945		pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
 9946			kfunc_name, offset,
 9947			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9948		return err;
 9949	}
 9950	type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
 9951	if (type < 0) {
 9952		err = type;
 9953		pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
 9954			kfunc_name, offset,
 9955			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9956		goto err_clean_legacy;
 9957	}
 9958	attr.size = sizeof(attr);
 9959	attr.config = type;
 9960	attr.type = PERF_TYPE_TRACEPOINT;
 9961
 9962	pfd = syscall(__NR_perf_event_open, &attr,
 9963		      pid < 0 ? -1 : pid, /* pid */
 9964		      pid == -1 ? 0 : -1, /* cpu */
 9965		      -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
 9966	if (pfd < 0) {
 9967		err = -errno;
 9968		pr_warn("legacy kprobe perf_event_open() failed: %s\n",
 9969			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 9970		goto err_clean_legacy;
 9971	}
 9972	return pfd;
 9973
 9974err_clean_legacy:
 9975	/* Clear the newly added legacy kprobe_event */
 9976	remove_kprobe_event_legacy(probe_name, retprobe);
 9977	return err;
 9978}
 9979
 9980static const char *arch_specific_syscall_pfx(void)
 9981{
 9982#if defined(__x86_64__)
 9983	return "x64";
 9984#elif defined(__i386__)
 9985	return "ia32";
 9986#elif defined(__s390x__)
 9987	return "s390x";
 9988#elif defined(__s390__)
 9989	return "s390";
 9990#elif defined(__arm__)
 9991	return "arm";
 9992#elif defined(__aarch64__)
 9993	return "arm64";
 9994#elif defined(__mips__)
 9995	return "mips";
 9996#elif defined(__riscv)
 9997	return "riscv";
 9998#elif defined(__powerpc__)
 9999	return "powerpc";
10000#elif defined(__powerpc64__)
10001	return "powerpc64";
10002#else
10003	return NULL;
10004#endif
10005}
10006
/* Feature probe: returns 1 if a kprobe can be created on the arch-specific
 * bpf() syscall wrapper symbol ("__<arch>_sys_bpf"), 0 otherwise (including
 * when the architecture has no known prefix).
 */
static int probe_kern_syscall_wrapper(void)
{
	const char *arch_pfx = arch_specific_syscall_pfx();
	char wrapper_sym[64];
	char legacy_name[128];
	int fd;

	if (arch_pfx == NULL)
		return 0;

	snprintf(wrapper_sym, sizeof(wrapper_sym), "__%s_sys_bpf", arch_pfx);

	if (determine_kprobe_perf_type() < 0) {
		/* legacy mode: try adding, then immediately remove, a kprobe event */
		gen_kprobe_legacy_event_name(legacy_name, sizeof(legacy_name), wrapper_sym, 0);
		if (add_kprobe_event_legacy(legacy_name, false, wrapper_sym, 0) < 0)
			return 0;

		(void)remove_kprobe_event_legacy(legacy_name, false);
		return 1;
	}

	fd = perf_event_open_probe(false, false, wrapper_sym, 0, getpid(), 0);
	if (fd < 0)
		return 0;

	close(fd);
	return 1;
}
10037
10038struct bpf_link *
10039bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
10040				const char *func_name,
10041				const struct bpf_kprobe_opts *opts)
10042{
10043	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10044	char errmsg[STRERR_BUFSIZE];
10045	char *legacy_probe = NULL;
10046	struct bpf_link *link;
10047	size_t offset;
10048	bool retprobe, legacy;
10049	int pfd, err;
10050
10051	if (!OPTS_VALID(opts, bpf_kprobe_opts))
10052		return libbpf_err_ptr(-EINVAL);
10053
10054	retprobe = OPTS_GET(opts, retprobe, false);
10055	offset = OPTS_GET(opts, offset, 0);
10056	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10057
10058	legacy = determine_kprobe_perf_type() < 0;
10059	if (!legacy) {
10060		pfd = perf_event_open_probe(false /* uprobe */, retprobe,
10061					    func_name, offset,
10062					    -1 /* pid */, 0 /* ref_ctr_off */);
10063	} else {
10064		char probe_name[256];
10065
10066		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
10067					     func_name, offset);
10068
10069		legacy_probe = strdup(probe_name);
10070		if (!legacy_probe)
10071			return libbpf_err_ptr(-ENOMEM);
10072
10073		pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
10074						    offset, -1 /* pid */);
10075	}
10076	if (pfd < 0) {
10077		err = -errno;
10078		pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
10079			prog->name, retprobe ? "kretprobe" : "kprobe",
10080			func_name, offset,
10081			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10082		goto err_out;
10083	}
10084	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10085	err = libbpf_get_error(link);
10086	if (err) {
10087		close(pfd);
10088		pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
10089			prog->name, retprobe ? "kretprobe" : "kprobe",
10090			func_name, offset,
10091			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10092		goto err_clean_legacy;
10093	}
10094	if (legacy) {
10095		struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10096
10097		perf_link->legacy_probe_name = legacy_probe;
10098		perf_link->legacy_is_kprobe = true;
10099		perf_link->legacy_is_retprobe = retprobe;
10100	}
10101
10102	return link;
10103
10104err_clean_legacy:
10105	if (legacy)
10106		remove_kprobe_event_legacy(legacy_probe, retprobe);
10107err_out:
10108	free(legacy_probe);
10109	return libbpf_err_ptr(err);
10110}
10111
10112struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
10113					    bool retprobe,
10114					    const char *func_name)
10115{
10116	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
10117		.retprobe = retprobe,
10118	);
10119
10120	return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
10121}
10122
10123struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
10124					      const char *syscall_name,
10125					      const struct bpf_ksyscall_opts *opts)
10126{
10127	LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
10128	char func_name[128];
10129
10130	if (!OPTS_VALID(opts, bpf_ksyscall_opts))
10131		return libbpf_err_ptr(-EINVAL);
10132
10133	if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
10134		/* arch_specific_syscall_pfx() should never return NULL here
10135		 * because it is guarded by kernel_supports(). However, since
10136		 * compiler does not know that we have an explicit conditional
10137		 * as well.
10138		 */
10139		snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
10140			 arch_specific_syscall_pfx() ? : "", syscall_name);
10141	} else {
10142		snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
10143	}
10144
10145	kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
10146	kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10147
10148	return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
10149}
10150
/* Adapted from perf/util/string.c
 *
 * Match @str against glob pattern @pat, where '?' matches exactly one
 * character and '*' matches any (possibly empty) run of characters.
 * The whole string has to match.
 */
static bool glob_match(const char *str, const char *pat)
{
	const char *s = str, *p = pat;

	/* Walk the literal/'?' prefix up to the first '*' (or end of either) */
	for (; *s != '\0' && *p != '\0' && *p != '*'; s++, p++) {
		if (*p != '?' && *p != *s)
			return false;
	}

	/* No wild card ahead: both sides must be fully consumed */
	if (*p != '*')
		return *s == '\0' && *p == '\0';

	/* Collapse a run of consecutive '*' */
	do {
		p++;
	} while (*p == '*');

	/* Tail wild card matches all */
	if (*p == '\0')
		return true;

	/* Try the rest of the pattern at every remaining position of str */
	for (; *s != '\0'; s++) {
		if (glob_match(s, p))
			return true;
	}

	/* str is exhausted but the pattern still requires characters */
	return false;
}
10177
/* Accumulator handed as ctx to resolve_kprobe_multi_cb() while matching
 * symbol names against a glob pattern.
 */
struct kprobe_multi_resolve {
	/* glob pattern each symbol name is tested against */
	const char *pattern;
	/* array of addresses of the symbols that matched */
	unsigned long *addrs;
	/* capacity bookkeeping passed to libbpf_ensure_mem() for addrs */
	size_t cap;
	/* number of addresses stored in addrs so far */
	size_t cnt;
};
10184
10185static int
10186resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
10187			const char *sym_name, void *ctx)
10188{
10189	struct kprobe_multi_resolve *res = ctx;
10190	int err;
10191
10192	if (!glob_match(sym_name, res->pattern))
10193		return 0;
10194
10195	err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long),
10196				res->cnt + 1);
10197	if (err)
10198		return err;
10199
10200	res->addrs[res->cnt++] = (unsigned long) sym_addr;
10201	return 0;
10202}
10203
10204struct bpf_link *
10205bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
10206				      const char *pattern,
10207				      const struct bpf_kprobe_multi_opts *opts)
10208{
10209	LIBBPF_OPTS(bpf_link_create_opts, lopts);
10210	struct kprobe_multi_resolve res = {
10211		.pattern = pattern,
10212	};
10213	struct bpf_link *link = NULL;
10214	char errmsg[STRERR_BUFSIZE];
10215	const unsigned long *addrs;
10216	int err, link_fd, prog_fd;
10217	const __u64 *cookies;
10218	const char **syms;
10219	bool retprobe;
10220	size_t cnt;
10221
10222	if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
10223		return libbpf_err_ptr(-EINVAL);
10224
10225	syms    = OPTS_GET(opts, syms, false);
10226	addrs   = OPTS_GET(opts, addrs, false);
10227	cnt     = OPTS_GET(opts, cnt, false);
10228	cookies = OPTS_GET(opts, cookies, false);
10229
10230	if (!pattern && !addrs && !syms)
10231		return libbpf_err_ptr(-EINVAL);
10232	if (pattern && (addrs || syms || cookies || cnt))
10233		return libbpf_err_ptr(-EINVAL);
10234	if (!pattern && !cnt)
10235		return libbpf_err_ptr(-EINVAL);
10236	if (addrs && syms)
10237		return libbpf_err_ptr(-EINVAL);
10238
10239	if (pattern) {
10240		err = libbpf_kallsyms_parse(resolve_kprobe_multi_cb, &res);
10241		if (err)
10242			goto error;
10243		if (!res.cnt) {
10244			err = -ENOENT;
10245			goto error;
10246		}
10247		addrs = res.addrs;
10248		cnt = res.cnt;
10249	}
10250
10251	retprobe = OPTS_GET(opts, retprobe, false);
10252
10253	lopts.kprobe_multi.syms = syms;
10254	lopts.kprobe_multi.addrs = addrs;
10255	lopts.kprobe_multi.cookies = cookies;
10256	lopts.kprobe_multi.cnt = cnt;
10257	lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
10258
10259	link = calloc(1, sizeof(*link));
10260	if (!link) {
10261		err = -ENOMEM;
10262		goto error;
10263	}
10264	link->detach = &bpf_link__detach_fd;
10265
10266	prog_fd = bpf_program__fd(prog);
10267	link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
10268	if (link_fd < 0) {
10269		err = -errno;
10270		pr_warn("prog '%s': failed to attach: %s\n",
10271			prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10272		goto error;
10273	}
10274	link->fd = link_fd;
10275	free(res.addrs);
10276	return link;
10277
10278error:
10279	free(link);
10280	free(res.addrs);
10281	return libbpf_err_ptr(err);
10282}
10283
10284static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10285{
10286	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
10287	unsigned long offset = 0;
10288	const char *func_name;
10289	char *func;
10290	int n;
10291
10292	*link = NULL;
10293
10294	/* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
10295	if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
10296		return 0;
10297
10298	opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
10299	if (opts.retprobe)
10300		func_name = prog->sec_name + sizeof("kretprobe/") - 1;
10301	else
10302		func_name = prog->sec_name + sizeof("kprobe/") - 1;
10303
10304	n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
10305	if (n < 1) {
10306		pr_warn("kprobe name is invalid: %s\n", func_name);
10307		return -EINVAL;
10308	}
10309	if (opts.retprobe && offset != 0) {
10310		free(func);
10311		pr_warn("kretprobes do not support offset specification\n");
10312		return -EINVAL;
10313	}
10314
10315	opts.offset = offset;
10316	*link = bpf_program__attach_kprobe_opts(prog, func, &opts);
10317	free(func);
10318	return libbpf_get_error(*link);
10319}
10320
10321static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10322{
10323	LIBBPF_OPTS(bpf_ksyscall_opts, opts);
10324	const char *syscall_name;
10325
10326	*link = NULL;
10327
10328	/* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
10329	if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
10330		return 0;
10331
10332	opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
10333	if (opts.retprobe)
10334		syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
10335	else
10336		syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
10337
10338	*link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
10339	return *link ? 0 : -errno;
10340}
10341
10342static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10343{
10344	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
10345	const char *spec;
10346	char *pattern;
10347	int n;
10348
10349	*link = NULL;
10350
10351	/* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
10352	if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
10353	    strcmp(prog->sec_name, "kretprobe.multi") == 0)
10354		return 0;
10355
10356	opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
10357	if (opts.retprobe)
10358		spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
10359	else
10360		spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
10361
10362	n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
10363	if (n < 1) {
10364		pr_warn("kprobe multi pattern is invalid: %s\n", pattern);
10365		return -EINVAL;
10366	}
10367
10368	*link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
10369	free(pattern);
10370	return libbpf_get_error(*link);
10371}
10372
/* Build a legacy uprobe event name of the form
 * "libbpf_<pid>_<binary_path>_0x<offset>" in @buf (at most @buf_sz bytes,
 * NUL-terminated), then replace every non-alphanumeric character in the
 * result with '_'.
 */
static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
					 const char *binary_path, uint64_t offset)
{
	size_t i;

	/* snprintf() writes nothing for a zero-sized buffer, so there would be
	 * no terminator for the loop below to stop at
	 */
	if (buf_sz == 0)
		return;

	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", (unsigned int)getpid(),
		 binary_path, (size_t)offset);

	/* sanitize binary_path in the probe name */
	for (i = 0; buf[i]; i++) {
		/* <ctype.h> functions require a value representable as
		 * unsigned char; a plain (possibly negative) char from a
		 * non-ASCII path would be undefined behavior
		 */
		if (!isalnum((unsigned char)buf[i]))
			buf[i] = '_';
	}
}
10386
/* Append a u[ret]probe definition for @binary_path at @offset, named
 * @probe_name, to the file returned by tracefs_uprobe_events().
 */
static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
					  const char *binary_path, size_t offset)
{
	const char *group = retprobe ? "uretprobes" : "uprobes";
	const char kind = retprobe ? 'r' : 'p';

	return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
			      kind, group, probe_name, binary_path, offset);
}
10395
/* Append a "-:<group>/<probe_name>" removal record for a legacy u[ret]probe
 * event to the file returned by tracefs_uprobe_events().
 */
static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
{
	const char *group = retprobe ? "uretprobes" : "uprobes";

	return append_to_file(tracefs_uprobe_events(), "-:%s/%s", group, probe_name);
}
10401
/* Look up the event ID of a legacy u[ret]probe event by parsing the event's
 * "id" file under tracefs. Returns whatever parse_uint_from_file() returns.
 */
static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
{
	const char *group = retprobe ? "uretprobes" : "uprobes";
	char id_path[512];

	snprintf(id_path, sizeof(id_path), "%s/events/%s/%s/id",
		 tracefs_path(), group, probe_name);

	return parse_uint_from_file(id_path, "%d\n");
}
10411
10412static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
10413					 const char *binary_path, size_t offset, int pid)
10414{
10415	struct perf_event_attr attr;
10416	int type, pfd, err;
10417
10418	err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
10419	if (err < 0) {
10420		pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
10421			binary_path, (size_t)offset, err);
10422		return err;
10423	}
10424	type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
10425	if (type < 0) {
10426		err = type;
10427		pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
10428			binary_path, offset, err);
10429		goto err_clean_legacy;
10430	}
10431
10432	memset(&attr, 0, sizeof(attr));
10433	attr.size = sizeof(attr);
10434	attr.config = type;
10435	attr.type = PERF_TYPE_TRACEPOINT;
10436
10437	pfd = syscall(__NR_perf_event_open, &attr,
10438		      pid < 0 ? -1 : pid, /* pid */
10439		      pid == -1 ? 0 : -1, /* cpu */
10440		      -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
10441	if (pfd < 0) {
10442		err = -errno;
10443		pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
10444		goto err_clean_legacy;
10445	}
10446	return pfd;
10447
10448err_clean_legacy:
10449	/* Clear the newly added legacy uprobe_event */
10450	remove_uprobe_event_legacy(probe_name, retprobe);
10451	return err;
10452}
10453
10454/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
10455static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
10456{
10457	while ((scn = elf_nextscn(elf, scn)) != NULL) {
10458		GElf_Shdr sh;
10459
10460		if (!gelf_getshdr(scn, &sh))
10461			continue;
10462		if (sh.sh_type == sh_type)
10463			return scn;
10464	}
10465	return NULL;
10466}
10467
10468/* Find offset of function name in object specified by path.  "name" matches
10469 * symbol name or name@@LIB for library functions.
10470 */
10471static long elf_find_func_offset(const char *binary_path, const char *name)
10472{
10473	int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
10474	bool is_shared_lib, is_name_qualified;
10475	char errmsg[STRERR_BUFSIZE];
10476	long ret = -ENOENT;
10477	size_t name_len;
10478	GElf_Ehdr ehdr;
10479	Elf *elf;
10480
10481	fd = open(binary_path, O_RDONLY | O_CLOEXEC);
10482	if (fd < 0) {
10483		ret = -errno;
10484		pr_warn("failed to open %s: %s\n", binary_path,
10485			libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
10486		return ret;
10487	}
10488	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
10489	if (!elf) {
10490		pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
10491		close(fd);
10492		return -LIBBPF_ERRNO__FORMAT;
10493	}
10494	if (!gelf_getehdr(elf, &ehdr)) {
10495		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
10496		ret = -LIBBPF_ERRNO__FORMAT;
10497		goto out;
10498	}
10499	/* for shared lib case, we do not need to calculate relative offset */
10500	is_shared_lib = ehdr.e_type == ET_DYN;
10501
10502	name_len = strlen(name);
10503	/* Does name specify "@@LIB"? */
10504	is_name_qualified = strstr(name, "@@") != NULL;
10505
10506	/* Search SHT_DYNSYM, SHT_SYMTAB for symbol.  This search order is used because if
10507	 * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
10508	 * linked binary may not have SHT_DYMSYM, so absence of a section should not be
10509	 * reported as a warning/error.
10510	 */
10511	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
10512		size_t nr_syms, strtabidx, idx;
10513		Elf_Data *symbols = NULL;
10514		Elf_Scn *scn = NULL;
10515		int last_bind = -1;
10516		const char *sname;
10517		GElf_Shdr sh;
10518
10519		scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
10520		if (!scn) {
10521			pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
10522				 binary_path);
10523			continue;
10524		}
10525		if (!gelf_getshdr(scn, &sh))
10526			continue;
10527		strtabidx = sh.sh_link;
10528		symbols = elf_getdata(scn, 0);
10529		if (!symbols) {
10530			pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
10531				binary_path, elf_errmsg(-1));
10532			ret = -LIBBPF_ERRNO__FORMAT;
10533			goto out;
10534		}
10535		nr_syms = symbols->d_size / sh.sh_entsize;
10536
10537		for (idx = 0; idx < nr_syms; idx++) {
10538			int curr_bind;
10539			GElf_Sym sym;
10540			Elf_Scn *sym_scn;
10541			GElf_Shdr sym_sh;
10542
10543			if (!gelf_getsym(symbols, idx, &sym))
10544				continue;
10545
10546			if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
10547				continue;
10548
10549			sname = elf_strptr(elf, strtabidx, sym.st_name);
10550			if (!sname)
10551				continue;
10552
10553			curr_bind = GELF_ST_BIND(sym.st_info);
10554
10555			/* User can specify func, func@@LIB or func@@LIB_VERSION. */
10556			if (strncmp(sname, name, name_len) != 0)
10557				continue;
10558			/* ...but we don't want a search for "foo" to match 'foo2" also, so any
10559			 * additional characters in sname should be of the form "@@LIB".
10560			 */
10561			if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
10562				continue;
10563
10564			if (ret >= 0) {
10565				/* handle multiple matches */
10566				if (last_bind != STB_WEAK && curr_bind != STB_WEAK) {
10567					/* Only accept one non-weak bind. */
10568					pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
10569						sname, name, binary_path);
10570					ret = -LIBBPF_ERRNO__FORMAT;
10571					goto out;
10572				} else if (curr_bind == STB_WEAK) {
10573					/* already have a non-weak bind, and
10574					 * this is a weak bind, so ignore.
10575					 */
10576					continue;
10577				}
10578			}
10579
10580			/* Transform symbol's virtual address (absolute for
10581			 * binaries and relative for shared libs) into file
10582			 * offset, which is what kernel is expecting for
10583			 * uprobe/uretprobe attachment.
10584			 * See Documentation/trace/uprobetracer.rst for more
10585			 * details.
10586			 * This is done by looking up symbol's containing
10587			 * section's header and using it's virtual address
10588			 * (sh_addr) and corresponding file offset (sh_offset)
10589			 * to transform sym.st_value (virtual address) into
10590			 * desired final file offset.
10591			 */
10592			sym_scn = elf_getscn(elf, sym.st_shndx);
10593			if (!sym_scn)
10594				continue;
10595			if (!gelf_getshdr(sym_scn, &sym_sh))
10596				continue;
10597
10598			ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset;
10599			last_bind = curr_bind;
10600		}
10601		if (ret > 0)
10602			break;
10603	}
10604
10605	if (ret > 0) {
10606		pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
10607			 ret);
10608	} else {
10609		if (ret == 0) {
10610			pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
10611				is_shared_lib ? "should not be 0 in a shared library" :
10612						"try using shared library path instead");
10613			ret = -ENOENT;
10614		} else {
10615			pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
10616		}
10617	}
10618out:
10619	elf_end(elf);
10620	close(fd);
10621	return ret;
10622}
10623
/* Return the architecture-specific library directory searched in addition to
 * the generic ones, or NULL if the build target is not covered below.
 */
static const char *arch_specific_lib_paths(void)
{
	const char *lib_dir;

	/*
	 * Based on https://packages.debian.org/sid/libc6.
	 *
	 * Assume that the traced program is built for the same architecture
	 * as libbpf, which should cover the vast majority of cases.
	 */
#if defined(__x86_64__)
	lib_dir = "/lib/x86_64-linux-gnu";
#elif defined(__i386__)
	lib_dir = "/lib/i386-linux-gnu";
#elif defined(__s390x__)
	lib_dir = "/lib/s390x-linux-gnu";
#elif defined(__s390__)
	lib_dir = "/lib/s390-linux-gnu";
#elif defined(__arm__) && defined(__SOFTFP__)
	lib_dir = "/lib/arm-linux-gnueabi";
#elif defined(__arm__) && !defined(__SOFTFP__)
	lib_dir = "/lib/arm-linux-gnueabihf";
#elif defined(__aarch64__)
	lib_dir = "/lib/aarch64-linux-gnu";
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
	lib_dir = "/lib/mips64el-linux-gnuabi64";
#elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
	lib_dir = "/lib/mipsel-linux-gnu";
#elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	lib_dir = "/lib/powerpc64le-linux-gnu";
#elif defined(__sparc__) && defined(__arch64__)
	lib_dir = "/lib/sparc64-linux-gnu";
#elif defined(__riscv) && __riscv_xlen == 64
	lib_dir = "/lib/riscv64-linux-gnu";
#else
	lib_dir = NULL;
#endif

	return lib_dir;
}
10660
/* Get full path to program/shared library.
 *
 * @file is looked up in a colon-separated list of directories: for names that
 * look like shared libraries (ending in ".so" or containing ".so.") in
 * $LD_LIBRARY_PATH, /usr/lib64:/usr/lib and the arch-specific lib dir; for
 * anything else in $PATH and /usr/bin:/usr/sbin. The first candidate with the
 * required permissions is written to @result.
 *
 * Returns 0 on success, -ENOENT if no candidate was found.
 */
static int resolve_full_path(const char *file, char *result, size_t result_sz)
{
	const char *search_paths[3] = {};
	int i, perm;

	if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
		search_paths[0] = getenv("LD_LIBRARY_PATH");
		search_paths[1] = "/usr/lib64:/usr/lib";
		search_paths[2] = arch_specific_lib_paths();
		/* shared libraries are commonly installed without the
		 * executable bit, so only require them to be readable
		 */
		perm = R_OK;
	} else {
		search_paths[0] = getenv("PATH");
		search_paths[1] = "/usr/bin:/usr/sbin";
		perm = R_OK | X_OK;
	}

	for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
		const char *s;

		if (!search_paths[i])
			continue;
		for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
			char *next_path;
			int seg_len, n;

			/* step over the separator found by the previous iteration */
			if (s[0] == ':')
				s++;
			next_path = strchr(s, ':');
			seg_len = next_path ? next_path - s : strlen(s);
			if (!seg_len)
				continue;
			n = snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
			/* a truncated candidate names a different file; skip it */
			if (n < 0 || (size_t)n >= result_sz)
				continue;
			/* ensure it has required permissions */
			if (access(result, perm) < 0)
				continue;
			pr_debug("resolved '%s' to '%s'\n", file, result);
			return 0;
		}
	}
	return -ENOENT;
}
10701
10702LIBBPF_API struct bpf_link *
10703bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
10704				const char *binary_path, size_t func_offset,
10705				const struct bpf_uprobe_opts *opts)
10706{
10707	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10708	char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
10709	char full_binary_path[PATH_MAX];
10710	struct bpf_link *link;
10711	size_t ref_ctr_off;
10712	int pfd, err;
10713	bool retprobe, legacy;
10714	const char *func_name;
10715
10716	if (!OPTS_VALID(opts, bpf_uprobe_opts))
10717		return libbpf_err_ptr(-EINVAL);
10718
10719	retprobe = OPTS_GET(opts, retprobe, false);
10720	ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
10721	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10722
10723	if (!binary_path)
10724		return libbpf_err_ptr(-EINVAL);
10725
10726	if (!strchr(binary_path, '/')) {
10727		err = resolve_full_path(binary_path, full_binary_path,
10728					sizeof(full_binary_path));
10729		if (err) {
10730			pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
10731				prog->name, binary_path, err);
10732			return libbpf_err_ptr(err);
10733		}
10734		binary_path = full_binary_path;
10735	}
10736	func_name = OPTS_GET(opts, func_name, NULL);
10737	if (func_name) {
10738		long sym_off;
10739
10740		sym_off = elf_find_func_offset(binary_path, func_name);
10741		if (sym_off < 0)
10742			return libbpf_err_ptr(sym_off);
10743		func_offset += sym_off;
10744	}
10745
10746	legacy = determine_uprobe_perf_type() < 0;
10747	if (!legacy) {
10748		pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
10749					    func_offset, pid, ref_ctr_off);
10750	} else {
10751		char probe_name[PATH_MAX + 64];
10752
10753		if (ref_ctr_off)
10754			return libbpf_err_ptr(-EINVAL);
10755
10756		gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
10757					     binary_path, func_offset);
10758
10759		legacy_probe = strdup(probe_name);
10760		if (!legacy_probe)
10761			return libbpf_err_ptr(-ENOMEM);
10762
10763		pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
10764						    binary_path, func_offset, pid);
10765	}
10766	if (pfd < 0) {
10767		err = -errno;
10768		pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
10769			prog->name, retprobe ? "uretprobe" : "uprobe",
10770			binary_path, func_offset,
10771			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10772		goto err_out;
10773	}
10774
10775	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10776	err = libbpf_get_error(link);
10777	if (err) {
10778		close(pfd);
10779		pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
10780			prog->name, retprobe ? "uretprobe" : "uprobe",
10781			binary_path, func_offset,
10782			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10783		goto err_clean_legacy;
10784	}
10785	if (legacy) {
10786		struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10787
10788		perf_link->legacy_probe_name = legacy_probe;
10789		perf_link->legacy_is_kprobe = false;
10790		perf_link->legacy_is_retprobe = retprobe;
10791	}
10792	return link;
10793
10794err_clean_legacy:
10795	if (legacy)
10796		remove_uprobe_event_legacy(legacy_probe, retprobe);
10797err_out:
10798	free(legacy_probe);
10799	return libbpf_err_ptr(err);
10800}
10801
10802/* Format of u[ret]probe section definition supporting auto-attach:
10803 * u[ret]probe/binary:function[+offset]
10804 *
10805 * binary can be an absolute/relative path or a filename; the latter is resolved to a
10806 * full binary path via bpf_program__attach_uprobe_opts.
10807 *
10808 * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
10809 * specified (and auto-attach is not possible) or the above format is specified for
10810 * auto-attach.
10811 */
10812static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10813{
10814	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
10815	char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
10816	int n, ret = -EINVAL;
10817	long offset = 0;
10818
10819	*link = NULL;
10820
10821	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li",
10822		   &probe_type, &binary_path, &func_name, &offset);
10823	switch (n) {
10824	case 1:
10825		/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
10826		ret = 0;
10827		break;
10828	case 2:
10829		pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
10830			prog->name, prog->sec_name);
10831		break;
10832	case 3:
10833	case 4:
10834		opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
10835				strcmp(probe_type, "uretprobe.s") == 0;
10836		if (opts.retprobe && offset != 0) {
10837			pr_warn("prog '%s': uretprobes do not support offset specification\n",
10838				prog->name);
10839			break;
10840		}
10841		opts.func_name = func_name;
10842		*link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
10843		ret = libbpf_get_error(*link);
10844		break;
10845	default:
10846		pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
10847			prog->sec_name);
10848		break;
10849	}
10850	free(probe_type);
10851	free(binary_path);
10852	free(func_name);
10853
10854	return ret;
10855}
10856
10857struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
10858					    bool retprobe, pid_t pid,
10859					    const char *binary_path,
10860					    size_t func_offset)
10861{
10862	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
10863
10864	return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
10865}
10866
10867struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
10868					  pid_t pid, const char *binary_path,
10869					  const char *usdt_provider, const char *usdt_name,
10870					  const struct bpf_usdt_opts *opts)
10871{
10872	char resolved_path[512];
10873	struct bpf_object *obj = prog->obj;
10874	struct bpf_link *link;
10875	__u64 usdt_cookie;
10876	int err;
10877
10878	if (!OPTS_VALID(opts, bpf_uprobe_opts))
10879		return libbpf_err_ptr(-EINVAL);
10880
10881	if (bpf_program__fd(prog) < 0) {
10882		pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
10883			prog->name);
10884		return libbpf_err_ptr(-EINVAL);
10885	}
10886
10887	if (!binary_path)
10888		return libbpf_err_ptr(-EINVAL);
10889
10890	if (!strchr(binary_path, '/')) {
10891		err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
10892		if (err) {
10893			pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
10894				prog->name, binary_path, err);
10895			return libbpf_err_ptr(err);
10896		}
10897		binary_path = resolved_path;
10898	}
10899
10900	/* USDT manager is instantiated lazily on first USDT attach. It will
10901	 * be destroyed together with BPF object in bpf_object__close().
10902	 */
10903	if (IS_ERR(obj->usdt_man))
10904		return libbpf_ptr(obj->usdt_man);
10905	if (!obj->usdt_man) {
10906		obj->usdt_man = usdt_manager_new(obj);
10907		if (IS_ERR(obj->usdt_man))
10908			return libbpf_ptr(obj->usdt_man);
10909	}
10910
10911	usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
10912	link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
10913				        usdt_provider, usdt_name, usdt_cookie);
10914	err = libbpf_get_error(link);
10915	if (err)
10916		return libbpf_err_ptr(err);
10917	return link;
10918}
10919
/* Auto-attach handler for SEC("usdt/<path>:<provider>:<name>") programs.
 * Bare SEC("usdt") is accepted but not attached: *link is set to NULL and 0
 * is returned.
 */
static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	char *bin_path = NULL, *usdt_provider = NULL, *usdt_name = NULL;
	const char *sec;
	int nparsed, err;

	sec = bpf_program__section_name(prog);
	if (!strcmp(sec, "usdt")) {
		/* no auto-attach for just SEC("usdt") */
		*link = NULL;
		return 0;
	}

	nparsed = sscanf(sec, "usdt/%m[^:]:%m[^:]:%m[^:]",
			 &bin_path, &usdt_provider, &usdt_name);
	if (nparsed == 3) {
		*link = bpf_program__attach_usdt(prog, -1 /* any process */, bin_path,
						 usdt_provider, usdt_name, NULL);
		err = libbpf_get_error(*link);
	} else {
		pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
			sec);
		err = -EINVAL;
	}

	/* strings allocated by the %m conversions; unmatched ones are still NULL */
	free(bin_path);
	free(usdt_provider);
	free(usdt_name);
	return err;
}
10948
10949static int determine_tracepoint_id(const char *tp_category,
10950				   const char *tp_name)
10951{
10952	char file[PATH_MAX];
10953	int ret;
10954
10955	ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
10956		       tracefs_path(), tp_category, tp_name);
10957	if (ret < 0)
10958		return -errno;
10959	if (ret >= sizeof(file)) {
10960		pr_debug("tracepoint %s/%s path is too long\n",
10961			 tp_category, tp_name);
10962		return -E2BIG;
10963	}
10964	return parse_uint_from_file(file, "%d\n");
10965}
10966
10967static int perf_event_open_tracepoint(const char *tp_category,
10968				      const char *tp_name)
10969{
10970	struct perf_event_attr attr = {};
10971	char errmsg[STRERR_BUFSIZE];
10972	int tp_id, pfd, err;
10973
10974	tp_id = determine_tracepoint_id(tp_category, tp_name);
10975	if (tp_id < 0) {
10976		pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
10977			tp_category, tp_name,
10978			libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
10979		return tp_id;
10980	}
10981
10982	attr.type = PERF_TYPE_TRACEPOINT;
10983	attr.size = sizeof(attr);
10984	attr.config = tp_id;
10985
10986	pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
10987		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10988	if (pfd < 0) {
10989		err = -errno;
10990		pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
10991			tp_category, tp_name,
10992			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10993		return err;
10994	}
10995	return pfd;
10996}
10997
10998struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
10999						     const char *tp_category,
11000						     const char *tp_name,
11001						     const struct bpf_tracepoint_opts *opts)
11002{
11003	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11004	char errmsg[STRERR_BUFSIZE];
11005	struct bpf_link *link;
11006	int pfd, err;
11007
11008	if (!OPTS_VALID(opts, bpf_tracepoint_opts))
11009		return libbpf_err_ptr(-EINVAL);
11010
11011	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11012
11013	pfd = perf_event_open_tracepoint(tp_category, tp_name);
11014	if (pfd < 0) {
11015		pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
11016			prog->name, tp_category, tp_name,
11017			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11018		return libbpf_err_ptr(pfd);
11019	}
11020	link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11021	err = libbpf_get_error(link);
11022	if (err) {
11023		close(pfd);
11024		pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
11025			prog->name, tp_category, tp_name,
11026			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11027		return libbpf_err_ptr(err);
11028	}
11029	return link;
11030}
11031
/* Same as bpf_program__attach_tracepoint_opts() with no options supplied. */
struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
						const char *tp_category,
						const char *tp_name)
{
	const struct bpf_tracepoint_opts *no_opts = NULL;

	return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, no_opts);
}
11038
11039static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11040{
11041	char *sec_name, *tp_cat, *tp_name;
11042
11043	*link = NULL;
11044
11045	/* no auto-attach for SEC("tp") or SEC("tracepoint") */
11046	if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
11047		return 0;
11048
11049	sec_name = strdup(prog->sec_name);
11050	if (!sec_name)
11051		return -ENOMEM;
11052
11053	/* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
11054	if (str_has_pfx(prog->sec_name, "tp/"))
11055		tp_cat = sec_name + sizeof("tp/") - 1;
11056	else
11057		tp_cat = sec_name + sizeof("tracepoint/") - 1;
11058	tp_name = strchr(tp_cat, '/');
11059	if (!tp_name) {
11060		free(sec_name);
11061		return -EINVAL;
11062	}
11063	*tp_name = '\0';
11064	tp_name++;
11065
11066	*link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
11067	free(sec_name);
11068	return libbpf_get_error(*link);
11069}
11070
11071struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
11072						    const char *tp_name)
11073{
11074	char errmsg[STRERR_BUFSIZE];
11075	struct bpf_link *link;
11076	int prog_fd, pfd;
11077
11078	prog_fd = bpf_program__fd(prog);
11079	if (prog_fd < 0) {
11080		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11081		return libbpf_err_ptr(-EINVAL);
11082	}
11083
11084	link = calloc(1, sizeof(*link));
11085	if (!link)
11086		return libbpf_err_ptr(-ENOMEM);
11087	link->detach = &bpf_link__detach_fd;
11088
11089	pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
11090	if (pfd < 0) {
11091		pfd = -errno;
11092		free(link);
11093		pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
11094			prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11095		return libbpf_err_ptr(pfd);
11096	}
11097	link->fd = pfd;
11098	return link;
11099}
11100
11101static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11102{
11103	static const char *const prefixes[] = {
11104		"raw_tp",
11105		"raw_tracepoint",
11106		"raw_tp.w",
11107		"raw_tracepoint.w",
11108	};
11109	size_t i;
11110	const char *tp_name = NULL;
11111
11112	*link = NULL;
11113
11114	for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
11115		size_t pfx_len;
11116
11117		if (!str_has_pfx(prog->sec_name, prefixes[i]))
11118			continue;
11119
11120		pfx_len = strlen(prefixes[i]);
11121		/* no auto-attach case of, e.g., SEC("raw_tp") */
11122		if (prog->sec_name[pfx_len] == '\0')
11123			return 0;
11124
11125		if (prog->sec_name[pfx_len] != '/')
11126			continue;
11127
11128		tp_name = prog->sec_name + pfx_len + 1;
11129		break;
11130	}
11131
11132	if (!tp_name) {
11133		pr_warn("prog '%s': invalid section name '%s'\n",
11134			prog->name, prog->sec_name);
11135		return -EINVAL;
11136	}
11137
11138	*link = bpf_program__attach_raw_tracepoint(prog, tp_name);
11139	return libbpf_get_error(link);
11140}
11141
11142/* Common logic for all BPF program types that attach to a btf_id */
11143static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
11144						   const struct bpf_trace_opts *opts)
11145{
11146	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
11147	char errmsg[STRERR_BUFSIZE];
11148	struct bpf_link *link;
11149	int prog_fd, pfd;
11150
11151	if (!OPTS_VALID(opts, bpf_trace_opts))
11152		return libbpf_err_ptr(-EINVAL);
11153
11154	prog_fd = bpf_program__fd(prog);
11155	if (prog_fd < 0) {
11156		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11157		return libbpf_err_ptr(-EINVAL);
11158	}
11159
11160	link = calloc(1, sizeof(*link));
11161	if (!link)
11162		return libbpf_err_ptr(-ENOMEM);
11163	link->detach = &bpf_link__detach_fd;
11164
11165	/* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
11166	link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
11167	pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
11168	if (pfd < 0) {
11169		pfd = -errno;
11170		free(link);
11171		pr_warn("prog '%s': failed to attach: %s\n",
11172			prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11173		return libbpf_err_ptr(pfd);
11174	}
11175	link->fd = pfd;
11176	return link;
11177}
11178
/* Attach a btf_id-based program with no extra options. */
struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
{
	const struct bpf_trace_opts *no_opts = NULL;

	return bpf_program__attach_btf_id(prog, no_opts);
}
11183
/* Attach a btf_id-based program, forwarding caller-provided options. */
struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
						const struct bpf_trace_opts *opts)
{
	struct bpf_link *res = bpf_program__attach_btf_id(prog, opts);

	return res;
}
11189
/* Attach an LSM program; goes through the common btf_id attach path with no
 * options.
 */
struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
{
	const struct bpf_trace_opts *no_opts = NULL;

	return bpf_program__attach_btf_id(prog, no_opts);
}
11194
/* Auto-attach handler built on bpf_program__attach_trace(): stores the result
 * in *link and returns the error code extracted from it (0 on success).
 */
static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	struct bpf_link *res = bpf_program__attach_trace(prog);

	*link = res;
	return libbpf_get_error(res);
}
11200
/* Auto-attach handler built on bpf_program__attach_lsm(): stores the result
 * in *link and returns the error code extracted from it (0 on success).
 */
static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	struct bpf_link *res = bpf_program__attach_lsm(prog);

	*link = res;
	return libbpf_get_error(res);
}
11206
11207static struct bpf_link *
11208bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id,
11209		       const char *target_name)
11210{
11211	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
11212			    .target_btf_id = btf_id);
11213	enum bpf_attach_type attach_type;
11214	char errmsg[STRERR_BUFSIZE];
11215	struct bpf_link *link;
11216	int prog_fd, link_fd;
11217
11218	prog_fd = bpf_program__fd(prog);
11219	if (prog_fd < 0) {
11220		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11221		return libbpf_err_ptr(-EINVAL);
11222	}
11223
11224	link = calloc(1, sizeof(*link));
11225	if (!link)
11226		return libbpf_err_ptr(-ENOMEM);
11227	link->detach = &bpf_link__detach_fd;
11228
11229	attach_type = bpf_program__expected_attach_type(prog);
11230	link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
11231	if (link_fd < 0) {
11232		link_fd = -errno;
11233		free(link);
11234		pr_warn("prog '%s': failed to attach to %s: %s\n",
11235			prog->name, target_name,
11236			libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
11237		return libbpf_err_ptr(link_fd);
11238	}
11239	link->fd = link_fd;
11240	return link;
11241}
11242
/* Attach prog to the cgroup referred to by cgroup_fd via a BPF link. */
struct bpf_link *
bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
{
	const int no_btf_id = 0;

	return bpf_program__attach_fd(prog, cgroup_fd, no_btf_id, "cgroup");
}
11248
/* Attach prog to the network namespace referred to by netns_fd via a BPF link. */
struct bpf_link *
bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
{
	const int no_btf_id = 0;

	return bpf_program__attach_fd(prog, netns_fd, no_btf_id, "netns");
}
11254
/* Attach an XDP program to the interface identified by ifindex via a BPF link. */
struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
{
	const int no_btf_id = 0;

	/* target_fd/target_ifindex use the same field in LINK_CREATE */
	return bpf_program__attach_fd(prog, ifindex, no_btf_id, "xdp");
}
11260
11261struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
11262					      int target_fd,
11263					      const char *attach_func_name)
11264{
11265	int btf_id;
11266
11267	if (!!target_fd != !!attach_func_name) {
11268		pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
11269			prog->name);
11270		return libbpf_err_ptr(-EINVAL);
11271	}
11272
11273	if (prog->type != BPF_PROG_TYPE_EXT) {
11274		pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
11275			prog->name);
11276		return libbpf_err_ptr(-EINVAL);
11277	}
11278
11279	if (target_fd) {
11280		btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
11281		if (btf_id < 0)
11282			return libbpf_err_ptr(btf_id);
11283
11284		return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
11285	} else {
11286		/* no target, so use raw_tracepoint_open for compatibility
11287		 * with old kernels
11288		 */
11289		return bpf_program__attach_trace(prog);
11290	}
11291}
11292
11293struct bpf_link *
11294bpf_program__attach_iter(const struct bpf_program *prog,
11295			 const struct bpf_iter_attach_opts *opts)
11296{
11297	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
11298	char errmsg[STRERR_BUFSIZE];
11299	struct bpf_link *link;
11300	int prog_fd, link_fd;
11301	__u32 target_fd = 0;
11302
11303	if (!OPTS_VALID(opts, bpf_iter_attach_opts))
11304		return libbpf_err_ptr(-EINVAL);
11305
11306	link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
11307	link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
11308
11309	prog_fd = bpf_program__fd(prog);
11310	if (prog_fd < 0) {
11311		pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11312		return libbpf_err_ptr(-EINVAL);
11313	}
11314
11315	link = calloc(1, sizeof(*link));
11316	if (!link)
11317		return libbpf_err_ptr(-ENOMEM);
11318	link->detach = &bpf_link__detach_fd;
11319
11320	link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
11321				  &link_create_opts);
11322	if (link_fd < 0) {
11323		link_fd = -errno;
11324		free(link);
11325		pr_warn("prog '%s': failed to attach to iterator: %s\n",
11326			prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
11327		return libbpf_err_ptr(link_fd);
11328	}
11329	link->fd = link_fd;
11330	return link;
11331}
11332
/* Auto-attach handler built on bpf_program__attach_iter() with no options:
 * stores the result in *link and returns the error code extracted from it.
 */
static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
{
	struct bpf_link *res = bpf_program__attach_iter(prog, NULL);

	*link = res;
	return libbpf_get_error(res);
}
11338
11339struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
11340{
11341	struct bpf_link *link = NULL;
11342	int err;
11343
11344	if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
11345		return libbpf_err_ptr(-EOPNOTSUPP);
11346
11347	err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
11348	if (err)
11349		return libbpf_err_ptr(err);
11350
11351	/* When calling bpf_program__attach() explicitly, auto-attach support
11352	 * is expected to work, so NULL returned link is considered an error.
11353	 * This is different for skeleton's attach, see comment in
11354	 * bpf_object__attach_skeleton().
11355	 */
11356	if (!link)
11357		return libbpf_err_ptr(-EOPNOTSUPP);
11358
11359	return link;
11360}
11361
11362static int bpf_link__detach_struct_ops(struct bpf_link *link)
11363{
11364	__u32 zero = 0;
11365
11366	if (bpf_map_delete_elem(link->fd, &zero))
11367		return -errno;
11368
11369	return 0;
11370}
11371
11372struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
11373{
11374	struct bpf_struct_ops *st_ops;
11375	struct bpf_link *link;
11376	__u32 i, zero = 0;
11377	int err;
11378
11379	if (!bpf_map__is_struct_ops(map) || map->fd == -1)
11380		return libbpf_err_ptr(-EINVAL);
11381
11382	link = calloc(1, sizeof(*link));
11383	if (!link)
11384		return libbpf_err_ptr(-EINVAL);
11385
11386	st_ops = map->st_ops;
11387	for (i = 0; i < btf_vlen(st_ops->type); i++) {
11388		struct bpf_program *prog = st_ops->progs[i];
11389		void *kern_data;
11390		int prog_fd;
11391
11392		if (!prog)
11393			continue;
11394
11395		prog_fd = bpf_program__fd(prog);
11396		kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
11397		*(unsigned long *)kern_data = prog_fd;
11398	}
11399
11400	err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
11401	if (err) {
11402		err = -errno;
11403		free(link);
11404		return libbpf_err_ptr(err);
11405	}
11406
11407	link->detach = bpf_link__detach_struct_ops;
11408	link->fd = map->fd;
11409
11410	return link;
11411}
11412
/* Per-record callback used by perf_event_read_simple(): receives one perf
 * event record and the caller's private_data. Any return value other than
 * LIBBPF_PERF_EVENT_CONT stops the read loop.
 */
typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
							  void *private_data);
11415
/* Drain pending records from one perf ring buffer, passing each to fn.
 *
 * mmap_mem points at the perf_event_mmap_page header; the data area starts
 * page_size bytes after it and is mmap_size bytes long. Offsets are wrapped
 * with (mmap_size - 1), so mmap_size is presumably a power of two.
 *
 * A record that runs past the end of the data area is reassembled in
 * *copy_mem, which is (re)allocated when *copy_size is too small; both are
 * updated in place so the scratch buffer can be reused across calls.
 *
 * The loop stops when the buffer is drained, when fn returns anything other
 * than LIBBPF_PERF_EVENT_CONT, or when the scratch buffer can't be allocated
 * (LIBBPF_PERF_EVENT_ERROR). The consumer position reached so far is always
 * written back, and the final status is passed through libbpf_err().
 */
static enum bpf_perf_event_ret
perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
		       void **copy_mem, size_t *copy_size,
		       bpf_perf_event_print_t fn, void *private_data)
{
	struct perf_event_mmap_page *header = mmap_mem;
	__u64 data_head = ring_buffer_read_head(header);
	__u64 data_tail = header->data_tail;
	void *base = ((__u8 *)header) + page_size;
	int ret = LIBBPF_PERF_EVENT_CONT;
	struct perf_event_header *ehdr;
	size_t ehdr_size;

	while (data_head != data_tail) {
		/* locate the next record inside the circular data area */
		ehdr = base + (data_tail & (mmap_size - 1));
		ehdr_size = ehdr->size;

		/* record wraps around the end of the data area */
		if (((void *)ehdr) + ehdr_size > base + mmap_size) {
			void *copy_start = ehdr;
			size_t len_first = base + mmap_size - copy_start;
			size_t len_secnd = ehdr_size - len_first;

			/* grow the scratch buffer; old contents aren't needed */
			if (*copy_size < ehdr_size) {
				free(*copy_mem);
				*copy_mem = malloc(ehdr_size);
				if (!*copy_mem) {
					*copy_size = 0;
					ret = LIBBPF_PERF_EVENT_ERROR;
					break;
				}
				*copy_size = ehdr_size;
			}

			/* stitch the tail of the area and its beginning together */
			memcpy(*copy_mem, copy_start, len_first);
			memcpy(*copy_mem + len_first, base, len_secnd);
			ehdr = *copy_mem;
		}

		ret = fn(ehdr, private_data);
		/* the record counts as consumed even if fn asks to stop */
		data_tail += ehdr_size;
		if (ret != LIBBPF_PERF_EVENT_CONT)
			break;
	}

	/* publish how far we've read */
	ring_buffer_write_tail(header, data_tail);
	return libbpf_err(ret);
}
11463
struct perf_buffer;

/* Arguments collected by perf_buffer__new()/perf_buffer__new_raw() and handed
 * to __perf_buffer__new().
 */
struct perf_buffer_params {
	struct perf_event_attr *attr;
	/* if event_cb is specified, it takes precedence */
	perf_buffer_event_fn event_cb;
	/* sample_cb and lost_cb are higher-level common-case callbacks */
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx;
	/* when cpu_cnt > 0, cpus[i] and map_keys[i] give the CPU and map key
	 * of the i-th buffer; otherwise CPUs are enumerated automatically
	 */
	int cpu_cnt;
	int *cpus;
	int *map_keys;
};
11478
/* State of a single per-CPU perf ring buffer managed by a perf_buffer. */
struct perf_cpu_buf {
	struct perf_buffer *pb; /* owning perf_buffer */
	void *base; /* mmap()'ed memory */
	void *buf; /* for reconstructing segmented data */
	size_t buf_size; /* current capacity of buf */
	int fd; /* perf event FD backing this buffer */
	int cpu; /* CPU the perf event was opened on */
	int map_key; /* key under which fd is stored in the BPF map */
};
11488
/* Manager for a set of per-CPU perf ring buffers feeding user callbacks. */
struct perf_buffer {
	perf_buffer_event_fn event_cb;
	perf_buffer_sample_fn sample_cb;
	perf_buffer_lost_fn lost_cb;
	void *ctx; /* passed into callbacks */

	size_t page_size;
	size_t mmap_size; /* size of each buffer's data area (page_size * page count) */
	struct perf_cpu_buf **cpu_bufs;
	struct epoll_event *events;
	int cpu_cnt; /* number of allocated CPU buffers */
	int epoll_fd; /* epoll instance FD that per-CPU perf event FDs are added to */
	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};
11503
11504static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
11505				      struct perf_cpu_buf *cpu_buf)
11506{
11507	if (!cpu_buf)
11508		return;
11509	if (cpu_buf->base &&
11510	    munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
11511		pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
11512	if (cpu_buf->fd >= 0) {
11513		ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
11514		close(cpu_buf->fd);
11515	}
11516	free(cpu_buf->buf);
11517	free(cpu_buf);
11518}
11519
11520void perf_buffer__free(struct perf_buffer *pb)
11521{
11522	int i;
11523
11524	if (IS_ERR_OR_NULL(pb))
11525		return;
11526	if (pb->cpu_bufs) {
11527		for (i = 0; i < pb->cpu_cnt; i++) {
11528			struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
11529
11530			if (!cpu_buf)
11531				continue;
11532
11533			bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
11534			perf_buffer__free_cpu_buf(pb, cpu_buf);
11535		}
11536		free(pb->cpu_bufs);
11537	}
11538	if (pb->epoll_fd >= 0)
11539		close(pb->epoll_fd);
11540	free(pb->events);
11541	free(pb);
11542}
11543
11544static struct perf_cpu_buf *
11545perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
11546			  int cpu, int map_key)
11547{
11548	struct perf_cpu_buf *cpu_buf;
11549	char msg[STRERR_BUFSIZE];
11550	int err;
11551
11552	cpu_buf = calloc(1, sizeof(*cpu_buf));
11553	if (!cpu_buf)
11554		return ERR_PTR(-ENOMEM);
11555
11556	cpu_buf->pb = pb;
11557	cpu_buf->cpu = cpu;
11558	cpu_buf->map_key = map_key;
11559
11560	cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
11561			      -1, PERF_FLAG_FD_CLOEXEC);
11562	if (cpu_buf->fd < 0) {
11563		err = -errno;
11564		pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
11565			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11566		goto error;
11567	}
11568
11569	cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
11570			     PROT_READ | PROT_WRITE, MAP_SHARED,
11571			     cpu_buf->fd, 0);
11572	if (cpu_buf->base == MAP_FAILED) {
11573		cpu_buf->base = NULL;
11574		err = -errno;
11575		pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
11576			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11577		goto error;
11578	}
11579
11580	if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
11581		err = -errno;
11582		pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
11583			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11584		goto error;
11585	}
11586
11587	return cpu_buf;
11588
11589error:
11590	perf_buffer__free_cpu_buf(pb, cpu_buf);
11591	return (struct perf_cpu_buf *)ERR_PTR(err);
11592}
11593
11594static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
11595					      struct perf_buffer_params *p);
11596
11597struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
11598				     perf_buffer_sample_fn sample_cb,
11599				     perf_buffer_lost_fn lost_cb,
11600				     void *ctx,
11601				     const struct perf_buffer_opts *opts)
11602{
11603	struct perf_buffer_params p = {};
11604	struct perf_event_attr attr = {};
11605
11606	if (!OPTS_VALID(opts, perf_buffer_opts))
11607		return libbpf_err_ptr(-EINVAL);
11608
11609	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
11610	attr.type = PERF_TYPE_SOFTWARE;
11611	attr.sample_type = PERF_SAMPLE_RAW;
11612	attr.sample_period = 1;
11613	attr.wakeup_events = 1;
11614
11615	p.attr = &attr;
11616	p.sample_cb = sample_cb;
11617	p.lost_cb = lost_cb;
11618	p.ctx = ctx;
11619
11620	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
11621}
11622
11623struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
11624					 struct perf_event_attr *attr,
11625					 perf_buffer_event_fn event_cb, void *ctx,
11626					 const struct perf_buffer_raw_opts *opts)
11627{
11628	struct perf_buffer_params p = {};
11629
11630	if (!attr)
11631		return libbpf_err_ptr(-EINVAL);
11632
11633	if (!OPTS_VALID(opts, perf_buffer_raw_opts))
11634		return libbpf_err_ptr(-EINVAL);
11635
11636	p.attr = attr;
11637	p.event_cb = event_cb;
11638	p.ctx = ctx;
11639	p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
11640	p.cpus = OPTS_GET(opts, cpus, NULL);
11641	p.map_keys = OPTS_GET(opts, map_keys, NULL);
11642
11643	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
11644}
11645
/* Common constructor behind perf_buffer__new() and perf_buffer__new_raw().
 *
 * Validates page_cnt (must be a non-zero power of two) and, when the kernel
 * can report it, that map_fd refers to a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
 * It then creates an epoll instance and, for each selected CPU, opens a perf
 * ring buffer, stores its FD in the map under the chosen key and registers it
 * with epoll.
 *
 * Returns the new perf_buffer or an ERR_PTR()-encoded negative error; on
 * failure all partially created state is released through
 * perf_buffer__free().
 */
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
					      struct perf_buffer_params *p)
{
	const char *online_cpus_file = "/sys/devices/system/cpu/online";
	struct bpf_map_info map;
	char msg[STRERR_BUFSIZE];
	struct perf_buffer *pb;
	bool *online = NULL;
	__u32 map_info_len;
	int err, i, j, n;

	if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
		pr_warn("page count should be power of two, but is %zu\n",
			page_cnt);
		return ERR_PTR(-EINVAL);
	}

	/* best-effort sanity checks */
	memset(&map, 0, sizeof(map));
	map_info_len = sizeof(map);
	err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
	if (err) {
		err = -errno;
		/* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
		 * -EBADFD, -EFAULT, or -E2BIG on real error
		 */
		if (err != -EINVAL) {
			pr_warn("failed to get map info for map FD %d: %s\n",
				map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
			return ERR_PTR(err);
		}
		pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
			 map_fd);
	} else {
		if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
			pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
				map.name);
			return ERR_PTR(-EINVAL);
		}
	}

	pb = calloc(1, sizeof(*pb));
	if (!pb)
		return ERR_PTR(-ENOMEM);

	pb->event_cb = p->event_cb;
	pb->sample_cb = p->sample_cb;
	pb->lost_cb = p->lost_cb;
	pb->ctx = p->ctx;

	pb->page_size = getpagesize();
	pb->mmap_size = pb->page_size * page_cnt;
	pb->map_fd = map_fd;

	pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
	if (pb->epoll_fd < 0) {
		err = -errno;
		pr_warn("failed to create epoll instance: %s\n",
			libbpf_strerror_r(err, msg, sizeof(msg)));
		goto error;
	}

	/* explicit CPU list from the caller, or all possible CPUs capped by
	 * the map's max_entries (left at 0 if map info wasn't available)
	 */
	if (p->cpu_cnt > 0) {
		pb->cpu_cnt = p->cpu_cnt;
	} else {
		pb->cpu_cnt = libbpf_num_possible_cpus();
		if (pb->cpu_cnt < 0) {
			err = pb->cpu_cnt;
			goto error;
		}
		if (map.max_entries && map.max_entries < pb->cpu_cnt)
			pb->cpu_cnt = map.max_entries;
	}

	pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
	if (!pb->events) {
		err = -ENOMEM;
		pr_warn("failed to allocate events: out of memory\n");
		goto error;
	}
	pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
	if (!pb->cpu_bufs) {
		err = -ENOMEM;
		pr_warn("failed to allocate buffers: out of memory\n");
		goto error;
	}

	err = parse_cpu_mask_file(online_cpus_file, &online, &n);
	if (err) {
		pr_warn("failed to get online CPU mask: %d\n", err);
		goto error;
	}

	/* i walks candidate CPUs, j counts buffers actually set up */
	for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
		struct perf_cpu_buf *cpu_buf;
		int cpu, map_key;

		cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
		map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;

		/* in case user didn't explicitly requested particular CPUs to
		 * be attached to, skip offline/not present CPUs
		 */
		if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
			continue;

		cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
		if (IS_ERR(cpu_buf)) {
			err = PTR_ERR(cpu_buf);
			goto error;
		}

		/* stored before the steps below so the error path frees it */
		pb->cpu_bufs[j] = cpu_buf;

		err = bpf_map_update_elem(pb->map_fd, &map_key,
					  &cpu_buf->fd, 0);
		if (err) {
			err = -errno;
			pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
				cpu, map_key, cpu_buf->fd,
				libbpf_strerror_r(err, msg, sizeof(msg)));
			goto error;
		}

		pb->events[j].events = EPOLLIN;
		pb->events[j].data.ptr = cpu_buf;
		if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
			      &pb->events[j]) < 0) {
			err = -errno;
			pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
				cpu, cpu_buf->fd,
				libbpf_strerror_r(err, msg, sizeof(msg)));
			goto error;
		}
		j++;
	}
	/* from here on cpu_cnt is the number of buffers actually created */
	pb->cpu_cnt = j;
	free(online);

	return pb;

error:
	free(online);
	if (pb)
		perf_buffer__free(pb);
	return ERR_PTR(err);
}
11793
/* View of a PERF_RECORD_SAMPLE record as used by
 * perf_buffer__process_record(): the common header followed by a byte count
 * and the raw payload handed to the sample callback.
 */
struct perf_sample_raw {
	struct perf_event_header header;
	uint32_t size; /* passed to sample_cb as the size of data */
	char data[];
};
11799
/* View of a PERF_RECORD_LOST record as used by
 * perf_buffer__process_record(); only the lost count is consumed there.
 */
struct perf_sample_lost {
	struct perf_event_header header;
	uint64_t id;
	uint64_t lost; /* passed to lost_cb */
	uint64_t sample_id;
};
11806
11807static enum bpf_perf_event_ret
11808perf_buffer__process_record(struct perf_event_header *e, void *ctx)
11809{
11810	struct perf_cpu_buf *cpu_buf = ctx;
11811	struct perf_buffer *pb = cpu_buf->pb;
11812	void *data = e;
11813
11814	/* user wants full control over parsing perf event */
11815	if (pb->event_cb)
11816		return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
11817
11818	switch (e->type) {
11819	case PERF_RECORD_SAMPLE: {
11820		struct perf_sample_raw *s = data;
11821
11822		if (pb->sample_cb)
11823			pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
11824		break;
11825	}
11826	case PERF_RECORD_LOST: {
11827		struct perf_sample_lost *s = data;
11828
11829		if (pb->lost_cb)
11830			pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
11831		break;
11832	}
11833	default:
11834		pr_warn("unknown perf sample type %d\n", e->type);
11835		return LIBBPF_PERF_EVENT_ERROR;
11836	}
11837	return LIBBPF_PERF_EVENT_CONT;
11838}
11839
11840static int perf_buffer__process_records(struct perf_buffer *pb,
11841					struct perf_cpu_buf *cpu_buf)
11842{
11843	enum bpf_perf_event_ret ret;
11844
11845	ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
11846				     pb->page_size, &cpu_buf->buf,
11847				     &cpu_buf->buf_size,
11848				     perf_buffer__process_record, cpu_buf);
11849	if (ret != LIBBPF_PERF_EVENT_CONT)
11850		return ret;
11851	return 0;
11852}
11853
11854int perf_buffer__epoll_fd(const struct perf_buffer *pb)
11855{
11856	return pb->epoll_fd;
11857}
11858
11859int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
11860{
11861	int i, cnt, err;
11862
11863	cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
11864	if (cnt < 0)
11865		return -errno;
11866
11867	for (i = 0; i < cnt; i++) {
11868		struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
11869
11870		err = perf_buffer__process_records(pb, cpu_buf);
11871		if (err) {
11872			pr_warn("error while processing records: %d\n", err);
11873			return libbpf_err(err);
11874		}
11875	}
11876	return cnt;
11877}
11878
11879/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
11880 * manager.
11881 */
11882size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
11883{
11884	return pb->cpu_cnt;
11885}
11886
11887/*
11888 * Return perf_event FD of a ring buffer in *buf_idx* slot of
11889 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
11890 * select()/poll()/epoll() Linux syscalls.
11891 */
11892int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
11893{
11894	struct perf_cpu_buf *cpu_buf;
11895
11896	if (buf_idx >= pb->cpu_cnt)
11897		return libbpf_err(-EINVAL);
11898
11899	cpu_buf = pb->cpu_bufs[buf_idx];
11900	if (!cpu_buf)
11901		return libbpf_err(-ENOENT);
11902
11903	return cpu_buf->fd;
11904}
11905
11906int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
11907{
11908	struct perf_cpu_buf *cpu_buf;
11909
11910	if (buf_idx >= pb->cpu_cnt)
11911		return libbpf_err(-EINVAL);
11912
11913	cpu_buf = pb->cpu_bufs[buf_idx];
11914	if (!cpu_buf)
11915		return libbpf_err(-ENOENT);
11916
11917	*buf = cpu_buf->base;
11918	*buf_size = pb->mmap_size;
11919	return 0;
11920}
11921
11922/*
11923 * Consume data from perf ring buffer corresponding to slot *buf_idx* in
11924 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
11925 * consume, do nothing and return success.
11926 * Returns:
11927 *   - 0 on success;
11928 *   - <0 on failure.
11929 */
11930int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
11931{
11932	struct perf_cpu_buf *cpu_buf;
11933
11934	if (buf_idx >= pb->cpu_cnt)
11935		return libbpf_err(-EINVAL);
11936
11937	cpu_buf = pb->cpu_bufs[buf_idx];
11938	if (!cpu_buf)
11939		return libbpf_err(-ENOENT);
11940
11941	return perf_buffer__process_records(pb, cpu_buf);
11942}
11943
11944int perf_buffer__consume(struct perf_buffer *pb)
11945{
11946	int i, err;
11947
11948	for (i = 0; i < pb->cpu_cnt; i++) {
11949		struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
11950
11951		if (!cpu_buf)
11952			continue;
11953
11954		err = perf_buffer__process_records(pb, cpu_buf);
11955		if (err) {
11956			pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
11957			return libbpf_err(err);
11958		}
11959	}
11960	return 0;
11961}
11962
11963int bpf_program__set_attach_target(struct bpf_program *prog,
11964				   int attach_prog_fd,
11965				   const char *attach_func_name)
11966{
11967	int btf_obj_fd = 0, btf_id = 0, err;
11968
11969	if (!prog || attach_prog_fd < 0)
11970		return libbpf_err(-EINVAL);
11971
11972	if (prog->obj->loaded)
11973		return libbpf_err(-EINVAL);
11974
11975	if (attach_prog_fd && !attach_func_name) {
11976		/* remember attach_prog_fd and let bpf_program__load() find
11977		 * BTF ID during the program load
11978		 */
11979		prog->attach_prog_fd = attach_prog_fd;
11980		return 0;
11981	}
11982
11983	if (attach_prog_fd) {
11984		btf_id = libbpf_find_prog_btf_id(attach_func_name,
11985						 attach_prog_fd);
11986		if (btf_id < 0)
11987			return libbpf_err(btf_id);
11988	} else {
11989		if (!attach_func_name)
11990			return libbpf_err(-EINVAL);
11991
11992		/* load btf_vmlinux, if not yet */
11993		err = bpf_object__load_vmlinux_btf(prog->obj, true);
11994		if (err)
11995			return libbpf_err(err);
11996		err = find_kernel_btf_id(prog->obj, attach_func_name,
11997					 prog->expected_attach_type,
11998					 &btf_obj_fd, &btf_id);
11999		if (err)
12000			return libbpf_err(err);
12001	}
12002
12003	prog->attach_btf_id = btf_id;
12004	prog->attach_btf_obj_fd = btf_obj_fd;
12005	prog->attach_prog_fd = attach_prog_fd;
12006	return 0;
12007}
12008
/* Parse a CPU list such as "0-3,5,7-8" into a boolean mask.
 *
 * On success *mask points to a newly allocated array in which mask[i] is true
 * for every listed CPU i, and *mask_sz is its length (highest listed CPU + 1);
 * the caller frees *mask. Ranges may appear in any order and may overlap.
 *
 * Returns 0 on success, -EINVAL for malformed, inverted or empty input, and
 * -ENOMEM if the mask can't be allocated. On failure *mask is NULL and
 * *mask_sz is 0.
 */
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
	int err = 0, n, len, start, end = -1;
	bool *tmp;

	*mask = NULL;
	*mask_sz = 0;

	/* Each sub string separated by ',' has format \d+-\d+ or \d+ */
	while (*s) {
		if (*s == ',' || *s == '\n') {
			s++;
			continue;
		}
		/* len ends up as the number of characters consumed by the
		 * longest form that matched
		 */
		n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
		if (n <= 0 || n > 2) {
			pr_warn("Failed to get CPU range %s: %d\n", s, n);
			err = -EINVAL;
			goto cleanup;
		} else if (n == 1) {
			end = start;
		}
		if (start < 0 || start > end) {
			pr_warn("Invalid CPU range [%d,%d] in %s\n",
				start, end, s);
			err = -EINVAL;
			goto cleanup;
		}
		/* Only ever grow the mask: a range that lies inside what was
		 * already parsed must neither shrink the buffer nor produce a
		 * negative length for the zero-fill below.
		 */
		if (end >= *mask_sz) {
			tmp = realloc(*mask, end + 1);
			if (!tmp) {
				err = -ENOMEM;
				goto cleanup;
			}
			*mask = tmp;
			/* realloc() leaves the added tail uninitialized */
			memset(tmp + *mask_sz, 0, end + 1 - *mask_sz);
			*mask_sz = end + 1;
		}
		memset(*mask + start, 1, end - start + 1);
		s += len;
	}
	if (!*mask_sz) {
		pr_warn("Empty CPU range\n");
		return -EINVAL;
	}
	return 0;
cleanup:
	free(*mask);
	*mask = NULL;
	*mask_sz = 0;
	return err;
}
12058
12059int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
12060{
12061	int fd, err = 0, len;
12062	char buf[128];
12063
12064	fd = open(fcpu, O_RDONLY | O_CLOEXEC);
12065	if (fd < 0) {
12066		err = -errno;
12067		pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
12068		return err;
12069	}
12070	len = read(fd, buf, sizeof(buf));
12071	close(fd);
12072	if (len <= 0) {
12073		err = len ? -errno : -EINVAL;
12074		pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
12075		return err;
12076	}
12077	if (len >= sizeof(buf)) {
12078		pr_warn("CPU mask is too big in file %s\n", fcpu);
12079		return -E2BIG;
12080	}
12081	buf[len] = '\0';
12082
12083	return parse_cpu_mask_str(buf, mask, mask_sz);
12084}
12085
/* Return the number of CPUs listed in /sys/devices/system/cpu/possible.
 *
 * The count is kept in a function-local static (accessed with
 * READ_ONCE()/WRITE_ONCE()) and reused by later calls once it is positive.
 * If the file cannot be read or parsed, the error from parse_cpu_mask_file()
 * is returned through libbpf_err().
 */
int libbpf_num_possible_cpus(void)
{
	static const char *fcpu = "/sys/devices/system/cpu/possible";
	static int cpus;
	int cached, ret, mask_len, idx, count = 0;
	bool *cpu_mask;

	cached = READ_ONCE(cpus);
	if (cached > 0)
		return cached;

	ret = parse_cpu_mask_file(fcpu, &cpu_mask, &mask_len);
	if (ret)
		return libbpf_err(ret);

	/* every set entry of the mask is one possible CPU */
	for (idx = 0; idx < mask_len; idx++)
		count += cpu_mask[idx] ? 1 : 0;
	free(cpu_mask);

	WRITE_ONCE(cpus, count);
	return count;
}
12111
12112static int populate_skeleton_maps(const struct bpf_object *obj,
12113				  struct bpf_map_skeleton *maps,
12114				  size_t map_cnt)
12115{
12116	int i;
12117
12118	for (i = 0; i < map_cnt; i++) {
12119		struct bpf_map **map = maps[i].map;
12120		const char *name = maps[i].name;
12121		void **mmaped = maps[i].mmaped;
12122
12123		*map = bpf_object__find_map_by_name(obj, name);
12124		if (!*map) {
12125			pr_warn("failed to find skeleton map '%s'\n", name);
12126			return -ESRCH;
12127		}
12128
12129		/* externs shouldn't be pre-setup from user code */
12130		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
12131			*mmaped = (*map)->mmaped;
12132	}
12133	return 0;
12134}
12135
12136static int populate_skeleton_progs(const struct bpf_object *obj,
12137				   struct bpf_prog_skeleton *progs,
12138				   size_t prog_cnt)
12139{
12140	int i;
12141
12142	for (i = 0; i < prog_cnt; i++) {
12143		struct bpf_program **prog = progs[i].prog;
12144		const char *name = progs[i].name;
12145
12146		*prog = bpf_object__find_program_by_name(obj, name);
12147		if (!*prog) {
12148			pr_warn("failed to find skeleton program '%s'\n", name);
12149			return -ESRCH;
12150		}
12151	}
12152	return 0;
12153}
12154
12155int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
12156			      const struct bpf_object_open_opts *opts)
12157{
12158	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
12159		.object_name = s->name,
12160	);
12161	struct bpf_object *obj;
12162	int err;
12163
12164	/* Attempt to preserve opts->object_name, unless overriden by user
12165	 * explicitly. Overwriting object name for skeletons is discouraged,
12166	 * as it breaks global data maps, because they contain object name
12167	 * prefix as their own map name prefix. When skeleton is generated,
12168	 * bpftool is making an assumption that this name will stay the same.
12169	 */
12170	if (opts) {
12171		memcpy(&skel_opts, opts, sizeof(*opts));
12172		if (!opts->object_name)
12173			skel_opts.object_name = s->name;
12174	}
12175
12176	obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
12177	err = libbpf_get_error(obj);
12178	if (err) {
12179		pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
12180			s->name, err);
12181		return libbpf_err(err);
12182	}
12183
12184	*s->obj = obj;
12185	err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
12186	if (err) {
12187		pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
12188		return libbpf_err(err);
12189	}
12190
12191	err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
12192	if (err) {
12193		pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
12194		return libbpf_err(err);
12195	}
12196
12197	return 0;
12198}
12199
12200int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
12201{
12202	int err, len, var_idx, i;
12203	const char *var_name;
12204	const struct bpf_map *map;
12205	struct btf *btf;
12206	__u32 map_type_id;
12207	const struct btf_type *map_type, *var_type;
12208	const struct bpf_var_skeleton *var_skel;
12209	struct btf_var_secinfo *var;
12210
12211	if (!s->obj)
12212		return libbpf_err(-EINVAL);
12213
12214	btf = bpf_object__btf(s->obj);
12215	if (!btf) {
12216		pr_warn("subskeletons require BTF at runtime (object %s)\n",
12217		        bpf_object__name(s->obj));
12218		return libbpf_err(-errno);
12219	}
12220
12221	err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
12222	if (err) {
12223		pr_warn("failed to populate subskeleton maps: %d\n", err);
12224		return libbpf_err(err);
12225	}
12226
12227	err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
12228	if (err) {
12229		pr_warn("failed to populate subskeleton maps: %d\n", err);
12230		return libbpf_err(err);
12231	}
12232
12233	for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
12234		var_skel = &s->vars[var_idx];
12235		map = *var_skel->map;
12236		map_type_id = bpf_map__btf_value_type_id(map);
12237		map_type = btf__type_by_id(btf, map_type_id);
12238
12239		if (!btf_is_datasec(map_type)) {
12240			pr_warn("type for map '%1$s' is not a datasec: %2$s",
12241				bpf_map__name(map),
12242				__btf_kind_str(btf_kind(map_type)));
12243			return libbpf_err(-EINVAL);
12244		}
12245
12246		len = btf_vlen(map_type);
12247		var = btf_var_secinfos(map_type);
12248		for (i = 0; i < len; i++, var++) {
12249			var_type = btf__type_by_id(btf, var->type);
12250			var_name = btf__name_by_offset(btf, var_type->name_off);
12251			if (strcmp(var_name, var_skel->name) == 0) {
12252				*var_skel->addr = map->mmaped + var->offset;
12253				break;
12254			}
12255		}
12256	}
12257	return 0;
12258}
12259
12260void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
12261{
12262	if (!s)
12263		return;
12264	free(s->maps);
12265	free(s->progs);
12266	free(s->vars);
12267	free(s);
12268}
12269
12270int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
12271{
12272	int i, err;
12273
12274	err = bpf_object__load(*s->obj);
12275	if (err) {
12276		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
12277		return libbpf_err(err);
12278	}
12279
12280	for (i = 0; i < s->map_cnt; i++) {
12281		struct bpf_map *map = *s->maps[i].map;
12282		size_t mmap_sz = bpf_map_mmap_sz(map);
12283		int prot, map_fd = bpf_map__fd(map);
12284		void **mmaped = s->maps[i].mmaped;
12285
12286		if (!mmaped)
12287			continue;
12288
12289		if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
12290			*mmaped = NULL;
12291			continue;
12292		}
12293
12294		if (map->def.map_flags & BPF_F_RDONLY_PROG)
12295			prot = PROT_READ;
12296		else
12297			prot = PROT_READ | PROT_WRITE;
12298
12299		/* Remap anonymous mmap()-ed "map initialization image" as
12300		 * a BPF map-backed mmap()-ed memory, but preserving the same
12301		 * memory address. This will cause kernel to change process'
12302		 * page table to point to a different piece of kernel memory,
12303		 * but from userspace point of view memory address (and its
12304		 * contents, being identical at this point) will stay the
12305		 * same. This mapping will be released by bpf_object__close()
12306		 * as per normal clean up procedure, so we don't need to worry
12307		 * about it from skeleton's clean up perspective.
12308		 */
12309		*mmaped = mmap(map->mmaped, mmap_sz, prot,
12310				MAP_SHARED | MAP_FIXED, map_fd, 0);
12311		if (*mmaped == MAP_FAILED) {
12312			err = -errno;
12313			*mmaped = NULL;
12314			pr_warn("failed to re-mmap() map '%s': %d\n",
12315				 bpf_map__name(map), err);
12316			return libbpf_err(err);
12317		}
12318	}
12319
12320	return 0;
12321}
12322
12323int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
12324{
12325	int i, err;
12326
12327	for (i = 0; i < s->prog_cnt; i++) {
12328		struct bpf_program *prog = *s->progs[i].prog;
12329		struct bpf_link **link = s->progs[i].link;
12330
12331		if (!prog->autoload)
12332			continue;
12333
12334		/* auto-attaching not supported for this program */
12335		if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
12336			continue;
12337
12338		/* if user already set the link manually, don't attempt auto-attach */
12339		if (*link)
12340			continue;
12341
12342		err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
12343		if (err) {
12344			pr_warn("prog '%s': failed to auto-attach: %d\n",
12345				bpf_program__name(prog), err);
12346			return libbpf_err(err);
12347		}
12348
12349		/* It's possible that for some SEC() definitions auto-attach
12350		 * is supported in some cases (e.g., if definition completely
12351		 * specifies target information), but is not in other cases.
12352		 * SEC("uprobe") is one such case. If user specified target
12353		 * binary and function name, such BPF program can be
12354		 * auto-attached. But if not, it shouldn't trigger skeleton's
12355		 * attach to fail. It should just be skipped.
12356		 * attach_fn signals such case with returning 0 (no error) and
12357		 * setting link to NULL.
12358		 */
12359	}
12360
12361	return 0;
12362}
12363
12364void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
12365{
12366	int i;
12367
12368	for (i = 0; i < s->prog_cnt; i++) {
12369		struct bpf_link **link = s->progs[i].link;
12370
12371		bpf_link__destroy(*link);
12372		*link = NULL;
12373	}
12374}
12375
12376void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
12377{
12378	if (!s)
12379		return;
12380
12381	if (s->progs)
12382		bpf_object__detach_skeleton(s);
12383	if (s->obj)
12384		bpf_object__close(*s->obj);
12385	free(s->maps);
12386	free(s->progs);
12387	free(s);
12388}