Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: Introduce fd_idx

A typical program loading sequence involves creating bpf maps and patching
map FDs into bpf instructions in various places in the bpf program.
This job is done by libbpf, which uses compiler-generated ELF relocations
to patch certain instructions after maps are created and BTFs are loaded.
The goal of fd_idx is to allow bpf instructions to stay immutable
after compilation. At load time libbpf would still create maps as usual,
but it wouldn't need to patch instructions. Instead, it would store the map FDs
in a __u32 fd_array[] and pass a pointer to that array to sys_bpf(BPF_PROG_LOAD).
An instruction then names its map by fd_idx, the index of the map's FD in fd_array,
rather than by the FD itself.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210514003623.28033-9-alexei.starovoitov@gmail.com

authored by

Alexei Starovoitov and committed by
Daniel Borkmann
387544bf 2341d6bb

+61 -21
+1
include/linux/bpf_verifier.h
··· 450 450 u32 peak_states; 451 451 /* longest register parentage chain walked for liveness marking */ 452 452 u32 longest_mark_read_walk; 453 + bpfptr_t fd_array; 453 454 }; 454 455 455 456 __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
+11 -5
include/uapi/linux/bpf.h
··· 1098 1098 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have 1099 1099 * the following extensions: 1100 1100 * 1101 - * insn[0].src_reg: BPF_PSEUDO_MAP_FD 1102 - * insn[0].imm: map fd 1101 + * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX] 1102 + * insn[0].imm: map fd or fd_idx 1103 1103 * insn[1].imm: 0 1104 1104 * insn[0].off: 0 1105 1105 * insn[1].off: 0 ··· 1107 1107 * verifier type: CONST_PTR_TO_MAP 1108 1108 */ 1109 1109 #define BPF_PSEUDO_MAP_FD 1 1110 - /* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE 1111 - * insn[0].imm: map fd 1110 + #define BPF_PSEUDO_MAP_IDX 5 1111 + 1112 + /* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE 1113 + * insn[0].imm: map fd or fd_idx 1112 1114 * insn[1].imm: offset into value 1113 1115 * insn[0].off: 0 1114 1116 * insn[1].off: 0 1115 1117 * ldimm64 rewrite: address of map[0]+offset 1116 1118 * verifier type: PTR_TO_MAP_VALUE 1117 1119 */ 1118 - #define BPF_PSEUDO_MAP_VALUE 2 1120 + #define BPF_PSEUDO_MAP_VALUE 2 1121 + #define BPF_PSEUDO_MAP_IDX_VALUE 6 1122 + 1119 1123 /* insn[0].src_reg: BPF_PSEUDO_BTF_ID 1120 1124 * insn[0].imm: kernel btd id of VAR 1121 1125 * insn[1].imm: 0 ··· 1319 1315 /* or valid module BTF object fd or 0 to attach to vmlinux */ 1320 1316 __u32 attach_btf_obj_fd; 1321 1317 }; 1318 + __u32 :32; /* pad */ 1319 + __aligned_u64 fd_array; /* array of FDs */ 1322 1320 }; 1323 1321 1324 1322 struct { /* anonymous struct used by BPF_OBJ_* commands */
+1 -1
kernel/bpf/syscall.c
··· 2089 2089 } 2090 2090 2091 2091 /* last field in 'union bpf_attr' used by this command */ 2092 - #define BPF_PROG_LOAD_LAST_FIELD attach_prog_fd 2092 + #define BPF_PROG_LOAD_LAST_FIELD fd_array 2093 2093 2094 2094 static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr) 2095 2095 {
+37 -10
kernel/bpf/verifier.c
··· 8915 8915 mark_reg_known_zero(env, regs, insn->dst_reg); 8916 8916 dst_reg->map_ptr = map; 8917 8917 8918 - if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { 8918 + if (insn->src_reg == BPF_PSEUDO_MAP_VALUE || 8919 + insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) { 8919 8920 dst_reg->type = PTR_TO_MAP_VALUE; 8920 8921 dst_reg->off = aux->map_off; 8921 8922 if (map_value_has_spin_lock(map)) 8922 8923 dst_reg->id = ++env->id_gen; 8923 - } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { 8924 + } else if (insn->src_reg == BPF_PSEUDO_MAP_FD || 8925 + insn->src_reg == BPF_PSEUDO_MAP_IDX) { 8924 8926 dst_reg->type = CONST_PTR_TO_MAP; 8925 8927 } else { 8926 8928 verbose(env, "bpf verifier is misconfigured\n"); ··· 11175 11173 struct bpf_map *map; 11176 11174 struct fd f; 11177 11175 u64 addr; 11176 + u32 fd; 11178 11177 11179 11178 if (i == insn_cnt - 1 || insn[1].code != 0 || 11180 11179 insn[1].dst_reg != 0 || insn[1].src_reg != 0 || ··· 11205 11202 /* In final convert_pseudo_ld_imm64() step, this is 11206 11203 * converted into regular 64-bit imm load insn. 
11207 11204 */ 11208 - if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && 11209 - insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) || 11210 - (insn[0].src_reg == BPF_PSEUDO_MAP_FD && 11211 - insn[1].imm != 0)) { 11212 - verbose(env, 11213 - "unrecognized bpf_ld_imm64 insn\n"); 11205 + switch (insn[0].src_reg) { 11206 + case BPF_PSEUDO_MAP_VALUE: 11207 + case BPF_PSEUDO_MAP_IDX_VALUE: 11208 + break; 11209 + case BPF_PSEUDO_MAP_FD: 11210 + case BPF_PSEUDO_MAP_IDX: 11211 + if (insn[1].imm == 0) 11212 + break; 11213 + fallthrough; 11214 + default: 11215 + verbose(env, "unrecognized bpf_ld_imm64 insn\n"); 11214 11216 return -EINVAL; 11215 11217 } 11216 11218 11217 - f = fdget(insn[0].imm); 11219 + switch (insn[0].src_reg) { 11220 + case BPF_PSEUDO_MAP_IDX_VALUE: 11221 + case BPF_PSEUDO_MAP_IDX: 11222 + if (bpfptr_is_null(env->fd_array)) { 11223 + verbose(env, "fd_idx without fd_array is invalid\n"); 11224 + return -EPROTO; 11225 + } 11226 + if (copy_from_bpfptr_offset(&fd, env->fd_array, 11227 + insn[0].imm * sizeof(fd), 11228 + sizeof(fd))) 11229 + return -EFAULT; 11230 + break; 11231 + default: 11232 + fd = insn[0].imm; 11233 + break; 11234 + } 11235 + 11236 + f = fdget(fd); 11218 11237 map = __bpf_map_get(f); 11219 11238 if (IS_ERR(map)) { 11220 11239 verbose(env, "fd %d is not pointing to valid bpf_map\n", ··· 11251 11226 } 11252 11227 11253 11228 aux = &env->insn_aux_data[i]; 11254 - if (insn->src_reg == BPF_PSEUDO_MAP_FD) { 11229 + if (insn[0].src_reg == BPF_PSEUDO_MAP_FD || 11230 + insn[0].src_reg == BPF_PSEUDO_MAP_IDX) { 11255 11231 addr = (unsigned long)map; 11256 11232 } else { 11257 11233 u32 off = insn[1].imm; ··· 13334 13308 env->insn_aux_data[i].orig_idx = i; 13335 13309 env->prog = *prog; 13336 13310 env->ops = bpf_verifier_ops[env->prog->type]; 13311 + env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); 13337 13312 is_priv = bpf_capable(); 13338 13313 13339 13314 bpf_get_btf_vmlinux();
+11 -5
tools/include/uapi/linux/bpf.h
··· 1098 1098 /* When BPF ldimm64's insn[0].src_reg != 0 then this can have 1099 1099 * the following extensions: 1100 1100 * 1101 - * insn[0].src_reg: BPF_PSEUDO_MAP_FD 1102 - * insn[0].imm: map fd 1101 + * insn[0].src_reg: BPF_PSEUDO_MAP_[FD|IDX] 1102 + * insn[0].imm: map fd or fd_idx 1103 1103 * insn[1].imm: 0 1104 1104 * insn[0].off: 0 1105 1105 * insn[1].off: 0 ··· 1107 1107 * verifier type: CONST_PTR_TO_MAP 1108 1108 */ 1109 1109 #define BPF_PSEUDO_MAP_FD 1 1110 - /* insn[0].src_reg: BPF_PSEUDO_MAP_VALUE 1111 - * insn[0].imm: map fd 1110 + #define BPF_PSEUDO_MAP_IDX 5 1111 + 1112 + /* insn[0].src_reg: BPF_PSEUDO_MAP_[IDX_]VALUE 1113 + * insn[0].imm: map fd or fd_idx 1112 1114 * insn[1].imm: offset into value 1113 1115 * insn[0].off: 0 1114 1116 * insn[1].off: 0 1115 1117 * ldimm64 rewrite: address of map[0]+offset 1116 1118 * verifier type: PTR_TO_MAP_VALUE 1117 1119 */ 1118 - #define BPF_PSEUDO_MAP_VALUE 2 1120 + #define BPF_PSEUDO_MAP_VALUE 2 1121 + #define BPF_PSEUDO_MAP_IDX_VALUE 6 1122 + 1119 1123 /* insn[0].src_reg: BPF_PSEUDO_BTF_ID 1120 1124 * insn[0].imm: kernel btd id of VAR 1121 1125 * insn[1].imm: 0 ··· 1319 1315 /* or valid module BTF object fd or 0 to attach to vmlinux */ 1320 1316 __u32 attach_btf_obj_fd; 1321 1317 }; 1318 + __u32 :32; /* pad */ 1319 + __aligned_u64 fd_array; /* array of FDs */ 1322 1320 }; 1323 1321 1324 1322 struct { /* anonymous struct used by BPF_OBJ_* commands */