Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf, arm64: Add BPF exception tables

When a tracing BPF program attempts to read memory without using the
bpf_probe_read() helper, the verifier marks the load instruction with
the BPF_PROBE_MEM flag. Since the arm64 JIT does not currently recognize
this flag, it falls back to the interpreter.

Add support for BPF_PROBE_MEM, by appending an exception table to the
BPF program. If the load instruction causes a data abort, the fixup
infrastructure finds the exception table and fixes up the fault, by
clearing the destination register and jumping over the faulting
instruction.

To keep the compact exception table entry format, inspect the pc in
fixup_exception(). A more generic solution would add a "handler" field
to the table entry, like on x86 and s390.

Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200728152122.1292756-2-jean-philippe@linaro.org

authored by

Jean-Philippe Brucker and committed by
Daniel Borkmann
80083428 310ad797

+108 -9
+12
arch/arm64/include/asm/extable.h
@@ -22,6 +22,18 @@

 #define ARCH_HAS_RELATIVE_EXTABLE

+#ifdef CONFIG_BPF_JIT
+int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
+			      struct pt_regs *regs);
+#else /* !CONFIG_BPF_JIT */
+static inline
+int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
+			      struct pt_regs *regs)
+{
+	return 0;
+}
+#endif /* !CONFIG_BPF_JIT */
+
 extern int fixup_exception(struct pt_regs *regs);
 #endif
+9 -3
arch/arm64/mm/extable.c
@@ -11,8 +11,14 @@
 	const struct exception_table_entry *fixup;

 	fixup = search_exception_tables(instruction_pointer(regs));
-	if (fixup)
-		regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
+	if (!fixup)
+		return 0;

-	return fixup != NULL;
+	if (IS_ENABLED(CONFIG_BPF_JIT) &&
+	    regs->pc >= BPF_JIT_REGION_START &&
+	    regs->pc < BPF_JIT_REGION_END)
+		return arm64_bpf_fixup_exception(fixup, regs);
+
+	regs->pc = (unsigned long)&fixup->fixup + fixup->fixup;
+	return 1;
 }
+87 -6
arch/arm64/net/bpf_jit_comp.c
@@ -7,6 +7,7 @@

 #define pr_fmt(fmt) "bpf_jit: " fmt

+#include <linux/bitfield.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/printk.h>
@@ -56,6 +57,7 @@
 	int idx;
 	int epilogue_offset;
 	int *offset;
+	int exentry_idx;
 	__le32 *image;
 	u32 stack_size;
 };
@@ -351,6 +353,65 @@
 	emit(A64_RET(A64_LR), ctx);
 }

+#define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
+#define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
+
+int arm64_bpf_fixup_exception(const struct exception_table_entry *ex,
+			      struct pt_regs *regs)
+{
+	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
+	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
+
+	regs->regs[dst_reg] = 0;
+	regs->pc = (unsigned long)&ex->fixup - offset;
+	return 1;
+}
+
+/* For accesses to BTF pointers, add an entry to the exception table */
+static int add_exception_handler(const struct bpf_insn *insn,
+				 struct jit_ctx *ctx,
+				 int dst_reg)
+{
+	off_t offset;
+	unsigned long pc;
+	struct exception_table_entry *ex;
+
+	if (!ctx->image)
+		/* First pass */
+		return 0;
+
+	if (BPF_MODE(insn->code) != BPF_PROBE_MEM)
+		return 0;
+
+	if (!ctx->prog->aux->extable ||
+	    WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
+		return -EINVAL;
+
+	ex = &ctx->prog->aux->extable[ctx->exentry_idx];
+	pc = (unsigned long)&ctx->image[ctx->idx - 1];
+
+	offset = pc - (long)&ex->insn;
+	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+		return -ERANGE;
+	ex->insn = offset;
+
+	/*
+	 * Since the extable follows the program, the fixup offset is always
+	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
+	 * to keep things simple, and put the destination register in the upper
+	 * bits. We don't need to worry about buildtime or runtime sort
+	 * modifying the upper bits because the table is already sorted, and
+	 * isn't part of the main exception table.
+	 */
+	offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
+	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
+		return -ERANGE;
+
+	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
+		    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+
+	ctx->exentry_idx++;
+	return 0;
+}
+
 /* JITs an eBPF instruction.
  * Returns:
  * 0 - successfully JITed an 8-byte eBPF instruction.
@@ -375,6 +438,7 @@
 	u8 jmp_cond, reg;
 	s32 jmp_offset;
 	u32 a64_insn;
+	int ret;

 #define check_imm(bits, imm) do {				\
 	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
@@ -694,7 +758,6 @@
 		const u8 r0 = bpf2a64[BPF_REG_0];
 		bool func_addr_fixed;
 		u64 func_addr;
-		int ret;

 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
 					    &func_addr, &func_addr_fixed);
@@ -738,6 +801,10 @@
 	case BPF_LDX | BPF_MEM | BPF_H:
 	case BPF_LDX | BPF_MEM | BPF_B:
 	case BPF_LDX | BPF_MEM | BPF_DW:
+	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
 		emit_a64_mov_i(1, tmp, off, ctx);
 		switch (BPF_SIZE(code)) {
 		case BPF_W:
@@ -753,6 +820,10 @@
 			emit(A64_LDR64(dst, src, tmp), ctx);
 			break;
 		}
+
+		ret = add_exception_handler(insn, ctx, dst);
+		if (ret)
+			return ret;
 		break;

 	/* ST: *(size *)(dst + off) = imm */
@@ -868,6 +939,9 @@
 		return -1;
 	}

+	if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
+		return -1;
+
 	return 0;
 }

@@ -884,6 +958,7 @@

 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
+	int image_size, prog_size, extable_size;
 	struct bpf_prog *tmp, *orig_prog = prog;
 	struct bpf_binary_header *header;
 	struct arm64_jit_data *jit_data;
@@ -891,7 +966,6 @@
 	bool tmp_blinded = false;
 	bool extra_pass = false;
 	struct jit_ctx ctx;
-	int image_size;
 	u8 *image_ptr;

 	if (!prog->jit_requested)
@@ -922,7 +996,7 @@
 		image_ptr = jit_data->image;
 		header = jit_data->header;
 		extra_pass = true;
-		image_size = sizeof(u32) * ctx.idx;
+		prog_size = sizeof(u32) * ctx.idx;
 		goto skip_init_ctx;
 	}
 	memset(&ctx, 0, sizeof(ctx));
@@ -950,8 +1024,12 @@
 	ctx.epilogue_offset = ctx.idx;
 	build_epilogue(&ctx);

+	extable_size = prog->aux->num_exentries *
+		       sizeof(struct exception_table_entry);
+
 	/* Now we know the actual image size. */
-	image_size = sizeof(u32) * ctx.idx;
+	prog_size = sizeof(u32) * ctx.idx;
+	image_size = prog_size + extable_size;
 	header = bpf_jit_binary_alloc(image_size, &image_ptr,
 				      sizeof(u32), jit_fill_hole);
 	if (header == NULL) {
@@ -962,8 +1040,11 @@
 	/* 2. Now, the actual pass. */

 	ctx.image = (__le32 *)image_ptr;
+	if (extable_size)
+		prog->aux->extable = (void *)image_ptr + prog_size;
 skip_init_ctx:
 	ctx.idx = 0;
+	ctx.exentry_idx = 0;

 	build_prologue(&ctx, was_classic);

@@ -984,7 +1065,7 @@

 	/* And we're done. */
 	if (bpf_jit_enable > 1)
-		bpf_jit_dump(prog->len, image_size, 2, ctx.image);
+		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);

 	bpf_flush_icache(header, ctx.image + ctx.idx);

@@ -1005,7 +1086,7 @@
 	}
 	prog->bpf_func = (void *)ctx.image;
 	prog->jited = 1;
-	prog->jited_len = image_size;
+	prog->jited_len = prog_size;

 	if (!prog->is_func || extra_pass) {
 		bpf_prog_fill_jited_linfo(prog, ctx.offset);