Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/cfi,bpf: Fix bpf_struct_ops CFI

BPF struct_ops uses __arch_prepare_bpf_trampoline() to write
trampolines for indirect function calls. These trampolines must have
matching CFI.

In order to obtain the correct CFI hash for the various methods, add a
matching structure that contains stub functions; the compiler will
generate correct CFI which we can pilfer for the trampolines.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20231215092707.566977112@infradead.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Peter Zijlstra and committed by
Alexei Starovoitov
2cd3e377 e72d88d1

+191 -32
+6
arch/x86/include/asm/cfi.h
··· 123 123 } 124 124 #define cfi_get_offset cfi_get_offset 125 125 126 + extern u32 cfi_get_func_hash(void *func); 127 + 126 128 #else 127 129 static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs) 128 130 { ··· 132 130 } 133 131 #define cfi_bpf_hash 0U 134 132 #define cfi_bpf_subprog_hash 0U 133 + static inline u32 cfi_get_func_hash(void *func) 134 + { 135 + return 0; 136 + } 135 137 #endif /* CONFIG_CFI_CLANG */ 136 138 137 139 #endif /* _ASM_X86_CFI_H */
+22
arch/x86/kernel/alternative.c
··· 883 883 " .size cfi_bpf_subprog_hash, 4 \n" 884 884 " .popsection \n" 885 885 ); 886 + 887 + u32 cfi_get_func_hash(void *func) 888 + { 889 + u32 hash; 890 + 891 + func -= cfi_get_offset(); 892 + switch (cfi_mode) { 893 + case CFI_FINEIBT: 894 + func += 7; 895 + break; 896 + case CFI_KCFI: 897 + func += 1; 898 + break; 899 + default: 900 + return 0; 901 + } 902 + 903 + if (get_kernel_nofault(hash, func)) 904 + return 0; 905 + 906 + return hash; 907 + } 886 908 #endif 887 909 888 910 #ifdef CONFIG_FINEIBT
+43 -23
arch/x86/net/bpf_jit_comp.c
··· 312 312 * in arch/x86/kernel/alternative.c 313 313 */ 314 314 315 - static void emit_fineibt(u8 **pprog, bool is_subprog) 315 + static void emit_fineibt(u8 **pprog, u32 hash) 316 316 { 317 - u32 hash = is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash; 318 317 u8 *prog = *pprog; 319 318 320 319 EMIT_ENDBR(); ··· 326 327 *pprog = prog; 327 328 } 328 329 329 - static void emit_kcfi(u8 **pprog, bool is_subprog) 330 + static void emit_kcfi(u8 **pprog, u32 hash) 330 331 { 331 - u32 hash = is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash; 332 332 u8 *prog = *pprog; 333 333 334 334 EMIT1_off32(0xb8, hash); /* movl $hash, %eax */ ··· 349 351 *pprog = prog; 350 352 } 351 353 352 - static void emit_cfi(u8 **pprog, bool is_subprog) 354 + static void emit_cfi(u8 **pprog, u32 hash) 353 355 { 354 356 u8 *prog = *pprog; 355 357 356 358 switch (cfi_mode) { 357 359 case CFI_FINEIBT: 358 - emit_fineibt(&prog, is_subprog); 360 + emit_fineibt(&prog, hash); 359 361 break; 360 362 361 363 case CFI_KCFI: 362 - emit_kcfi(&prog, is_subprog); 364 + emit_kcfi(&prog, hash); 363 365 break; 364 366 365 367 default: ··· 381 383 { 382 384 u8 *prog = *pprog; 383 385 384 - emit_cfi(&prog, is_subprog); 386 + emit_cfi(&prog, is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash); 385 387 /* BPF trampoline can be made to work without these nops, 386 388 * but let's waste 5 bytes for now and optimize later 387 389 */ ··· 2508 2510 u8 *prog; 2509 2511 bool save_ret; 2510 2512 2513 + /* 2514 + * F_INDIRECT is only compatible with F_RET_FENTRY_RET, it is 2515 + * explicitly incompatible with F_CALL_ORIG | F_SKIP_FRAME | F_IP_ARG 2516 + * because @func_addr. 
2517 + */ 2518 + WARN_ON_ONCE((flags & BPF_TRAMP_F_INDIRECT) && 2519 + (flags & ~(BPF_TRAMP_F_INDIRECT | BPF_TRAMP_F_RET_FENTRY_RET))); 2520 + 2511 2521 /* extra registers for struct arguments */ 2512 - for (i = 0; i < m->nr_args; i++) 2522 + for (i = 0; i < m->nr_args; i++) { 2513 2523 if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) 2514 2524 nr_regs += (m->arg_size[i] + 7) / 8 - 1; 2525 + } 2515 2526 2516 2527 /* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6 2517 2528 * are passed through regs, the remains are through stack. ··· 2603 2596 2604 2597 prog = rw_image; 2605 2598 2606 - EMIT_ENDBR(); 2607 - /* 2608 - * This is the direct-call trampoline, as such it needs accounting 2609 - * for the __fentry__ call. 2610 - */ 2611 - x86_call_depth_emit_accounting(&prog, NULL); 2599 + if (flags & BPF_TRAMP_F_INDIRECT) { 2600 + /* 2601 + * Indirect call for bpf_struct_ops 2602 + */ 2603 + emit_cfi(&prog, cfi_get_func_hash(func_addr)); 2604 + } else { 2605 + /* 2606 + * Direct-call fentry stub, as such it needs accounting for the 2607 + * __fentry__ call. 
2608 + */ 2609 + x86_call_depth_emit_accounting(&prog, NULL); 2610 + } 2612 2611 EMIT1(0x55); /* push rbp */ 2613 2612 EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ 2614 - if (!is_imm8(stack_size)) 2613 + if (!is_imm8(stack_size)) { 2615 2614 /* sub rsp, stack_size */ 2616 2615 EMIT3_off32(0x48, 0x81, 0xEC, stack_size); 2617 - else 2616 + } else { 2618 2617 /* sub rsp, stack_size */ 2619 2618 EMIT4(0x48, 0x83, 0xEC, stack_size); 2619 + } 2620 2620 if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 2621 2621 EMIT1(0x50); /* push rax */ 2622 2622 /* mov QWORD PTR [rbp - rbx_off], rbx */ ··· 2657 2643 } 2658 2644 } 2659 2645 2660 - if (fentry->nr_links) 2646 + if (fentry->nr_links) { 2661 2647 if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, 2662 2648 flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image)) 2663 2649 return -EINVAL; 2650 + } 2664 2651 2665 2652 if (fmod_ret->nr_links) { 2666 2653 branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *), ··· 2680 2665 restore_regs(m, &prog, regs_off); 2681 2666 save_args(m, &prog, arg_stack_off, true); 2682 2667 2683 - if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 2668 + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) { 2684 2669 /* Before calling the original function, restore the 2685 2670 * tail_call_cnt from stack to rax. 2686 2671 */ 2687 2672 RESTORE_TAIL_CALL_CNT(stack_size); 2673 + } 2688 2674 2689 2675 if (flags & BPF_TRAMP_F_ORIG_STACK) { 2690 2676 emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8); ··· 2714 2698 /* Update the branches saved in invoke_bpf_mod_ret with the 2715 2699 * aligned address of do_fexit. 
2716 2700 */ 2717 - for (i = 0; i < fmod_ret->nr_links; i++) 2701 + for (i = 0; i < fmod_ret->nr_links; i++) { 2718 2702 emit_cond_near_jump(&branches[i], image + (prog - (u8 *)rw_image), 2719 2703 image + (branches[i] - (u8 *)rw_image), X86_JNE); 2704 + } 2720 2705 } 2721 2706 2722 - if (fexit->nr_links) 2707 + if (fexit->nr_links) { 2723 2708 if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, 2724 2709 false, image, rw_image)) { 2725 2710 ret = -EINVAL; 2726 2711 goto cleanup; 2727 2712 } 2713 + } 2728 2714 2729 2715 if (flags & BPF_TRAMP_F_RESTORE_REGS) 2730 2716 restore_regs(m, &prog, regs_off); ··· 2743 2725 ret = -EINVAL; 2744 2726 goto cleanup; 2745 2727 } 2746 - } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 2728 + } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) { 2747 2729 /* Before running the original function, restore the 2748 2730 * tail_call_cnt from stack to rax. 2749 2731 */ 2750 2732 RESTORE_TAIL_CALL_CNT(stack_size); 2733 + } 2751 2734 2752 2735 /* restore return value of orig_call or fentry prog back into RAX */ 2753 2736 if (save_ret) ··· 2756 2737 2757 2738 emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off); 2758 2739 EMIT1(0xC9); /* leave */ 2759 - if (flags & BPF_TRAMP_F_SKIP_FRAME) 2740 + if (flags & BPF_TRAMP_F_SKIP_FRAME) { 2760 2741 /* skip our return address and return to parent */ 2761 2742 EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ 2743 + } 2762 2744 emit_return(&prog, image + (prog - (u8 *)rw_image)); 2763 2745 /* Make sure the trampoline generation logic doesn't overflow */ 2764 2746 if (WARN_ON_ONCE(prog > (u8 *)rw_image_end - BPF_INSN_SAFETY)) {
+13
include/linux/bpf.h
··· 1060 1060 */ 1061 1061 #define BPF_TRAMP_F_TAIL_CALL_CTX BIT(7) 1062 1062 1063 + /* 1064 + * Indicate the trampoline should be suitable to receive indirect calls; 1065 + * without this indirectly calling the generated code can result in #UD/#CP, 1066 + * depending on the CFI options. 1067 + * 1068 + * Used by bpf_struct_ops. 1069 + * 1070 + * Incompatible with FENTRY usage, overloads @func_addr argument. 1071 + */ 1072 + #define BPF_TRAMP_F_INDIRECT BIT(8) 1073 + 1063 1074 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 1064 1075 * bytes on x86. 1065 1076 */ ··· 1708 1697 struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS]; 1709 1698 u32 type_id; 1710 1699 u32 value_id; 1700 + void *cfi_stubs; 1711 1701 }; 1712 1702 1713 1703 #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL) ··· 1722 1710 int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, 1723 1711 struct bpf_tramp_link *link, 1724 1712 const struct btf_func_model *model, 1713 + void *stub_func, 1725 1714 void *image, void *image_end); 1726 1715 static inline bool bpf_try_module_get(const void *data, struct module *owner) 1727 1716 {
+8 -8
kernel/bpf/bpf_struct_ops.c
··· 352 352 int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks, 353 353 struct bpf_tramp_link *link, 354 354 const struct btf_func_model *model, 355 - void *image, void *image_end) 355 + void *stub_func, void *image, void *image_end) 356 356 { 357 - u32 flags; 357 + u32 flags = BPF_TRAMP_F_INDIRECT; 358 358 int size; 359 359 360 360 tlinks[BPF_TRAMP_FENTRY].links[0] = link; 361 361 tlinks[BPF_TRAMP_FENTRY].nr_links = 1; 362 - /* BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops, 363 - * and it must be used alone. 364 - */ 365 - flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0; 362 + 363 + if (model->ret_size > 0) 364 + flags |= BPF_TRAMP_F_RET_FENTRY_RET; 366 365 367 366 size = arch_bpf_trampoline_size(model, flags, tlinks, NULL); 368 367 if (size < 0) ··· 369 370 if (size > (unsigned long)image_end - (unsigned long)image) 370 371 return -E2BIG; 371 372 return arch_prepare_bpf_trampoline(NULL, image, image_end, 372 - model, flags, tlinks, NULL); 373 + model, flags, tlinks, stub_func); 373 374 } 374 375 375 376 static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, ··· 503 504 504 505 err = bpf_struct_ops_prepare_trampoline(tlinks, link, 505 506 &st_ops->func_models[i], 507 + *(void **)(st_ops->cfi_stubs + moff), 506 508 image, image_end); 507 509 if (err < 0) 508 510 goto reset_unlock; 509 511 510 - *(void **)(kdata + moff) = image; 512 + *(void **)(kdata + moff) = image + cfi_get_offset(); 511 513 image += err; 512 514 513 515 /* put prog_id to udata */
+30 -1
net/bpf/bpf_dummy_struct_ops.c
··· 12 12 /* A common type for test_N with return value in bpf_dummy_ops */ 13 13 typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...); 14 14 15 + static int dummy_ops_test_ret_function(struct bpf_dummy_ops_state *state, ...) 16 + { 17 + return 0; 18 + } 19 + 15 20 struct bpf_dummy_ops_test_args { 16 21 u64 args[MAX_BPF_FUNC_ARGS]; 17 22 struct bpf_dummy_ops_state state; ··· 67 62 68 63 static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args) 69 64 { 70 - dummy_ops_test_ret_fn test = (void *)image; 65 + dummy_ops_test_ret_fn test = (void *)image + cfi_get_offset(); 71 66 struct bpf_dummy_ops_state *state = NULL; 72 67 73 68 /* state needs to be NULL if args[0] is 0 */ ··· 124 119 op_idx = prog->expected_attach_type; 125 120 err = bpf_struct_ops_prepare_trampoline(tlinks, link, 126 121 &st_ops->func_models[op_idx], 122 + &dummy_ops_test_ret_function, 127 123 image, image + PAGE_SIZE); 128 124 if (err < 0) 129 125 goto out; ··· 225 219 { 226 220 } 227 221 222 + static int bpf_dummy_test_1(struct bpf_dummy_ops_state *cb) 223 + { 224 + return 0; 225 + } 226 + 227 + static int bpf_dummy_test_2(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2, 228 + char a3, unsigned long a4) 229 + { 230 + return 0; 231 + } 232 + 233 + static int bpf_dummy_test_sleepable(struct bpf_dummy_ops_state *cb) 234 + { 235 + return 0; 236 + } 237 + 238 + static struct bpf_dummy_ops __bpf_bpf_dummy_ops = { 239 + .test_1 = bpf_dummy_test_1, 240 + .test_2 = bpf_dummy_test_2, 241 + .test_sleepable = bpf_dummy_test_sleepable, 242 + }; 243 + 228 244 struct bpf_struct_ops bpf_bpf_dummy_ops = { 229 245 .verifier_ops = &bpf_dummy_verifier_ops, 230 246 .init = bpf_dummy_init, ··· 255 227 .reg = bpf_dummy_reg, 256 228 .unreg = bpf_dummy_unreg, 257 229 .name = "bpf_dummy_ops", 230 + .cfi_stubs = &__bpf_bpf_dummy_ops, 258 231 };
+69
net/ipv4/bpf_tcp_ca.c
··· 271 271 return tcp_validate_congestion_control(kdata); 272 272 } 273 273 274 + static u32 bpf_tcp_ca_ssthresh(struct sock *sk) 275 + { 276 + return 0; 277 + } 278 + 279 + static void bpf_tcp_ca_cong_avoid(struct sock *sk, u32 ack, u32 acked) 280 + { 281 + } 282 + 283 + static void bpf_tcp_ca_set_state(struct sock *sk, u8 new_state) 284 + { 285 + } 286 + 287 + static void bpf_tcp_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev) 288 + { 289 + } 290 + 291 + static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags) 292 + { 293 + } 294 + 295 + static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *sample) 296 + { 297 + } 298 + 299 + static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk) 300 + { 301 + return 0; 302 + } 303 + 304 + static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs) 305 + { 306 + } 307 + 308 + static u32 bpf_tcp_ca_undo_cwnd(struct sock *sk) 309 + { 310 + return 0; 311 + } 312 + 313 + static u32 bpf_tcp_ca_sndbuf_expand(struct sock *sk) 314 + { 315 + return 0; 316 + } 317 + 318 + static void __bpf_tcp_ca_init(struct sock *sk) 319 + { 320 + } 321 + 322 + static void __bpf_tcp_ca_release(struct sock *sk) 323 + { 324 + } 325 + 326 + static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = { 327 + .ssthresh = bpf_tcp_ca_ssthresh, 328 + .cong_avoid = bpf_tcp_ca_cong_avoid, 329 + .set_state = bpf_tcp_ca_set_state, 330 + .cwnd_event = bpf_tcp_ca_cwnd_event, 331 + .in_ack_event = bpf_tcp_ca_in_ack_event, 332 + .pkts_acked = bpf_tcp_ca_pkts_acked, 333 + .min_tso_segs = bpf_tcp_ca_min_tso_segs, 334 + .cong_control = bpf_tcp_ca_cong_control, 335 + .undo_cwnd = bpf_tcp_ca_undo_cwnd, 336 + .sndbuf_expand = bpf_tcp_ca_sndbuf_expand, 337 + 338 + .init = __bpf_tcp_ca_init, 339 + .release = __bpf_tcp_ca_release, 340 + }; 341 + 274 342 struct bpf_struct_ops bpf_tcp_congestion_ops = { 275 343 .verifier_ops = &bpf_tcp_ca_verifier_ops, 276 344 .reg = bpf_tcp_ca_reg, ··· 349 281 .init = bpf_tcp_ca_init, 350 
282 .validate = bpf_tcp_ca_validate, 351 283 .name = "tcp_congestion_ops", 284 + .cfi_stubs = &__bpf_ops_tcp_congestion_ops, 352 285 }; 353 286 354 287 static int __init bpf_tcp_ca_kfunc_init(void)