Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'bpf-arm64-support-for-timed-may_goto'

Puranjay Mohan says:

====================
bpf, arm64: support for timed may_goto

Changes in v2->v3:
v2: https://lore.kernel.org/all/20250809204833.44803-1-puranjay@kernel.org/
- Rebased on bpf-next/master
- Added Acked-by: tags from Xu and Kumar

Changes in v1->v2:
v1: https://lore.kernel.org/bpf/20250724125443.26182-1-puranjay@kernel.org/
- Added comment in arch_bpf_timed_may_goto() about BPF_REG_FP setup (Xu
Kuohai)

This set adds support for the timed may_goto instruction for arm64.
The timed may_goto instruction is implemented by the verifier by
reserving two 8-byte slots in the program stack and then calling
arch_bpf_timed_may_goto() in a loop with the stack offset of these two
slots in BPF_REG_AX. It expects the function to put a timestamp in the
first slot and the returned count in BPF_REG_AX is put into the second
slot by a store instruction emitted by the verifier.

arch_bpf_timed_may_goto() is special as it receives the parameter in
BPF_REG_AX and is expected to return the result in BPF_REG_AX as well.
It can't clobber any caller saved registers because verifier doesn't
save anything before emitting the call.

So, arch_bpf_timed_may_goto() is implemented in assembly so the exact
registers that are stored/restored can be controlled (BPF caller saved
registers here) and it also needs to take care of moving arguments and
return values to and from BPF_REG_AX <-> arm64 R0.

So, arch_bpf_timed_may_goto() acts as a trampoline to call
bpf_check_timed_may_goto() which does the main logic of placing the
timestamp and returning the count.

All tests that use the may_goto instruction pass after changing some of
them in patch 2:

#404 stream_errors:OK
[...]
#406/2 stream_success/stream_cond_break:OK
[...]
#494/23 verifier_bpf_fastcall/may_goto_interaction_x86_64:SKIP
#494/24 verifier_bpf_fastcall/may_goto_interaction_arm64:OK
[...]
#539/1 verifier_may_goto_1/may_goto 0:OK
#539/2 verifier_may_goto_1/batch 2 of may_goto 0:OK
#539/3 verifier_may_goto_1/may_goto batch with offsets 2/1/0:OK
#539/4 verifier_may_goto_1/may_goto batch with offsets 2/0:OK
#539 verifier_may_goto_1:OK
#540/1 verifier_may_goto_2/C code with may_goto 0:OK
#540 verifier_may_goto_2:OK
Summary: 7/16 PASSED, 25 SKIPPED, 0 FAILED
====================

Link: https://patch.msgid.link/20250827113245.52629-1-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

+76 -42
+1 -1
arch/arm64/net/Makefile
··· 2 2 # 3 3 # ARM64 networking code 4 4 # 5 - obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o 5 + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_timed_may_goto.o
+12 -1
arch/arm64/net/bpf_jit_comp.c
··· 1558 1558 if (ret < 0) 1559 1559 return ret; 1560 1560 emit_call(func_addr, ctx); 1561 - emit(A64_MOV(1, r0, A64_R(0)), ctx); 1561 + /* 1562 + * Call to arch_bpf_timed_may_goto() is emitted by the 1563 + * verifier and called with custom calling convention with 1564 + * first argument and return value in BPF_REG_AX (x9). 1565 + */ 1566 + if (func_addr != (u64)arch_bpf_timed_may_goto) 1567 + emit(A64_MOV(1, r0, A64_R(0)), ctx); 1562 1568 break; 1563 1569 } 1564 1570 /* tail call */ ··· 3041 3035 * no need to provide any additional instructions. Therefore, skip 3042 3036 * inserting nospec insns against Spectre v4. 3043 3037 */ 3038 + return true; 3039 + } 3040 + 3041 + bool bpf_jit_supports_timed_may_goto(void) 3042 + { 3044 3043 return true; 3045 3044 } 3046 3045
+40
arch/arm64/net/bpf_timed_may_goto.S
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* Copyright (c) 2025 Puranjay Mohan <puranjay@kernel.org> */ 3 + 4 + #include <linux/linkage.h> 5 + 6 + SYM_FUNC_START(arch_bpf_timed_may_goto) 7 + /* Allocate stack space and emit frame record */ 8 + stp x29, x30, [sp, #-64]! 9 + mov x29, sp 10 + 11 + /* Save BPF registers R0 - R5 (x7, x0-x4)*/ 12 + stp x7, x0, [sp, #16] 13 + stp x1, x2, [sp, #32] 14 + stp x3, x4, [sp, #48] 15 + 16 + /* 17 + * Stack depth was passed in BPF_REG_AX (x9), add it to the BPF_FP 18 + * (x25) to get the pointer to count and timestamp and pass it as the 19 + * first argument in x0. 20 + * 21 + * Before generating the call to arch_bpf_timed_may_goto, the verifier 22 + * generates a load instruction using FP, i.e. REG_AX = *(u64 *)(FP - 23 + * stack_off_cnt), so BPF_REG_FP (x25) is always set up by the arm64 24 + * jit in this case. 25 + */ 26 + add x0, x9, x25 27 + bl bpf_check_timed_may_goto 28 + /* BPF_REG_AX(x9) will be stored into count, so move return value to it. */ 29 + mov x9, x0 30 + 31 + /* Restore BPF registers R0 - R5 (x7, x0-x4) */ 32 + ldp x7, x0, [sp, #16] 33 + ldp x1, x2, [sp, #32] 34 + ldp x3, x4, [sp, #48] 35 + 36 + /* Restore FP and LR */ 37 + ldp x29, x30, [sp], #64 38 + 39 + ret 40 + SYM_FUNC_END(arch_bpf_timed_may_goto)
+1 -1
tools/testing/selftests/bpf/prog_tests/stream.c
··· 77 77 ASSERT_OK(ret, "ret"); 78 78 ASSERT_OK(opts.retval, "retval"); 79 79 80 - #if !defined(__x86_64__) && !defined(__s390x__) 80 + #if !defined(__x86_64__) && !defined(__s390x__) && !defined(__aarch64__) 81 81 ASSERT_TRUE(1, "Timed may_goto unsupported, skip."); 82 82 if (i == 0) { 83 83 ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts);
+16 -11
tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
··· 660 660 661 661 SEC("raw_tp") 662 662 __arch_arm64 663 - __log_level(4) __msg("stack depth 16") 664 - /* may_goto counter at -16 */ 665 - __xlated("0: *(u64 *)(r10 -16) =") 666 - __xlated("1: r1 = 1") 667 - __xlated("2: call bpf_get_smp_processor_id") 663 + __log_level(4) __msg("stack depth 24") 664 + /* may_goto counter at -24 */ 665 + __xlated("0: *(u64 *)(r10 -24) =") 666 + /* may_goto timestamp at -16 */ 667 + __xlated("1: *(u64 *)(r10 -16) =") 668 + __xlated("2: r1 = 1") 669 + __xlated("3: call bpf_get_smp_processor_id") 668 670 /* may_goto expansion starts */ 669 - __xlated("3: r11 = *(u64 *)(r10 -16)") 670 - __xlated("4: if r11 == 0x0 goto pc+3") 671 - __xlated("5: r11 -= 1") 672 - __xlated("6: *(u64 *)(r10 -16) = r11") 671 + __xlated("4: r11 = *(u64 *)(r10 -24)") 672 + __xlated("5: if r11 == 0x0 goto pc+6") 673 + __xlated("6: r11 -= 1") 674 + __xlated("7: if r11 != 0x0 goto pc+2") 675 + __xlated("8: r11 = -24") 676 + __xlated("9: call unknown") 677 + __xlated("10: *(u64 *)(r10 -24) = r11") 673 678 /* may_goto expansion ends */ 674 - __xlated("7: *(u64 *)(r10 -8) = r1") 675 - __xlated("8: exit") 679 + __xlated("11: *(u64 *)(r10 -8) = r1") 680 + __xlated("12: exit") 676 681 __success 677 682 __naked void may_goto_interaction_arm64(void) 678 683 {
+6 -28
tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
··· 10 10 __description("may_goto 0") 11 11 __arch_x86_64 12 12 __arch_s390x 13 + __arch_arm64 13 14 __xlated("0: r0 = 1") 14 15 __xlated("1: exit") 15 16 __success ··· 30 29 __description("batch 2 of may_goto 0") 31 30 __arch_x86_64 32 31 __arch_s390x 32 + __arch_arm64 33 33 __xlated("0: r0 = 1") 34 34 __xlated("1: exit") 35 35 __success ··· 52 50 __description("may_goto batch with offsets 2/1/0") 53 51 __arch_x86_64 54 52 __arch_s390x 53 + __arch_arm64 55 54 __xlated("0: r0 = 1") 56 55 __xlated("1: exit") 57 56 __success ··· 75 72 } 76 73 77 74 SEC("raw_tp") 78 - __description("may_goto batch with offsets 2/0 - x86_64 and s390x") 75 + __description("may_goto batch with offsets 2/0") 79 76 __arch_x86_64 80 77 __arch_s390x 78 + __arch_arm64 81 79 __xlated("0: *(u64 *)(r10 -16) = 65535") 82 80 __xlated("1: *(u64 *)(r10 -8) = 0") 83 81 __xlated("2: r11 = *(u64 *)(r10 -16)") ··· 92 88 __xlated("10: r0 = 2") 93 89 __xlated("11: exit") 94 90 __success 95 - __naked void may_goto_batch_2_x86_64_s390x(void) 96 - { 97 - asm volatile ( 98 - ".8byte %[may_goto1];" 99 - ".8byte %[may_goto3];" 100 - "r0 = 1;" 101 - "r0 = 2;" 102 - "exit;" 103 - : 104 - : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)), 105 - __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0)) 106 - : __clobber_all); 107 - } 108 - 109 - SEC("raw_tp") 110 - __description("may_goto batch with offsets 2/0 - arm64") 111 - __arch_arm64 112 - __xlated("0: *(u64 *)(r10 -8) = 8388608") 113 - __xlated("1: r11 = *(u64 *)(r10 -8)") 114 - __xlated("2: if r11 == 0x0 goto pc+3") 115 - __xlated("3: r11 -= 1") 116 - __xlated("4: *(u64 *)(r10 -8) = r11") 117 - __xlated("5: r0 = 1") 118 - __xlated("6: r0 = 2") 119 - __xlated("7: exit") 120 - __success 121 - __naked void may_goto_batch_2_arm64(void) 91 + __naked void may_goto_batch_2(void) 122 92 { 123 93 asm volatile ( 124 94 ".8byte %[may_goto1];"