Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc64/bpf: Implement PROBE_MEM32 pseudo instructions

Add support for [LDX | STX | ST], PROBE_MEM32, [B | H | W | DW]
instructions. They are similar to PROBE_MEM instructions with the
following differences:
- PROBE_MEM32 supports store.
- PROBE_MEM32 relies on the verifier to clear the upper 32 bits of the
src/dst register.
- PROBE_MEM32 adds the 64-bit kern_vm_start address (which is stored in _R26
in the prologue). Due to the way bpf_arena is constructed, such
_R26 + reg + off16 accesses are guaranteed to be within the arena virtual
range, so no address check is needed at run-time.
- PROBE_MEM32 allows STX and ST. If they fault, the store is a nop. When
LDX faults, the destination register is zeroed.

To support these on powerpc, we do tmp1 = _R26 + src/dst reg and then use
tmp1 as the new src/dst register. This allows us to reuse most of the
code for normal [LDX | STX | ST].

Additionally, bpf_jit_emit_probe_mem_store() is introduced to emit
instructions for storing memory values depending on the size (byte,
halfword, word, doubleword).

Stack layout is adjusted to introduce a new NVR (_R26) and to make
BPF_PPC_STACKFRAME quadword aligned (local_tmp_var is increased by
8 bytes).

Reviewed-by: Hari Bathini <hbathini@linux.ibm.com>
Tested-by: Venkat Rao Bagalkote <venkat88@linux.ibm.com>
Signed-off-by: Saket Kumar Bhaskar <skb99@linux.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Link: https://patch.msgid.link/20250904100835.1100423-2-skb99@linux.ibm.com

authored by

Saket Kumar Bhaskar and committed by
Madhavan Srinivasan
47c7f3b7 46104a7d

+155 -24
+3 -2
arch/powerpc/net/bpf_jit.h
··· 161 161 unsigned int seen; 162 162 unsigned int idx; 163 163 unsigned int stack_size; 164 - int b2p[MAX_BPF_JIT_REG + 2]; 164 + int b2p[MAX_BPF_JIT_REG + 3]; 165 165 unsigned int exentry_idx; 166 166 unsigned int alt_exit_addr; 167 + u64 arena_vm_start; 167 168 }; 168 169 169 170 #define bpf_to_ppc(r) (ctx->b2p[r]) ··· 202 201 203 202 int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass, 204 203 struct codegen_context *ctx, int insn_idx, 205 - int jmp_off, int dst_reg); 204 + int jmp_off, int dst_reg, u32 code); 206 205 207 206 #endif 208 207
+7 -3
arch/powerpc/net/bpf_jit_comp.c
··· 204 204 205 205 /* Make sure that the stack is quadword aligned. */ 206 206 cgctx.stack_size = round_up(fp->aux->stack_depth, 16); 207 + cgctx.arena_vm_start = bpf_arena_get_kern_vm_start(fp->aux->arena); 207 208 208 209 /* Scouting faux-generate pass 0 */ 209 210 if (bpf_jit_build_body(fp, NULL, NULL, &cgctx, addrs, 0, false)) { ··· 327 326 */ 328 327 int bpf_add_extable_entry(struct bpf_prog *fp, u32 *image, u32 *fimage, int pass, 329 328 struct codegen_context *ctx, int insn_idx, int jmp_off, 330 - int dst_reg) 329 + int dst_reg, u32 code) 331 330 { 332 331 off_t offset; 333 332 unsigned long pc; ··· 356 355 (ctx->exentry_idx * BPF_FIXUP_LEN * 4); 357 356 358 357 fixup[0] = PPC_RAW_LI(dst_reg, 0); 358 + if (BPF_CLASS(code) == BPF_ST || BPF_CLASS(code) == BPF_STX) 359 + fixup[0] = PPC_RAW_NOP(); 360 + 359 361 if (IS_ENABLED(CONFIG_PPC32)) 360 362 fixup[1] = PPC_RAW_LI(dst_reg - 1, 0); /* clear higher 32-bit register too */ 361 363 ··· 583 579 { 584 580 if (IS_ENABLED(CONFIG_PPC64)) { 585 581 /* See bpf_jit_stack_tailcallcnt() */ 586 - int tailcallcnt_offset = 6 * 8; 582 + int tailcallcnt_offset = 7 * 8; 587 583 588 584 EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset)); 589 585 EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset)); ··· 598 594 { 599 595 if (IS_ENABLED(CONFIG_PPC64)) { 600 596 /* See bpf_jit_stack_tailcallcnt() */ 601 - int tailcallcnt_offset = 6 * 8; 597 + int tailcallcnt_offset = 7 * 8; 602 598 603 599 EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset)); 604 600 EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset));
+1 -1
arch/powerpc/net/bpf_jit_comp32.c
··· 1087 1087 } 1088 1088 1089 1089 ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, insn_idx, 1090 - jmp_off, dst_reg); 1090 + jmp_off, dst_reg, code); 1091 1091 if (ret) 1092 1092 return ret; 1093 1093 }
+144 -18
arch/powerpc/net/bpf_jit_comp64.c
··· 25 25 * with our redzone usage. 26 26 * 27 27 * [ prev sp ] <------------- 28 - * [ nv gpr save area ] 5*8 | 28 + * [ nv gpr save area ] 6*8 | 29 29 * [ tail_call_cnt ] 8 | 30 - * [ local_tmp_var ] 16 | 30 + * [ local_tmp_var ] 24 | 31 31 * fp (r31) --> [ ebpf stack space ] upto 512 | 32 32 * [ frame header ] 32/112 | 33 33 * sp (r1) ---> [ stack pointer ] -------------- 34 34 */ 35 35 36 36 /* for gpr non volatile registers BPG_REG_6 to 10 */ 37 - #define BPF_PPC_STACK_SAVE (5*8) 37 + #define BPF_PPC_STACK_SAVE (6*8) 38 38 /* for bpf JIT code internal usage */ 39 - #define BPF_PPC_STACK_LOCALS 24 39 + #define BPF_PPC_STACK_LOCALS 32 40 40 /* stack frame excluding BPF stack, ensure this is quadword aligned */ 41 41 #define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ 42 42 BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) ··· 44 44 /* BPF register usage */ 45 45 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) 46 46 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) 47 + #define ARENA_VM_START (MAX_BPF_JIT_REG + 2) 47 48 48 49 /* BPF to ppc register mappings */ 49 50 void bpf_jit_init_reg_mapping(struct codegen_context *ctx) ··· 68 67 ctx->b2p[BPF_REG_AX] = _R12; 69 68 ctx->b2p[TMP_REG_1] = _R9; 70 69 ctx->b2p[TMP_REG_2] = _R10; 70 + /* non volatile register for kern_vm_start address */ 71 + ctx->b2p[ARENA_VM_START] = _R26; 71 72 } 72 73 73 - /* PPC NVR range -- update this if we ever use NVRs below r27 */ 74 - #define BPF_PPC_NVR_MIN _R27 74 + /* PPC NVR range -- update this if we ever use NVRs below r26 */ 75 + #define BPF_PPC_NVR_MIN _R26 75 76 76 77 static inline bool bpf_has_stack_frame(struct codegen_context *ctx) 77 78 { ··· 92 89 * [ prev sp ] <------------- 93 90 * [ ... 
] | 94 91 * sp (r1) ---> [ stack pointer ] -------------- 95 - * [ nv gpr save area ] 5*8 92 + * [ nv gpr save area ] 6*8 96 93 * [ tail_call_cnt ] 8 97 - * [ local_tmp_var ] 16 94 + * [ local_tmp_var ] 24 98 95 * [ unused red zone ] 224 99 96 */ 100 97 static int bpf_jit_stack_local(struct codegen_context *ctx) ··· 102 99 if (bpf_has_stack_frame(ctx)) 103 100 return STACK_FRAME_MIN_SIZE + ctx->stack_size; 104 101 else 105 - return -(BPF_PPC_STACK_SAVE + 24); 102 + return -(BPF_PPC_STACK_SAVE + 32); 106 103 } 107 104 108 105 static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx) 109 106 { 110 - return bpf_jit_stack_local(ctx) + 16; 107 + return bpf_jit_stack_local(ctx) + 24; 111 108 } 112 109 113 110 static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) ··· 173 170 if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) 174 171 EMIT(PPC_RAW_STD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); 175 172 173 + if (ctx->arena_vm_start) 174 + EMIT(PPC_RAW_STD(bpf_to_ppc(ARENA_VM_START), _R1, 175 + bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ARENA_VM_START)))); 176 + 176 177 /* Setup frame pointer to point to the bpf stack area */ 177 178 if (bpf_is_seen_register(ctx, bpf_to_ppc(BPF_REG_FP))) 178 179 EMIT(PPC_RAW_ADDI(bpf_to_ppc(BPF_REG_FP), _R1, 179 180 STACK_FRAME_MIN_SIZE + ctx->stack_size)); 181 + 182 + if (ctx->arena_vm_start) 183 + PPC_LI64(bpf_to_ppc(ARENA_VM_START), ctx->arena_vm_start); 180 184 } 181 185 182 186 static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) ··· 194 184 for (i = BPF_REG_6; i <= BPF_REG_10; i++) 195 185 if (bpf_is_seen_register(ctx, bpf_to_ppc(i))) 196 186 EMIT(PPC_RAW_LD(bpf_to_ppc(i), _R1, bpf_jit_stack_offsetof(ctx, bpf_to_ppc(i)))); 187 + 188 + if (ctx->arena_vm_start) 189 + EMIT(PPC_RAW_LD(bpf_to_ppc(ARENA_VM_START), _R1, 190 + bpf_jit_stack_offsetof(ctx, bpf_to_ppc(ARENA_VM_START)))); 197 191 198 192 /* Tear down our stack frame */ 199 193 if (bpf_has_stack_frame(ctx)) { ··· 
410 396 asm ( 411 397 " .global bpf_stf_barrier ;" 412 398 " bpf_stf_barrier: ;" 413 - " std 21,-64(1) ;" 414 - " std 22,-56(1) ;" 399 + " std 21,-80(1) ;" 400 + " std 22,-72(1) ;" 415 401 " sync ;" 416 - " ld 21,-64(1) ;" 417 - " ld 22,-56(1) ;" 402 + " ld 21,-80(1) ;" 403 + " ld 22,-72(1) ;" 418 404 " ori 31,31,0 ;" 419 405 " .rept 14 ;" 420 406 " b 1f ;" ··· 422 408 " .endr ;" 423 409 " blr ;" 424 410 ); 411 + 412 + static int bpf_jit_emit_probe_mem_store(struct codegen_context *ctx, u32 src_reg, s16 off, 413 + u32 code, u32 *image) 414 + { 415 + u32 tmp1_reg = bpf_to_ppc(TMP_REG_1); 416 + u32 tmp2_reg = bpf_to_ppc(TMP_REG_2); 417 + 418 + switch (BPF_SIZE(code)) { 419 + case BPF_B: 420 + EMIT(PPC_RAW_STB(src_reg, tmp1_reg, off)); 421 + break; 422 + case BPF_H: 423 + EMIT(PPC_RAW_STH(src_reg, tmp1_reg, off)); 424 + break; 425 + case BPF_W: 426 + EMIT(PPC_RAW_STW(src_reg, tmp1_reg, off)); 427 + break; 428 + case BPF_DW: 429 + if (off % 4) { 430 + EMIT(PPC_RAW_LI(tmp2_reg, off)); 431 + EMIT(PPC_RAW_STDX(src_reg, tmp1_reg, tmp2_reg)); 432 + } else { 433 + EMIT(PPC_RAW_STD(src_reg, tmp1_reg, off)); 434 + } 435 + break; 436 + default: 437 + return -EINVAL; 438 + } 439 + return 0; 440 + } 425 441 426 442 static int emit_atomic_ld_st(const struct bpf_insn insn, struct codegen_context *ctx, u32 *image) 427 443 { ··· 1004 960 } 1005 961 break; 1006 962 963 + case BPF_STX | BPF_PROBE_MEM32 | BPF_B: 964 + case BPF_STX | BPF_PROBE_MEM32 | BPF_H: 965 + case BPF_STX | BPF_PROBE_MEM32 | BPF_W: 966 + case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: 967 + 968 + EMIT(PPC_RAW_ADD(tmp1_reg, dst_reg, bpf_to_ppc(ARENA_VM_START))); 969 + 970 + ret = bpf_jit_emit_probe_mem_store(ctx, src_reg, off, code, image); 971 + if (ret) 972 + return ret; 973 + 974 + ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, 975 + ctx->idx - 1, 4, -1, code); 976 + if (ret) 977 + return ret; 978 + 979 + break; 980 + 981 + case BPF_ST | BPF_PROBE_MEM32 | BPF_B: 982 + case BPF_ST | BPF_PROBE_MEM32 | BPF_H: 983 
+ case BPF_ST | BPF_PROBE_MEM32 | BPF_W: 984 + case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: 985 + 986 + EMIT(PPC_RAW_ADD(tmp1_reg, dst_reg, bpf_to_ppc(ARENA_VM_START))); 987 + 988 + if (BPF_SIZE(code) == BPF_W || BPF_SIZE(code) == BPF_DW) { 989 + PPC_LI32(tmp2_reg, imm); 990 + src_reg = tmp2_reg; 991 + } else { 992 + EMIT(PPC_RAW_LI(tmp2_reg, imm)); 993 + src_reg = tmp2_reg; 994 + } 995 + 996 + ret = bpf_jit_emit_probe_mem_store(ctx, src_reg, off, code, image); 997 + if (ret) 998 + return ret; 999 + 1000 + ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, 1001 + ctx->idx - 1, 4, -1, code); 1002 + if (ret) 1003 + return ret; 1004 + 1005 + break; 1006 + 1007 1007 /* 1008 1008 * BPF_STX ATOMIC (atomic ops) 1009 1009 */ ··· 1200 1112 * Check if 'off' is word aligned for BPF_DW, because 1201 1113 * we might generate two instructions. 1202 1114 */ 1203 - if ((BPF_SIZE(code) == BPF_DW || 1204 - (BPF_SIZE(code) == BPF_B && BPF_MODE(code) == BPF_PROBE_MEMSX)) && 1205 - (off & 3)) 1115 + if ((BPF_SIZE(code) == BPF_DW && (off & 3)) || 1116 + (BPF_SIZE(code) == BPF_B && 1117 + BPF_MODE(code) == BPF_PROBE_MEMSX) || 1118 + (BPF_SIZE(code) == BPF_B && BPF_MODE(code) == BPF_MEMSX)) 1206 1119 PPC_JMP((ctx->idx + 3) * 4); 1207 1120 else 1208 1121 PPC_JMP((ctx->idx + 2) * 4); ··· 1249 1160 1250 1161 if (BPF_MODE(code) == BPF_PROBE_MEM) { 1251 1162 ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, 1252 - ctx->idx - 1, 4, dst_reg); 1163 + ctx->idx - 1, 4, dst_reg, code); 1253 1164 if (ret) 1254 1165 return ret; 1255 1166 } 1167 + break; 1168 + 1169 + /* dst = *(u64 *)(ul) (src + ARENA_VM_START + off) */ 1170 + case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: 1171 + case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: 1172 + case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: 1173 + case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: 1174 + 1175 + EMIT(PPC_RAW_ADD(tmp1_reg, src_reg, bpf_to_ppc(ARENA_VM_START))); 1176 + 1177 + switch (size) { 1178 + case BPF_B: 1179 + EMIT(PPC_RAW_LBZ(dst_reg, tmp1_reg, off)); 1180 + 
break; 1181 + case BPF_H: 1182 + EMIT(PPC_RAW_LHZ(dst_reg, tmp1_reg, off)); 1183 + break; 1184 + case BPF_W: 1185 + EMIT(PPC_RAW_LWZ(dst_reg, tmp1_reg, off)); 1186 + break; 1187 + case BPF_DW: 1188 + if (off % 4) { 1189 + EMIT(PPC_RAW_LI(tmp2_reg, off)); 1190 + EMIT(PPC_RAW_LDX(dst_reg, tmp1_reg, tmp2_reg)); 1191 + } else { 1192 + EMIT(PPC_RAW_LD(dst_reg, tmp1_reg, off)); 1193 + } 1194 + break; 1195 + } 1196 + 1197 + if (size != BPF_DW && insn_is_zext(&insn[i + 1])) 1198 + addrs[++i] = ctx->idx * 4; 1199 + 1200 + ret = bpf_add_extable_entry(fp, image, fimage, pass, ctx, 1201 + ctx->idx - 1, 4, dst_reg, code); 1202 + if (ret) 1203 + return ret; 1256 1204 break; 1257 1205 1258 1206 /*