Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf: arm64: remove callee-save registers use for tmp registers

In the current implementation of the ARM64 eBPF JIT, R23 and R24 are used as
tmp registers, which are callee-saved registers. This leads to a variable size
of the JIT prologue and epilogue. The latest blinding constant change prefers a
constant size of prologue and epilogue. AAPCS reserves R9 ~ R15 as temp
registers, which need not be saved/restored during a function call. So, replace
R23 and R24 with R10 and R11, and remove the tmp_used flag to save 2
instructions for some JITed BPF programs.

CC: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Zi Shen Lim <zlim.lnx@gmail.com>
Signed-off-by: Yang Shi <yang.shi@linaro.org>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Yang Shi and committed by
David S. Miller
4c1cd4fd cd9e2e5d

+5 -29
+5 -29
arch/arm64/net/bpf_jit_comp.c
··· 51 51 [BPF_REG_9] = A64_R(22), 52 52 /* read-only frame pointer to access stack */ 53 53 [BPF_REG_FP] = A64_R(25), 54 - /* temporary register for internal BPF JIT */ 55 - [TMP_REG_1] = A64_R(23), 56 - [TMP_REG_2] = A64_R(24), 54 + /* temporary registers for internal BPF JIT */ 55 + [TMP_REG_1] = A64_R(10), 56 + [TMP_REG_2] = A64_R(11), 57 57 /* temporary register for blinding constants */ 58 58 [BPF_REG_AX] = A64_R(9), 59 59 }; ··· 61 61 struct jit_ctx { 62 62 const struct bpf_prog *prog; 63 63 int idx; 64 - int tmp_used; 65 64 int epilogue_offset; 66 65 int *offset; 67 66 u32 *image; ··· 153 154 const u8 r8 = bpf2a64[BPF_REG_8]; 154 155 const u8 r9 = bpf2a64[BPF_REG_9]; 155 156 const u8 fp = bpf2a64[BPF_REG_FP]; 156 - const u8 tmp1 = bpf2a64[TMP_REG_1]; 157 - const u8 tmp2 = bpf2a64[TMP_REG_2]; 158 157 159 158 /* 160 159 * BPF prog stack layout ··· 164 167 * | ... | callee saved registers 165 168 * +-----+ 166 169 * | | x25/x26 167 - * BPF fp register => -80:+-----+ <= (BPF_FP) 170 + * BPF fp register => -64:+-----+ <= (BPF_FP) 168 171 * | | 169 172 * | ... | BPF prog stack 170 173 * | | ··· 186 189 /* Save callee-saved register */ 187 190 emit(A64_PUSH(r6, r7, A64_SP), ctx); 188 191 emit(A64_PUSH(r8, r9, A64_SP), ctx); 189 - if (ctx->tmp_used) 190 - emit(A64_PUSH(tmp1, tmp2, A64_SP), ctx); 191 192 192 193 /* Save fp (x25) and x26. 
SP requires 16 bytes alignment */ 193 194 emit(A64_PUSH(fp, A64_R(26), A64_SP), ctx); ··· 205 210 const u8 r8 = bpf2a64[BPF_REG_8]; 206 211 const u8 r9 = bpf2a64[BPF_REG_9]; 207 212 const u8 fp = bpf2a64[BPF_REG_FP]; 208 - const u8 tmp1 = bpf2a64[TMP_REG_1]; 209 - const u8 tmp2 = bpf2a64[TMP_REG_2]; 210 213 211 214 /* We're done with BPF stack */ 212 215 emit(A64_ADD_I(1, A64_SP, A64_SP, STACK_SIZE), ctx); ··· 213 220 emit(A64_POP(fp, A64_R(26), A64_SP), ctx); 214 221 215 222 /* Restore callee-saved register */ 216 - if (ctx->tmp_used) 217 - emit(A64_POP(tmp1, tmp2, A64_SP), ctx); 218 223 emit(A64_POP(r8, r9, A64_SP), ctx); 219 224 emit(A64_POP(r6, r7, A64_SP), ctx); 220 225 ··· 308 317 emit(A64_UDIV(is64, dst, dst, src), ctx); 309 318 break; 310 319 case BPF_MOD: 311 - ctx->tmp_used = 1; 312 320 emit(A64_UDIV(is64, tmp, dst, src), ctx); 313 321 emit(A64_MUL(is64, tmp, tmp, src), ctx); 314 322 emit(A64_SUB(is64, dst, dst, tmp), ctx); ··· 380 390 /* dst = dst OP imm */ 381 391 case BPF_ALU | BPF_ADD | BPF_K: 382 392 case BPF_ALU64 | BPF_ADD | BPF_K: 383 - ctx->tmp_used = 1; 384 393 emit_a64_mov_i(is64, tmp, imm, ctx); 385 394 emit(A64_ADD(is64, dst, dst, tmp), ctx); 386 395 break; 387 396 case BPF_ALU | BPF_SUB | BPF_K: 388 397 case BPF_ALU64 | BPF_SUB | BPF_K: 389 - ctx->tmp_used = 1; 390 398 emit_a64_mov_i(is64, tmp, imm, ctx); 391 399 emit(A64_SUB(is64, dst, dst, tmp), ctx); 392 400 break; 393 401 case BPF_ALU | BPF_AND | BPF_K: 394 402 case BPF_ALU64 | BPF_AND | BPF_K: 395 - ctx->tmp_used = 1; 396 403 emit_a64_mov_i(is64, tmp, imm, ctx); 397 404 emit(A64_AND(is64, dst, dst, tmp), ctx); 398 405 break; 399 406 case BPF_ALU | BPF_OR | BPF_K: 400 407 case BPF_ALU64 | BPF_OR | BPF_K: 401 - ctx->tmp_used = 1; 402 408 emit_a64_mov_i(is64, tmp, imm, ctx); 403 409 emit(A64_ORR(is64, dst, dst, tmp), ctx); 404 410 break; 405 411 case BPF_ALU | BPF_XOR | BPF_K: 406 412 case BPF_ALU64 | BPF_XOR | BPF_K: 407 - ctx->tmp_used = 1; 408 413 emit_a64_mov_i(is64, tmp, imm, ctx); 
409 414 emit(A64_EOR(is64, dst, dst, tmp), ctx); 410 415 break; 411 416 case BPF_ALU | BPF_MUL | BPF_K: 412 417 case BPF_ALU64 | BPF_MUL | BPF_K: 413 - ctx->tmp_used = 1; 414 418 emit_a64_mov_i(is64, tmp, imm, ctx); 415 419 emit(A64_MUL(is64, dst, dst, tmp), ctx); 416 420 break; 417 421 case BPF_ALU | BPF_DIV | BPF_K: 418 422 case BPF_ALU64 | BPF_DIV | BPF_K: 419 - ctx->tmp_used = 1; 420 423 emit_a64_mov_i(is64, tmp, imm, ctx); 421 424 emit(A64_UDIV(is64, dst, dst, tmp), ctx); 422 425 break; 423 426 case BPF_ALU | BPF_MOD | BPF_K: 424 427 case BPF_ALU64 | BPF_MOD | BPF_K: 425 - ctx->tmp_used = 1; 426 428 emit_a64_mov_i(is64, tmp2, imm, ctx); 427 429 emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); 428 430 emit(A64_MUL(is64, tmp, tmp, tmp2), ctx); ··· 485 503 case BPF_JMP | BPF_JNE | BPF_K: 486 504 case BPF_JMP | BPF_JSGT | BPF_K: 487 505 case BPF_JMP | BPF_JSGE | BPF_K: 488 - ctx->tmp_used = 1; 489 506 emit_a64_mov_i(1, tmp, imm, ctx); 490 507 emit(A64_CMP(1, dst, tmp), ctx); 491 508 goto emit_cond_jmp; 492 509 case BPF_JMP | BPF_JSET | BPF_K: 493 - ctx->tmp_used = 1; 494 510 emit_a64_mov_i(1, tmp, imm, ctx); 495 511 emit(A64_TST(1, dst, tmp), ctx); 496 512 goto emit_cond_jmp; ··· 498 518 const u8 r0 = bpf2a64[BPF_REG_0]; 499 519 const u64 func = (u64)__bpf_call_base + imm; 500 520 501 - ctx->tmp_used = 1; 502 521 emit_a64_mov_i64(tmp, func, ctx); 503 522 emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); 504 523 emit(A64_MOV(1, A64_FP, A64_SP), ctx); ··· 543 564 case BPF_LDX | BPF_MEM | BPF_H: 544 565 case BPF_LDX | BPF_MEM | BPF_B: 545 566 case BPF_LDX | BPF_MEM | BPF_DW: 546 - ctx->tmp_used = 1; 547 567 emit_a64_mov_i(1, tmp, off, ctx); 548 568 switch (BPF_SIZE(code)) { 549 569 case BPF_W: ··· 566 588 case BPF_ST | BPF_MEM | BPF_B: 567 589 case BPF_ST | BPF_MEM | BPF_DW: 568 590 /* Load imm to a register then store it */ 569 - ctx->tmp_used = 1; 570 591 emit_a64_mov_i(1, tmp2, off, ctx); 571 592 emit_a64_mov_i(1, tmp, imm, ctx); 572 593 switch (BPF_SIZE(code)) { ··· 589 612 
case BPF_STX | BPF_MEM | BPF_H: 590 613 case BPF_STX | BPF_MEM | BPF_B: 591 614 case BPF_STX | BPF_MEM | BPF_DW: 592 - ctx->tmp_used = 1; 593 615 emit_a64_mov_i(1, tmp, off, ctx); 594 616 switch (BPF_SIZE(code)) { 595 617 case BPF_W: ··· 774 798 775 799 /* 1. Initial fake pass to compute ctx->idx. */ 776 800 777 - /* Fake pass to fill in ctx->offset and ctx->tmp_used. */ 801 + /* Fake pass to fill in ctx->offset. */ 778 802 if (build_body(&ctx)) { 779 803 prog = orig_prog; 780 804 goto out_off;