Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARM: net: support BPF_ALU | BPF_MOD instructions in the BPF JIT.

For ARMv7 with UDIV instruction support, generate a UDIV instruction
followed by an MLS instruction.

For other ARM variants, generate code calling a C wrapper similar to
the jit_udiv() function used for BPF_ALU | BPF_DIV instructions.

Some performance numbers reported by the test_bpf module (the duration
per filter run is reported in nanoseconds, between "jited:<x>" and
"PASS"):

ARMv7 QEMU nojit: test_bpf: #3 DIV_MOD_KX jited:0 2196 PASS
ARMv7 QEMU jit: test_bpf: #3 DIV_MOD_KX jited:1 104 PASS
ARMv5 QEMU nojit: test_bpf: #3 DIV_MOD_KX jited:0 2176 PASS
ARMv5 QEMU jit: test_bpf: #3 DIV_MOD_KX jited:1 1104 PASS
ARMv5 kirkwood nojit: test_bpf: #3 DIV_MOD_KX jited:0 1103 PASS
ARMv5 kirkwood jit: test_bpf: #3 DIV_MOD_KX jited:1 311 PASS

Signed-off-by: Nicolas Schichan <nschichan@freebox.fr>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Nicolas Schichan and committed by
David S. Miller
4560cdff df7b6015

+37 -6
+32 -6
arch/arm/net/bpf_jit_32.c
··· 125 125 } 126 126 127 127 /* 128 - * Wrapper that handles both OABI and EABI and assures Thumb2 interworking 128 + * Wrappers which handle both OABI and EABI and assures Thumb2 interworking 129 129 * (where the assembly routines like __aeabi_uidiv could cause problems). 130 130 */ 131 131 static u32 jit_udiv(u32 dividend, u32 divisor) 132 132 { 133 133 return dividend / divisor; 134 + } 135 + 136 + static u32 jit_mod(u32 dividend, u32 divisor) 137 + { 138 + return dividend % divisor; 134 139 } 135 140 136 141 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) ··· 476 471 #endif 477 472 } 478 473 479 - static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx) 474 + static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, 475 + int bpf_op) 480 476 { 481 477 #if __LINUX_ARM_ARCH__ == 7 482 478 if (elf_hwcap & HWCAP_IDIVA) { 483 - emit(ARM_UDIV(rd, rm, rn), ctx); 479 + if (bpf_op == BPF_DIV) 480 + emit(ARM_UDIV(rd, rm, rn), ctx); 481 + else { 482 + emit(ARM_UDIV(ARM_R3, rm, rn), ctx); 483 + emit(ARM_MLS(rd, rn, ARM_R3, rm), ctx); 484 + } 484 485 return; 485 486 } 486 487 #endif ··· 507 496 emit(ARM_MOV_R(ARM_R0, rm), ctx); 508 497 509 498 ctx->seen |= SEEN_CALL; 510 - emit_mov_i(ARM_R3, (u32)jit_udiv, ctx); 499 + emit_mov_i(ARM_R3, bpf_op == BPF_DIV ? 
(u32)jit_udiv : (u32)jit_mod, 500 + ctx); 511 501 emit_blx_r(ARM_R3, ctx); 512 502 513 503 if (rd != ARM_R0) ··· 709 697 if (k == 1) 710 698 break; 711 699 emit_mov_i(r_scratch, k, ctx); 712 - emit_udiv(r_A, r_A, r_scratch, ctx); 700 + emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_DIV); 713 701 break; 714 702 case BPF_ALU | BPF_DIV | BPF_X: 715 703 update_on_xread(ctx); 716 704 emit(ARM_CMP_I(r_X, 0), ctx); 717 705 emit_err_ret(ARM_COND_EQ, ctx); 718 - emit_udiv(r_A, r_A, r_X, ctx); 706 + emit_udivmod(r_A, r_A, r_X, ctx, BPF_DIV); 707 + break; 708 + case BPF_ALU | BPF_MOD | BPF_K: 709 + if (k == 1) { 710 + emit_mov_i(r_A, 0, ctx); 711 + break; 712 + } 713 + emit_mov_i(r_scratch, k, ctx); 714 + emit_udivmod(r_A, r_A, r_scratch, ctx, BPF_MOD); 715 + break; 716 + case BPF_ALU | BPF_MOD | BPF_X: 717 + update_on_xread(ctx); 718 + emit(ARM_CMP_I(r_X, 0), ctx); 719 + emit_err_ret(ARM_COND_EQ, ctx); 720 + emit_udivmod(r_A, r_A, r_X, ctx, BPF_MOD); 719 721 break; 720 722 case BPF_ALU | BPF_OR | BPF_K: 721 723 /* A |= K */
+5
arch/arm/net/bpf_jit_32.h
··· 115 115 116 116 #define ARM_INST_UMULL 0x00800090 117 117 118 + #define ARM_INST_MLS 0x00600090 119 + 118 120 /* 119 121 * Use a suitable undefined instruction to use for ARM/Thumb2 faulting. 120 122 * We need to be careful not to conflict with those used by other modules ··· 211 209 212 210 #define ARM_UMULL(rd_lo, rd_hi, rn, rm) (ARM_INST_UMULL | (rd_hi) << 16 \ 213 211 | (rd_lo) << 12 | (rm) << 8 | rn) 212 + 213 + #define ARM_MLS(rd, rn, rm, ra) (ARM_INST_MLS | (rd) << 16 | (rn) | (rm) << 8 \ 214 + | (ra) << 12) 214 215 215 216 #endif /* PFILTER_OPCODES_ARM_H */