Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bpf, x64: Emit IMUL instead of MUL for x86-64

IMUL allows for multiple operands and saving and restoring rax/rdx is no
longer needed. Signedness of the operands doesn't matter here because
we only keep the lower 32/64 bits of the product for 32/64 bit
multiplications.

Signed-off-by: Jie Meng <jmeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210913211337.1564014-1-jmeng@fb.com

authored by

Jie Meng and committed by
Alexei Starovoitov
c0354077 67dfac47

+43 -34
+24 -31
arch/x86/net/bpf_jit_comp.c
··· 1070 1070 break; 1071 1071 1072 1072 case BPF_ALU | BPF_MUL | BPF_K: 1073 - case BPF_ALU | BPF_MUL | BPF_X: 1074 1073 case BPF_ALU64 | BPF_MUL | BPF_K: 1075 - case BPF_ALU64 | BPF_MUL | BPF_X: 1076 - { 1077 - bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; 1074 + if (BPF_CLASS(insn->code) == BPF_ALU64) 1075 + EMIT1(add_2mod(0x48, dst_reg, dst_reg)); 1076 + else if (is_ereg(dst_reg)) 1077 + EMIT1(add_2mod(0x40, dst_reg, dst_reg)); 1078 1078 1079 - if (dst_reg != BPF_REG_0) 1080 - EMIT1(0x50); /* push rax */ 1081 - if (dst_reg != BPF_REG_3) 1082 - EMIT1(0x52); /* push rdx */ 1083 - 1084 - /* mov r11, dst_reg */ 1085 - EMIT_mov(AUX_REG, dst_reg); 1086 - 1087 - if (BPF_SRC(insn->code) == BPF_X) 1088 - emit_mov_reg(&prog, is64, BPF_REG_0, src_reg); 1079 + if (is_imm8(imm32)) 1080 + /* imul dst_reg, dst_reg, imm8 */ 1081 + EMIT3(0x6B, add_2reg(0xC0, dst_reg, dst_reg), 1082 + imm32); 1089 1083 else 1090 - emit_mov_imm32(&prog, is64, BPF_REG_0, imm32); 1091 - 1092 - if (is64) 1093 - EMIT1(add_1mod(0x48, AUX_REG)); 1094 - else if (is_ereg(AUX_REG)) 1095 - EMIT1(add_1mod(0x40, AUX_REG)); 1096 - /* mul(q) r11 */ 1097 - EMIT2(0xF7, add_1reg(0xE0, AUX_REG)); 1098 - 1099 - if (dst_reg != BPF_REG_3) 1100 - EMIT1(0x5A); /* pop rdx */ 1101 - if (dst_reg != BPF_REG_0) { 1102 - /* mov dst_reg, rax */ 1103 - EMIT_mov(dst_reg, BPF_REG_0); 1104 - EMIT1(0x58); /* pop rax */ 1105 - } 1084 + /* imul dst_reg, dst_reg, imm32 */ 1085 + EMIT2_off32(0x69, 1086 + add_2reg(0xC0, dst_reg, dst_reg), 1087 + imm32); 1106 1088 break; 1107 - } 1089 + 1090 + case BPF_ALU | BPF_MUL | BPF_X: 1091 + case BPF_ALU64 | BPF_MUL | BPF_X: 1092 + if (BPF_CLASS(insn->code) == BPF_ALU64) 1093 + EMIT1(add_2mod(0x48, src_reg, dst_reg)); 1094 + else if (is_ereg(dst_reg) || is_ereg(src_reg)) 1095 + EMIT1(add_2mod(0x40, src_reg, dst_reg)); 1096 + 1097 + /* imul dst_reg, src_reg */ 1098 + EMIT3(0x0F, 0xAF, add_2reg(0xC0, src_reg, dst_reg)); 1099 + break; 1100 + 1108 1101 /* Shifts */ 1109 1102 case BPF_ALU | BPF_LSH 
| BPF_K: 1110 1103 case BPF_ALU | BPF_RSH | BPF_K:
+19 -3
tools/testing/selftests/bpf/verifier/jit.c
··· 62 62 BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), 63 63 BPF_MOV64_IMM(BPF_REG_0, 1), 64 64 BPF_EXIT_INSN(), 65 + BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), 66 + BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 0xefefef), 67 + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), 68 + BPF_MOV64_IMM(BPF_REG_0, 1), 69 + BPF_EXIT_INSN(), 65 70 BPF_MOV32_REG(BPF_REG_2, BPF_REG_2), 66 71 BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), 67 72 BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1), ··· 78 73 BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), 79 74 BPF_MOV64_IMM(BPF_REG_0, 1), 80 75 BPF_EXIT_INSN(), 76 + BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL), 77 + BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0xefefef), 78 + BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2), 79 + BPF_MOV64_IMM(BPF_REG_0, 1), 80 + BPF_EXIT_INSN(), 81 + BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL), 82 + BPF_LD_IMM64(BPF_REG_2, 0x2ad4d4aaULL), 83 + BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, 0x2b), 84 + BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2), 85 + BPF_MOV64_IMM(BPF_REG_0, 1), 86 + BPF_EXIT_INSN(), 81 87 BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL), 82 88 BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL), 83 - BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL), 84 - BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1), 85 - BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2), 89 + BPF_LD_IMM64(BPF_REG_5, 0xeeff0d413122ULL), 90 + BPF_ALU32_REG(BPF_MUL, BPF_REG_5, BPF_REG_1), 91 + BPF_JMP_REG(BPF_JEQ, BPF_REG_5, BPF_REG_0, 2), 86 92 BPF_MOV64_IMM(BPF_REG_0, 1), 87 93 BPF_EXIT_INSN(), 88 94 BPF_MOV64_IMM(BPF_REG_0, 2),