Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arm32, bpf: add support for 64 bit division instruction

ARM32 doesn't have instructions to do 64-bit/64-bit divisions, so the
following instructions cannot be translated to a single native instruction:
BPF_ALU64 | BPF_DIV
BPF_ALU64 | BPF_MOD
BPF_ALU64 | BPF_SDIV
BPF_ALU64 | BPF_SMOD

We implement the above instructions as function calls: div64_u64() and
div64_u64_rem() for unsigned division/mod, and div64_s64() for signed
division/mod (the signed remainder is computed from the div64_s64()
quotient as dividend - quotient * divisor).

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Link: https://lore.kernel.org/r/20230907230550.1417590-7-puranjay12@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Puranjay Mohan and committed by
Alexei Starovoitov
71086041 5097faa5

+115 -1
+115 -1
arch/arm/net/bpf_jit_32.c
··· 2 2 /* 3 3 * Just-In-Time compiler for eBPF filters on 32bit ARM 4 4 * 5 + * Copyright (c) 2023 Puranjay Mohan <puranjay12@gmail.com> 5 6 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> 6 7 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com> 7 8 */ ··· 16 15 #include <linux/string.h> 17 16 #include <linux/slab.h> 18 17 #include <linux/if_vlan.h> 18 + #include <linux/math64.h> 19 19 20 20 #include <asm/cacheflush.h> 21 21 #include <asm/hwcap.h> ··· 238 236 static s32 jit_smod32(s32 dividend, s32 divisor) 239 237 { 240 238 return dividend % divisor; 239 + } 240 + 241 + /* Wrappers for 64-bit div/mod */ 242 + static u64 jit_udiv64(u64 dividend, u64 divisor) 243 + { 244 + return div64_u64(dividend, divisor); 245 + } 246 + 247 + static u64 jit_mod64(u64 dividend, u64 divisor) 248 + { 249 + u64 rem; 250 + 251 + div64_u64_rem(dividend, divisor, &rem); 252 + return rem; 253 + } 254 + 255 + static s64 jit_sdiv64(s64 dividend, s64 divisor) 256 + { 257 + return div64_s64(dividend, divisor); 258 + } 259 + 260 + static s64 jit_smod64(s64 dividend, s64 divisor) 261 + { 262 + u64 q; 263 + 264 + q = div64_s64(dividend, divisor); 265 + 266 + return dividend - q * divisor; 241 267 } 242 268 243 269 static inline void _emit(int cond, u32 inst, struct jit_ctx *ctx) ··· 583 553 emit(ARM_MOV_R(ARM_R1, tmp[0]), ctx); 584 554 if (rm != ARM_R0) 585 555 emit(ARM_MOV_R(ARM_R0, tmp[1]), ctx); 556 + } 557 + 558 + static inline void emit_udivmod64(const s8 *rd, const s8 *rm, const s8 *rn, struct jit_ctx *ctx, 559 + u8 op, u8 sign) 560 + { 561 + u32 dst; 562 + 563 + /* Push caller-saved registers on stack */ 564 + emit(ARM_PUSH(CALLER_MASK), ctx); 565 + 566 + /* 567 + * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in 568 + * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack, 569 + * we can recover them later after returning from the function call. 
570 + */ 571 + if (rm[1] != ARM_R0 || rn[1] != ARM_R2) { 572 + /* 573 + * Move Rm to {R1, R0} if it is not already there. 574 + */ 575 + if (rm[1] != ARM_R0) { 576 + if (rn[1] == ARM_R0) 577 + emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx); 578 + emit(ARM_MOV_R(ARM_R1, rm[0]), ctx); 579 + emit(ARM_MOV_R(ARM_R0, rm[1]), ctx); 580 + if (rn[1] == ARM_R0) { 581 + emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx); 582 + goto cont; 583 + } 584 + } 585 + /* 586 + * Move Rn to {R3, R2} if it is not already there. 587 + */ 588 + if (rn[1] != ARM_R2) { 589 + emit(ARM_MOV_R(ARM_R3, rn[0]), ctx); 590 + emit(ARM_MOV_R(ARM_R2, rn[1]), ctx); 591 + } 592 + } 593 + 594 + cont: 595 + 596 + /* Call appropriate function */ 597 + if (sign) { 598 + if (op == BPF_DIV) 599 + dst = (u32)jit_sdiv64; 600 + else 601 + dst = (u32)jit_smod64; 602 + } else { 603 + if (op == BPF_DIV) 604 + dst = (u32)jit_udiv64; 605 + else 606 + dst = (u32)jit_mod64; 607 + } 608 + 609 + emit_mov_i(ARM_IP, dst, ctx); 610 + emit_blx_r(ARM_IP, ctx); 611 + 612 + /* Save return value */ 613 + if (rd[1] != ARM_R0) { 614 + emit(ARM_MOV_R(rd[0], ARM_R1), ctx); 615 + emit(ARM_MOV_R(rd[1], ARM_R0), ctx); 616 + } 617 + 618 + /* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */ 619 + if (rd[1] != ARM_R0 && rd[1] != ARM_R2) { 620 + emit(ARM_POP(CALLER_MASK), ctx); 621 + } else if (rd[1] != ARM_R0) { 622 + emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx); 623 + emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx); 624 + } else { 625 + emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx); 626 + emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx); 627 + } 586 628 } 587 629 588 630 /* Is the translated BPF register on stack? 
*/ ··· 1684 1582 case BPF_ALU64 | BPF_DIV | BPF_X: 1685 1583 case BPF_ALU64 | BPF_MOD | BPF_K: 1686 1584 case BPF_ALU64 | BPF_MOD | BPF_X: 1687 - goto notyet; 1585 + rd = arm_bpf_get_reg64(dst, tmp2, ctx); 1586 + switch (BPF_SRC(code)) { 1587 + case BPF_X: 1588 + rs = arm_bpf_get_reg64(src, tmp, ctx); 1589 + break; 1590 + case BPF_K: 1591 + rs = tmp; 1592 + emit_a32_mov_se_i64(is64, rs, imm, ctx); 1593 + break; 1594 + } 1595 + emit_udivmod64(rd, rd, rs, ctx, BPF_OP(code), off); 1596 + arm_bpf_put_reg64(dst, rd, ctx); 1597 + break; 1688 1598 /* dst = dst << imm */ 1689 1599 /* dst = dst >> imm */ 1690 1600 /* dst = dst >> imm (signed) */