Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

riscv, bpf: Optimize calls

Instead of using emit_imm() and emit_jalr() which can expand to six
instructions, start using jal or auipc+jalr.

Signed-off-by: Björn Töpel <bjorn.topel@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20191216091343.23260-8-bjorn.topel@gmail.com

Authored by Björn Töpel and committed by Daniel Borkmann
e368b64f 7f3631e8

Total: +64 -37
arch/riscv/net/bpf_jit_comp.c: +64 -37
··· 811 811 *rd = RV_REG_T2; 812 812 } 813 813 814 - static void emit_jump_and_link(u8 rd, int rvoff, struct rv_jit_context *ctx) 814 + static void emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr, 815 + struct rv_jit_context *ctx) 815 816 { 816 817 s64 upper, lower; 817 818 818 - if (is_21b_int(rvoff)) { 819 + if (rvoff && is_21b_int(rvoff) && !force_jalr) { 819 820 emit(rv_jal(rd, rvoff >> 1), ctx); 820 821 return; 821 822 } ··· 831 830 { 832 831 return cond == BPF_JSGT || cond == BPF_JSLT || 833 832 cond == BPF_JSGE || cond == BPF_JSLE; 833 + } 834 + 835 + static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) 836 + { 837 + s64 off = 0; 838 + u64 ip; 839 + u8 rd; 840 + 841 + if (addr && ctx->insns) { 842 + ip = (u64)(long)(ctx->insns + ctx->ninsns); 843 + off = addr - ip; 844 + if (!is_32b_int(off)) { 845 + pr_err("bpf-jit: target call addr %pK is out of range\n", 846 + (void *)addr); 847 + return -ERANGE; 848 + } 849 + } 850 + 851 + emit_jump_and_link(RV_REG_RA, off, !fixed, ctx); 852 + rd = bpf_to_rv_reg(BPF_REG_0, ctx); 853 + emit(rv_addi(rd, RV_REG_A0, 0), ctx); 854 + return 0; 834 855 } 835 856 836 857 static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, ··· 1130 1107 /* JUMP off */ 1131 1108 case BPF_JMP | BPF_JA: 1132 1109 rvoff = rv_offset(i, off, ctx); 1133 - emit_jump_and_link(RV_REG_ZERO, rvoff, ctx); 1110 + emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); 1134 1111 break; 1135 1112 1136 1113 /* IF (dst COND src) JUMP off */ ··· 1232 1209 case BPF_JMP | BPF_CALL: 1233 1210 { 1234 1211 bool fixed; 1235 - int i, ret; 1212 + int ret; 1236 1213 u64 addr; 1237 1214 1238 1215 mark_call(ctx); ··· 1240 1217 &fixed); 1241 1218 if (ret < 0) 1242 1219 return ret; 1243 - if (fixed) { 1244 - emit_imm(RV_REG_T1, addr, ctx); 1245 - } else { 1246 - i = ctx->ninsns; 1247 - emit_imm(RV_REG_T1, addr, ctx); 1248 - for (i = ctx->ninsns - i; i < 8; i++) { 1249 - /* nop */ 1250 - emit(rv_addi(RV_REG_ZERO, RV_REG_ZERO, 0), 1251 
- ctx); 1252 - } 1253 - } 1254 - emit(rv_jalr(RV_REG_RA, RV_REG_T1, 0), ctx); 1255 - rd = bpf_to_rv_reg(BPF_REG_0, ctx); 1256 - emit(rv_addi(rd, RV_REG_A0, 0), ctx); 1220 + ret = emit_call(fixed, addr, ctx); 1221 + if (ret) 1222 + return ret; 1257 1223 break; 1258 1224 } 1259 1225 /* tail call */ ··· 1257 1245 break; 1258 1246 1259 1247 rvoff = epilogue_offset(ctx); 1260 - emit_jump_and_link(RV_REG_ZERO, rvoff, ctx); 1248 + emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); 1261 1249 break; 1262 1250 1263 1251 /* dst = imm64 */ ··· 1520 1508 __build_epilogue(false, ctx); 1521 1509 } 1522 1510 1523 - static int build_body(struct rv_jit_context *ctx, bool extra_pass) 1511 + static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset) 1524 1512 { 1525 1513 const struct bpf_prog *prog = ctx->prog; 1526 1514 int i; ··· 1532 1520 ret = emit_insn(insn, ctx, extra_pass); 1533 1521 if (ret > 0) { 1534 1522 i++; 1535 - if (ctx->insns == NULL) 1536 - ctx->offset[i] = ctx->ninsns; 1523 + if (offset) 1524 + offset[i] = ctx->ninsns; 1537 1525 continue; 1538 1526 } 1539 - if (ctx->insns == NULL) 1540 - ctx->offset[i] = ctx->ninsns; 1527 + if (offset) 1528 + offset[i] = ctx->ninsns; 1541 1529 if (ret) 1542 1530 return ret; 1543 1531 } ··· 1565 1553 struct bpf_prog *tmp, *orig_prog = prog; 1566 1554 int pass = 0, prev_ninsns = 0, i; 1567 1555 struct rv_jit_data *jit_data; 1556 + unsigned int image_size = 0; 1568 1557 struct rv_jit_context *ctx; 1569 - unsigned int image_size; 1570 1558 1571 1559 if (!prog->jit_requested) 1572 1560 return orig_prog; ··· 1611 1599 for (i = 0; i < 16; i++) { 1612 1600 pass++; 1613 1601 ctx->ninsns = 0; 1614 - if (build_body(ctx, extra_pass)) { 1602 + if (build_body(ctx, extra_pass, ctx->offset)) { 1615 1603 prog = orig_prog; 1616 1604 goto out_offset; 1617 1605 } 1618 1606 build_prologue(ctx); 1619 1607 ctx->epilogue_offset = ctx->ninsns; 1620 1608 build_epilogue(ctx); 1621 - if (ctx->ninsns == prev_ninsns) 1622 - break; 1609 + 
1610 + if (ctx->ninsns == prev_ninsns) { 1611 + if (jit_data->header) 1612 + break; 1613 + 1614 + image_size = sizeof(u32) * ctx->ninsns; 1615 + jit_data->header = 1616 + bpf_jit_binary_alloc(image_size, 1617 + &jit_data->image, 1618 + sizeof(u32), 1619 + bpf_fill_ill_insns); 1620 + if (!jit_data->header) { 1621 + prog = orig_prog; 1622 + goto out_offset; 1623 + } 1624 + 1625 + ctx->insns = (u32 *)jit_data->image; 1626 + /* Now, when the image is allocated, the image 1627 + * can potentially shrink more (auipc/jalr -> 1628 + * jal). 1629 + */ 1630 + } 1623 1631 prev_ninsns = ctx->ninsns; 1624 1632 } 1625 1633 1626 - /* Allocate image, now that we know the size. */ 1627 - image_size = sizeof(u32) * ctx->ninsns; 1628 - jit_data->header = bpf_jit_binary_alloc(image_size, &jit_data->image, 1629 - sizeof(u32), 1630 - bpf_fill_ill_insns); 1631 - if (!jit_data->header) { 1634 + if (i == 16) { 1635 + pr_err("bpf-jit: image did not converge in <%d passes!\n", i); 1636 + bpf_jit_binary_free(jit_data->header); 1632 1637 prog = orig_prog; 1633 1638 goto out_offset; 1634 1639 } 1635 1640 1636 - /* Second, real pass, that acutally emits the image. */ 1637 - ctx->insns = (u32 *)jit_data->image; 1638 1641 skip_init_ctx: 1639 1642 pass++; 1640 1643 ctx->ninsns = 0; 1641 1644 1642 1645 build_prologue(ctx); 1643 - if (build_body(ctx, extra_pass)) { 1646 + if (build_body(ctx, extra_pass, NULL)) { 1644 1647 bpf_jit_binary_free(jit_data->header); 1645 1648 prog = orig_prog; 1646 1649 goto out_offset;