Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

riscv, bpf: Introduce shift add helper with Zba optimization

The Zba extension is very useful for generating addresses that index into
arrays of basic data types. This patch introduces sh2add and sh3add helpers
for RV32 and RV64 respectively, to accelerate addressing for arrays of
unsigned long data.

Signed-off-by: Xiao Wang <xiao.w.wang@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Björn Töpel <bjorn@kernel.org>
Link: https://lore.kernel.org/bpf/20240524075543.4050464-3-xiao.w.wang@intel.com

Authored by Xiao Wang and committed by Daniel Borkmann.
96a27ee7 531876c8

+37 -8
+33
arch/riscv/net/bpf_jit.h
··· 742 742 return rv_css_insn(0x6, imm, rs2, 0x2); 743 743 } 744 744 745 + /* RVZBA instructions. */ 746 + static inline u32 rvzba_sh2add(u8 rd, u8 rs1, u8 rs2) 747 + { 748 + return rv_r_insn(0x10, rs2, rs1, 0x4, rd, 0x33); 749 + } 750 + 751 + static inline u32 rvzba_sh3add(u8 rd, u8 rs1, u8 rs2) 752 + { 753 + return rv_r_insn(0x10, rs2, rs1, 0x6, rd, 0x33); 754 + } 755 + 745 756 /* RVZBB instructions. */ 746 757 static inline u32 rvzbb_sextb(u8 rd, u8 rs1) 747 758 { ··· 1104 1093 emitc(rvc_sw(rs1, off, rs2), ctx); 1105 1094 else 1106 1095 emit(rv_sw(rs1, off, rs2), ctx); 1096 + } 1097 + 1098 + static inline void emit_sh2add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 1099 + { 1100 + if (rvzba_enabled()) { 1101 + emit(rvzba_sh2add(rd, rs1, rs2), ctx); 1102 + return; 1103 + } 1104 + 1105 + emit_slli(rd, rs1, 2, ctx); 1106 + emit_add(rd, rd, rs2, ctx); 1107 + } 1108 + 1109 + static inline void emit_sh3add(u8 rd, u8 rs1, u8 rs2, struct rv_jit_context *ctx) 1110 + { 1111 + if (rvzba_enabled()) { 1112 + emit(rvzba_sh3add(rd, rs1, rs2), ctx); 1113 + return; 1114 + } 1115 + 1116 + emit_slli(rd, rs1, 3, ctx); 1117 + emit_add(rd, rd, rs2, ctx); 1107 1118 } 1108 1119 1109 1120 /* RV64-only helper functions. */
+1 -2
arch/riscv/net/bpf_jit_comp32.c
··· 811 811 * if (!prog) 812 812 * goto out; 813 813 */ 814 - emit(rv_slli(RV_REG_T0, lo(idx_reg), 2), ctx); 815 - emit(rv_add(RV_REG_T0, RV_REG_T0, lo(arr_reg)), ctx); 814 + emit_sh2add(RV_REG_T0, lo(idx_reg), lo(arr_reg), ctx); 816 815 off = offsetof(struct bpf_array, ptrs); 817 816 if (is_12b_check(off, insn)) 818 817 return -1;
+3 -6
arch/riscv/net/bpf_jit_comp64.c
··· 380 380 * if (!prog) 381 381 * goto out; 382 382 */ 383 - emit_slli(RV_REG_T2, RV_REG_A2, 3, ctx); 384 - emit_add(RV_REG_T2, RV_REG_T2, RV_REG_A1, ctx); 383 + emit_sh3add(RV_REG_T2, RV_REG_A2, RV_REG_A1, ctx); 385 384 off = offsetof(struct bpf_array, ptrs); 386 385 if (is_12b_check(off, insn)) 387 386 return -1; ··· 1098 1099 /* Load current CPU number in T1 */ 1099 1100 emit_ld(RV_REG_T1, offsetof(struct thread_info, cpu), 1100 1101 RV_REG_TP, ctx); 1101 - /* << 3 because offsets are 8 bytes */ 1102 - emit_slli(RV_REG_T1, RV_REG_T1, 3, ctx); 1103 1102 /* Load address of __per_cpu_offset array in T2 */ 1104 1103 emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx); 1105 - /* Add offset of current CPU to __per_cpu_offset */ 1106 - emit_add(RV_REG_T1, RV_REG_T2, RV_REG_T1, ctx); 1104 + /* Get address of __per_cpu_offset[cpu] in T1 */ 1105 + emit_sh3add(RV_REG_T1, RV_REG_T1, RV_REG_T2, ctx); 1107 1106 /* Load __per_cpu_offset[cpu] in T1 */ 1108 1107 emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx); 1109 1108 /* Add the offset to Rd */