Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

parisc: Add 32-bit eBPF JIT compiler

Signed-off-by: Helge Deller <deller@gmx.de>

+1615
+1615
arch/parisc/net/bpf_jit_comp32.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * BPF JIT compiler for PA-RISC (32-bit) 4 + * 5 + * Copyright (c) 2023 Helge Deller <deller@gmx.de> 6 + * 7 + * The code is based on the BPF JIT compiler for RV64 by Björn Töpel and 8 + * the BPF JIT compiler for 32-bit ARM by Shubham Bansal and Mircea Gherzan. 9 + */ 10 + 11 + #include <linux/bpf.h> 12 + #include <linux/filter.h> 13 + #include <linux/libgcc.h> 14 + #include "bpf_jit.h" 15 + 16 + /* 17 + * Stack layout during BPF program execution (note: stack grows up): 18 + * 19 + * high 20 + * HPPA32 sp => +----------+ <= HPPA32 fp 21 + * | saved sp | 22 + * | saved rp | 23 + * | ... | HPPA32 callee-saved registers 24 + * | curr args| 25 + * | local var| 26 + * +----------+ <= (sp - 4 * NR_SAVED_REGISTERS) 27 + * | lo(R9) | 28 + * | hi(R9) | 29 + * | lo(FP) | JIT scratch space for BPF registers 30 + * | hi(FP) | 31 + * | ... | 32 + * +----------+ <= (sp - 4 * NR_SAVED_REGISTERS 33 + * | | - 4 * BPF_JIT_SCRATCH_REGS) 34 + * | | 35 + * | ... | BPF program stack 36 + * | | 37 + * | ... | Function call stack 38 + * | | 39 + * +----------+ 40 + * low 41 + */ 42 + 43 + enum { 44 + /* Stack layout - these are offsets from top of JIT scratch space. */ 45 + BPF_R8_HI, 46 + BPF_R8_LO, 47 + BPF_R9_HI, 48 + BPF_R9_LO, 49 + BPF_FP_HI, 50 + BPF_FP_LO, 51 + BPF_AX_HI, 52 + BPF_AX_LO, 53 + BPF_R0_TEMP_HI, 54 + BPF_R0_TEMP_LO, 55 + BPF_JIT_SCRATCH_REGS, 56 + }; 57 + 58 + /* Number of callee-saved registers stored to stack: rp, r3-r18. */ 59 + #define NR_SAVED_REGISTERS (18 - 3 + 1 + 8) 60 + 61 + /* Offset from fp for BPF registers stored on stack. 
*/ 62 + #define STACK_OFFSET(k) (- (NR_SAVED_REGISTERS + k + 1)) 63 + #define STACK_ALIGN FRAME_SIZE 64 + 65 + #define EXIT_PTR_LOAD(reg) hppa_ldw(-0x08, HPPA_REG_SP, reg) 66 + #define EXIT_PTR_STORE(reg) hppa_stw(reg, -0x08, HPPA_REG_SP) 67 + #define EXIT_PTR_JUMP(reg, nop) hppa_bv(HPPA_REG_ZERO, reg, nop) 68 + 69 + #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) 70 + #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) 71 + #define TMP_REG_R0 (MAX_BPF_JIT_REG + 2) 72 + 73 + static const s8 regmap[][2] = { 74 + /* Return value from in-kernel function, and exit value from eBPF. */ 75 + [BPF_REG_0] = {HPPA_REG_RET0, HPPA_REG_RET1}, /* HI/LOW */ 76 + 77 + /* Arguments from eBPF program to in-kernel function. */ 78 + [BPF_REG_1] = {HPPA_R(3), HPPA_R(4)}, 79 + [BPF_REG_2] = {HPPA_R(5), HPPA_R(6)}, 80 + [BPF_REG_3] = {HPPA_R(7), HPPA_R(8)}, 81 + [BPF_REG_4] = {HPPA_R(9), HPPA_R(10)}, 82 + [BPF_REG_5] = {HPPA_R(11), HPPA_R(12)}, 83 + 84 + [BPF_REG_6] = {HPPA_R(13), HPPA_R(14)}, 85 + [BPF_REG_7] = {HPPA_R(15), HPPA_R(16)}, 86 + /* 87 + * Callee-saved registers that in-kernel function will preserve. 88 + * Stored on the stack. 89 + */ 90 + [BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)}, 91 + [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, 92 + 93 + /* Read-only frame pointer to access BPF stack. Not needed. */ 94 + [BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)}, 95 + 96 + /* Temporary register for blinding constants. Stored on the stack. */ 97 + [BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)}, 98 + /* 99 + * Temporary registers used by the JIT to operate on registers stored 100 + * on the stack. Save t0 and t1 to be used as temporaries in generated 101 + * code. 
102 + */ 103 + [TMP_REG_1] = {HPPA_REG_T3, HPPA_REG_T2}, 104 + [TMP_REG_2] = {HPPA_REG_T5, HPPA_REG_T4}, 105 + 106 + /* temporary space for BPF_R0 during libgcc and millicode calls */ 107 + [TMP_REG_R0] = {STACK_OFFSET(BPF_R0_TEMP_HI), STACK_OFFSET(BPF_R0_TEMP_LO)}, 108 + }; 109 + 110 + static s8 hi(const s8 *r) 111 + { 112 + return r[0]; 113 + } 114 + 115 + static s8 lo(const s8 *r) 116 + { 117 + return r[1]; 118 + } 119 + 120 + static void emit_hppa_copy(const s8 rs, const s8 rd, struct hppa_jit_context *ctx) 121 + { 122 + REG_SET_SEEN(ctx, rd); 123 + if (OPTIMIZE_HPPA && (rs == rd)) 124 + return; 125 + REG_SET_SEEN(ctx, rs); 126 + emit(hppa_copy(rs, rd), ctx); 127 + } 128 + 129 + static void emit_hppa_xor(const s8 r1, const s8 r2, const s8 r3, struct hppa_jit_context *ctx) 130 + { 131 + REG_SET_SEEN(ctx, r1); 132 + REG_SET_SEEN(ctx, r2); 133 + REG_SET_SEEN(ctx, r3); 134 + if (OPTIMIZE_HPPA && (r1 == r2)) { 135 + emit(hppa_copy(HPPA_REG_ZERO, r3), ctx); 136 + } else { 137 + emit(hppa_xor(r1, r2, r3), ctx); 138 + } 139 + } 140 + 141 + static void emit_imm(const s8 rd, s32 imm, struct hppa_jit_context *ctx) 142 + { 143 + u32 lower = im11(imm); 144 + 145 + REG_SET_SEEN(ctx, rd); 146 + if (OPTIMIZE_HPPA && relative_bits_ok(imm, 14)) { 147 + emit(hppa_ldi(imm, rd), ctx); 148 + return; 149 + } 150 + emit(hppa_ldil(imm, rd), ctx); 151 + if (OPTIMIZE_HPPA && (lower == 0)) 152 + return; 153 + emit(hppa_ldo(lower, rd, rd), ctx); 154 + } 155 + 156 + static void emit_imm32(const s8 *rd, s32 imm, struct hppa_jit_context *ctx) 157 + { 158 + /* Emit immediate into lower bits. */ 159 + REG_SET_SEEN(ctx, lo(rd)); 160 + emit_imm(lo(rd), imm, ctx); 161 + 162 + /* Sign-extend into upper bits. 
*/ 163 + REG_SET_SEEN(ctx, hi(rd)); 164 + if (imm >= 0) 165 + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); 166 + else 167 + emit(hppa_ldi(-1, hi(rd)), ctx); 168 + } 169 + 170 + static void emit_imm64(const s8 *rd, s32 imm_hi, s32 imm_lo, 171 + struct hppa_jit_context *ctx) 172 + { 173 + emit_imm(hi(rd), imm_hi, ctx); 174 + emit_imm(lo(rd), imm_lo, ctx); 175 + } 176 + 177 + static void __build_epilogue(bool is_tail_call, struct hppa_jit_context *ctx) 178 + { 179 + const s8 *r0 = regmap[BPF_REG_0]; 180 + int i; 181 + 182 + if (is_tail_call) { 183 + /* 184 + * goto *(t0 + 4); 185 + * Skips first instruction of prologue which initializes tail 186 + * call counter. Assumes t0 contains address of target program, 187 + * see emit_bpf_tail_call. 188 + */ 189 + emit(hppa_ldo(1 * HPPA_INSN_SIZE, HPPA_REG_T0, HPPA_REG_T0), ctx); 190 + emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_T0, EXEC_NEXT_INSTR), ctx); 191 + /* in delay slot: */ 192 + emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_IN_INIT), ctx); 193 + 194 + return; 195 + } 196 + 197 + /* load epilogue function pointer and jump to it. */ 198 + /* exit point is either directly below, or the outest TCC exit function */ 199 + emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx); 200 + emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx); 201 + 202 + /* NOTE: we are 32-bit and big-endian, so return lower 32-bit value */ 203 + emit_hppa_copy(lo(r0), HPPA_REG_RET0, ctx); 204 + 205 + /* Restore callee-saved registers. 
*/ 206 + for (i = 3; i <= 18; i++) { 207 + if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i))) 208 + continue; 209 + emit(hppa_ldw(-REG_SIZE * (8 + (i-3)), HPPA_REG_SP, HPPA_R(i)), ctx); 210 + } 211 + 212 + /* load original return pointer (stored by outest TCC function) */ 213 + emit(hppa_ldw(-0x14, HPPA_REG_SP, HPPA_REG_RP), ctx); 214 + emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_RP, EXEC_NEXT_INSTR), ctx); 215 + /* in delay slot: */ 216 + emit(hppa_ldw(-0x04, HPPA_REG_SP, HPPA_REG_SP), ctx); 217 + } 218 + 219 + static bool is_stacked(s8 reg) 220 + { 221 + return reg < 0; 222 + } 223 + 224 + static const s8 *bpf_get_reg64_offset(const s8 *reg, const s8 *tmp, 225 + u16 offset_sp, struct hppa_jit_context *ctx) 226 + { 227 + if (is_stacked(hi(reg))) { 228 + emit(hppa_ldw(REG_SIZE * hi(reg) - offset_sp, HPPA_REG_SP, hi(tmp)), ctx); 229 + emit(hppa_ldw(REG_SIZE * lo(reg) - offset_sp, HPPA_REG_SP, lo(tmp)), ctx); 230 + reg = tmp; 231 + } 232 + REG_SET_SEEN(ctx, hi(reg)); 233 + REG_SET_SEEN(ctx, lo(reg)); 234 + return reg; 235 + } 236 + 237 + static const s8 *bpf_get_reg64(const s8 *reg, const s8 *tmp, 238 + struct hppa_jit_context *ctx) 239 + { 240 + return bpf_get_reg64_offset(reg, tmp, 0, ctx); 241 + } 242 + 243 + static const s8 *bpf_get_reg64_ref(const s8 *reg, const s8 *tmp, 244 + bool must_load, struct hppa_jit_context *ctx) 245 + { 246 + if (!OPTIMIZE_HPPA) 247 + return bpf_get_reg64(reg, tmp, ctx); 248 + 249 + if (is_stacked(hi(reg))) { 250 + if (must_load) 251 + emit(hppa_ldw(REG_SIZE * hi(reg), HPPA_REG_SP, hi(tmp)), ctx); 252 + reg = tmp; 253 + } 254 + REG_SET_SEEN(ctx, hi(reg)); 255 + REG_SET_SEEN(ctx, lo(reg)); 256 + return reg; 257 + } 258 + 259 + 260 + static void bpf_put_reg64(const s8 *reg, const s8 *src, 261 + struct hppa_jit_context *ctx) 262 + { 263 + if (is_stacked(hi(reg))) { 264 + emit(hppa_stw(hi(src), REG_SIZE * hi(reg), HPPA_REG_SP), ctx); 265 + emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx); 266 + } 267 + } 268 + 269 + static void 
bpf_save_R0(struct hppa_jit_context *ctx) 270 + { 271 + bpf_put_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx); 272 + } 273 + 274 + static void bpf_restore_R0(struct hppa_jit_context *ctx) 275 + { 276 + bpf_get_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx); 277 + } 278 + 279 + 280 + static const s8 *bpf_get_reg32(const s8 *reg, const s8 *tmp, 281 + struct hppa_jit_context *ctx) 282 + { 283 + if (is_stacked(lo(reg))) { 284 + emit(hppa_ldw(REG_SIZE * lo(reg), HPPA_REG_SP, lo(tmp)), ctx); 285 + reg = tmp; 286 + } 287 + REG_SET_SEEN(ctx, lo(reg)); 288 + return reg; 289 + } 290 + 291 + static const s8 *bpf_get_reg32_ref(const s8 *reg, const s8 *tmp, 292 + struct hppa_jit_context *ctx) 293 + { 294 + if (!OPTIMIZE_HPPA) 295 + return bpf_get_reg32(reg, tmp, ctx); 296 + 297 + if (is_stacked(hi(reg))) { 298 + reg = tmp; 299 + } 300 + REG_SET_SEEN(ctx, lo(reg)); 301 + return reg; 302 + } 303 + 304 + static void bpf_put_reg32(const s8 *reg, const s8 *src, 305 + struct hppa_jit_context *ctx) 306 + { 307 + if (is_stacked(lo(reg))) { 308 + REG_SET_SEEN(ctx, lo(src)); 309 + emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx); 310 + if (1 && !ctx->prog->aux->verifier_zext) { 311 + REG_SET_SEEN(ctx, hi(reg)); 312 + emit(hppa_stw(HPPA_REG_ZERO, REG_SIZE * hi(reg), HPPA_REG_SP), ctx); 313 + } 314 + } else if (1 && !ctx->prog->aux->verifier_zext) { 315 + REG_SET_SEEN(ctx, hi(reg)); 316 + emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx); 317 + } 318 + } 319 + 320 + /* extern hppa millicode functions */ 321 + extern void $$mulI(void); 322 + extern void $$divU(void); 323 + extern void $$remU(void); 324 + 325 + static void emit_call_millicode(void *func, const s8 arg0, 326 + const s8 arg1, u8 opcode, struct hppa_jit_context *ctx) 327 + { 328 + u32 func_addr; 329 + 330 + emit_hppa_copy(arg0, HPPA_REG_ARG0, ctx); 331 + emit_hppa_copy(arg1, HPPA_REG_ARG1, ctx); 332 + 333 + /* libcgcc overwrites HPPA_REG_RET0/1, save temp. in dest. 
*/ 334 + if (arg0 != HPPA_REG_RET1) 335 + bpf_save_R0(ctx); 336 + 337 + func_addr = (uintptr_t) dereference_function_descriptor(func); 338 + emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx); 339 + /* skip the following be_l instruction if divisor is zero. */ 340 + if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) { 341 + if (BPF_OP(opcode) == BPF_DIV) 342 + emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx); 343 + else 344 + emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx); 345 + emit(hppa_or_cond(HPPA_REG_ARG1, HPPA_REG_ZERO, 1, 0, HPPA_REG_ZERO), ctx); 346 + } 347 + /* Note: millicode functions use r31 as return pointer instead of rp */ 348 + emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx); 349 + emit(hppa_nop(), ctx); /* this nop is needed here for delay slot */ 350 + 351 + /* Note: millicode functions return result in RET1, not RET0 */ 352 + emit_hppa_copy(HPPA_REG_RET1, arg0, ctx); 353 + 354 + /* restore HPPA_REG_RET0/1, temp. save in dest. */ 355 + if (arg0 != HPPA_REG_RET1) 356 + bpf_restore_R0(ctx); 357 + } 358 + 359 + static void emit_call_libgcc_ll(void *func, const s8 *arg0, 360 + const s8 *arg1, u8 opcode, struct hppa_jit_context *ctx) 361 + { 362 + u32 func_addr; 363 + 364 + emit_hppa_copy(lo(arg0), HPPA_REG_ARG0, ctx); 365 + emit_hppa_copy(hi(arg0), HPPA_REG_ARG1, ctx); 366 + emit_hppa_copy(lo(arg1), HPPA_REG_ARG2, ctx); 367 + emit_hppa_copy(hi(arg1), HPPA_REG_ARG3, ctx); 368 + 369 + /* libcgcc overwrites HPPA_REG_RET0/_RET1, so keep copy of R0 on stack */ 370 + if (hi(arg0) != HPPA_REG_RET0) 371 + bpf_save_R0(ctx); 372 + 373 + /* prepare stack */ 374 + emit(hppa_ldo(2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx); 375 + 376 + func_addr = (uintptr_t) dereference_function_descriptor(func); 377 + emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx); 378 + /* zero out the following be_l instruction if divisor is 0 (and set default values) */ 379 + if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) { 380 + 
emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET0, ctx); 381 + if (BPF_OP(opcode) == BPF_DIV) 382 + emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx); 383 + else 384 + emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx); 385 + emit(hppa_or_cond(HPPA_REG_ARG2, HPPA_REG_ARG3, 1, 0, HPPA_REG_ZERO), ctx); 386 + } 387 + emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx); 388 + emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx); 389 + 390 + /* restore stack */ 391 + emit(hppa_ldo(-2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx); 392 + 393 + emit_hppa_copy(HPPA_REG_RET0, hi(arg0), ctx); 394 + emit_hppa_copy(HPPA_REG_RET1, lo(arg0), ctx); 395 + 396 + /* restore HPPA_REG_RET0/_RET1 */ 397 + if (hi(arg0) != HPPA_REG_RET0) 398 + bpf_restore_R0(ctx); 399 + } 400 + 401 + static void emit_jump(s32 paoff, bool force_far, 402 + struct hppa_jit_context *ctx) 403 + { 404 + unsigned long pc, addr; 405 + 406 + /* Note: allocate 2 instructions for jumps if force_far is set. */ 407 + if (relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 17)) { 408 + /* use BL,short branch followed by nop() */ 409 + emit(hppa_bl(paoff - HPPA_BRANCH_DISPLACEMENT, HPPA_REG_ZERO), ctx); 410 + if (force_far) 411 + emit(hppa_nop(), ctx); 412 + return; 413 + } 414 + 415 + pc = (uintptr_t) &ctx->insns[ctx->ninsns]; 416 + addr = pc + (paoff * HPPA_INSN_SIZE); 417 + emit(hppa_ldil(addr, HPPA_REG_R31), ctx); 418 + emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx); // be,l,n addr(sr4,r31), %sr0, %r31 419 + } 420 + 421 + static void emit_alu_i64(const s8 *dst, s32 imm, 422 + struct hppa_jit_context *ctx, const u8 op) 423 + { 424 + const s8 *tmp1 = regmap[TMP_REG_1]; 425 + const s8 *rd; 426 + 427 + if (0 && op == BPF_MOV) 428 + rd = bpf_get_reg64_ref(dst, tmp1, false, ctx); 429 + else 430 + rd = bpf_get_reg64(dst, tmp1, ctx); 431 + 432 + /* dst = dst OP imm */ 433 + switch (op) { 434 + case BPF_MOV: 435 + emit_imm32(rd, imm, ctx); 436 + break; 437 + case BPF_AND: 438 + 
emit_imm(HPPA_REG_T0, imm, ctx); 439 + emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx); 440 + if (imm >= 0) 441 + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); 442 + break; 443 + case BPF_OR: 444 + emit_imm(HPPA_REG_T0, imm, ctx); 445 + emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx); 446 + if (imm < 0) 447 + emit_imm(hi(rd), -1, ctx); 448 + break; 449 + case BPF_XOR: 450 + emit_imm(HPPA_REG_T0, imm, ctx); 451 + emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx); 452 + if (imm < 0) { 453 + emit_imm(HPPA_REG_T0, -1, ctx); 454 + emit_hppa_xor(hi(rd), HPPA_REG_T0, hi(rd), ctx); 455 + } 456 + break; 457 + case BPF_LSH: 458 + if (imm == 0) 459 + break; 460 + if (imm > 32) { 461 + imm -= 32; 462 + emit(hppa_zdep(lo(rd), imm, imm, hi(rd)), ctx); 463 + emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx); 464 + } else if (imm == 32) { 465 + emit_hppa_copy(lo(rd), hi(rd), ctx); 466 + emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx); 467 + } else { 468 + emit(hppa_shd(hi(rd), lo(rd), 32 - imm, hi(rd)), ctx); 469 + emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx); 470 + } 471 + break; 472 + case BPF_RSH: 473 + if (imm == 0) 474 + break; 475 + if (imm > 32) { 476 + imm -= 32; 477 + emit(hppa_shr(hi(rd), imm, lo(rd)), ctx); 478 + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); 479 + } else if (imm == 32) { 480 + emit_hppa_copy(hi(rd), lo(rd), ctx); 481 + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); 482 + } else { 483 + emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx); 484 + emit(hppa_shr(hi(rd), imm, hi(rd)), ctx); 485 + } 486 + break; 487 + case BPF_ARSH: 488 + if (imm == 0) 489 + break; 490 + if (imm > 32) { 491 + imm -= 32; 492 + emit(hppa_extrws(hi(rd), 31 - imm, imm, lo(rd)), ctx); 493 + emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx); 494 + } else if (imm == 32) { 495 + emit_hppa_copy(hi(rd), lo(rd), ctx); 496 + emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx); 497 + } else { 498 + emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx); 499 + emit(hppa_extrws(hi(rd), 31 - imm, imm, hi(rd)), ctx); 500 + } 
501 + break; 502 + default: 503 + WARN_ON(1); 504 + } 505 + 506 + bpf_put_reg64(dst, rd, ctx); 507 + } 508 + 509 + static void emit_alu_i32(const s8 *dst, s32 imm, 510 + struct hppa_jit_context *ctx, const u8 op) 511 + { 512 + const s8 *tmp1 = regmap[TMP_REG_1]; 513 + const s8 *rd = bpf_get_reg32(dst, tmp1, ctx); 514 + 515 + if (op == BPF_MOV) 516 + rd = bpf_get_reg32_ref(dst, tmp1, ctx); 517 + else 518 + rd = bpf_get_reg32(dst, tmp1, ctx); 519 + 520 + /* dst = dst OP imm */ 521 + switch (op) { 522 + case BPF_MOV: 523 + emit_imm(lo(rd), imm, ctx); 524 + break; 525 + case BPF_ADD: 526 + emit_imm(HPPA_REG_T0, imm, ctx); 527 + emit(hppa_add(lo(rd), HPPA_REG_T0, lo(rd)), ctx); 528 + break; 529 + case BPF_SUB: 530 + emit_imm(HPPA_REG_T0, imm, ctx); 531 + emit(hppa_sub(lo(rd), HPPA_REG_T0, lo(rd)), ctx); 532 + break; 533 + case BPF_AND: 534 + emit_imm(HPPA_REG_T0, imm, ctx); 535 + emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx); 536 + break; 537 + case BPF_OR: 538 + emit_imm(HPPA_REG_T0, imm, ctx); 539 + emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx); 540 + break; 541 + case BPF_XOR: 542 + emit_imm(HPPA_REG_T0, imm, ctx); 543 + emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx); 544 + break; 545 + case BPF_LSH: 546 + if (imm != 0) 547 + emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx); 548 + break; 549 + case BPF_RSH: 550 + if (imm != 0) 551 + emit(hppa_shr(lo(rd), imm, lo(rd)), ctx); 552 + break; 553 + case BPF_ARSH: 554 + if (imm != 0) 555 + emit(hppa_extrws(lo(rd), 31 - imm, imm, lo(rd)), ctx); 556 + break; 557 + default: 558 + WARN_ON(1); 559 + } 560 + 561 + bpf_put_reg32(dst, rd, ctx); 562 + } 563 + 564 + static void emit_alu_r64(const s8 *dst, const s8 *src, 565 + struct hppa_jit_context *ctx, const u8 op) 566 + { 567 + const s8 *tmp1 = regmap[TMP_REG_1]; 568 + const s8 *tmp2 = regmap[TMP_REG_2]; 569 + const s8 *rd; 570 + const s8 *rs = bpf_get_reg64(src, tmp2, ctx); 571 + 572 + if (op == BPF_MOV) 573 + rd = bpf_get_reg64_ref(dst, tmp1, false, ctx); 574 + else 575 + 
rd = bpf_get_reg64(dst, tmp1, ctx); 576 + 577 + /* dst = dst OP src */ 578 + switch (op) { 579 + case BPF_MOV: 580 + emit_hppa_copy(lo(rs), lo(rd), ctx); 581 + emit_hppa_copy(hi(rs), hi(rd), ctx); 582 + break; 583 + case BPF_ADD: 584 + emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx); 585 + emit(hppa_addc(hi(rd), hi(rs), hi(rd)), ctx); 586 + break; 587 + case BPF_SUB: 588 + emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx); 589 + emit(hppa_subb(hi(rd), hi(rs), hi(rd)), ctx); 590 + break; 591 + case BPF_AND: 592 + emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx); 593 + emit(hppa_and(hi(rd), hi(rs), hi(rd)), ctx); 594 + break; 595 + case BPF_OR: 596 + emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx); 597 + emit(hppa_or(hi(rd), hi(rs), hi(rd)), ctx); 598 + break; 599 + case BPF_XOR: 600 + emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx); 601 + emit_hppa_xor(hi(rd), hi(rs), hi(rd), ctx); 602 + break; 603 + case BPF_MUL: 604 + emit_call_libgcc_ll(__muldi3, rd, rs, op, ctx); 605 + break; 606 + case BPF_DIV: 607 + emit_call_libgcc_ll(&hppa_div64, rd, rs, op, ctx); 608 + break; 609 + case BPF_MOD: 610 + emit_call_libgcc_ll(&hppa_div64_rem, rd, rs, op, ctx); 611 + break; 612 + case BPF_LSH: 613 + emit_call_libgcc_ll(__ashldi3, rd, rs, op, ctx); 614 + break; 615 + case BPF_RSH: 616 + emit_call_libgcc_ll(__lshrdi3, rd, rs, op, ctx); 617 + break; 618 + case BPF_ARSH: 619 + emit_call_libgcc_ll(__ashrdi3, rd, rs, op, ctx); 620 + break; 621 + case BPF_NEG: 622 + emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx); 623 + emit(hppa_subb(HPPA_REG_ZERO, hi(rd), hi(rd)), ctx); 624 + break; 625 + default: 626 + WARN_ON(1); 627 + } 628 + 629 + bpf_put_reg64(dst, rd, ctx); 630 + } 631 + 632 + static void emit_alu_r32(const s8 *dst, const s8 *src, 633 + struct hppa_jit_context *ctx, const u8 op) 634 + { 635 + const s8 *tmp1 = regmap[TMP_REG_1]; 636 + const s8 *tmp2 = regmap[TMP_REG_2]; 637 + const s8 *rd; 638 + const s8 *rs = bpf_get_reg32(src, tmp2, ctx); 639 + 640 + if (op == BPF_MOV) 641 + rd = 
bpf_get_reg32_ref(dst, tmp1, ctx); 642 + else 643 + rd = bpf_get_reg32(dst, tmp1, ctx); 644 + 645 + /* dst = dst OP src */ 646 + switch (op) { 647 + case BPF_MOV: 648 + emit_hppa_copy(lo(rs), lo(rd), ctx); 649 + break; 650 + case BPF_ADD: 651 + emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx); 652 + break; 653 + case BPF_SUB: 654 + emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx); 655 + break; 656 + case BPF_AND: 657 + emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx); 658 + break; 659 + case BPF_OR: 660 + emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx); 661 + break; 662 + case BPF_XOR: 663 + emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx); 664 + break; 665 + case BPF_MUL: 666 + emit_call_millicode($$mulI, lo(rd), lo(rs), op, ctx); 667 + break; 668 + case BPF_DIV: 669 + emit_call_millicode($$divU, lo(rd), lo(rs), op, ctx); 670 + break; 671 + case BPF_MOD: 672 + emit_call_millicode($$remU, lo(rd), lo(rs), op, ctx); 673 + break; 674 + case BPF_LSH: 675 + emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx); 676 + emit(hppa_mtsar(HPPA_REG_T0), ctx); 677 + emit(hppa_depwz_sar(lo(rd), lo(rd)), ctx); 678 + break; 679 + case BPF_RSH: 680 + emit(hppa_mtsar(lo(rs)), ctx); 681 + emit(hppa_shrpw_sar(lo(rd), lo(rd)), ctx); 682 + break; 683 + case BPF_ARSH: /* sign extending arithmetic shift right */ 684 + // emit(hppa_beq(lo(rs), HPPA_REG_ZERO, 2), ctx); 685 + emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx); 686 + emit(hppa_mtsar(HPPA_REG_T0), ctx); 687 + emit(hppa_extrws_sar(lo(rd), lo(rd)), ctx); 688 + break; 689 + case BPF_NEG: 690 + emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx); // sub r0,rd,rd 691 + break; 692 + default: 693 + WARN_ON(1); 694 + } 695 + 696 + bpf_put_reg32(dst, rd, ctx); 697 + } 698 + 699 + static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 paoff, 700 + struct hppa_jit_context *ctx, const u8 op) 701 + { 702 + int e, s = ctx->ninsns; 703 + const s8 *tmp1 = regmap[TMP_REG_1]; 704 + const s8 *tmp2 = regmap[TMP_REG_2]; 705 + 706 + const s8 *rs1 = bpf_get_reg64(src1, tmp1, 
ctx); 707 + const s8 *rs2 = bpf_get_reg64(src2, tmp2, ctx); 708 + 709 + /* 710 + * NO_JUMP skips over the rest of the instructions and the 711 + * emit_jump, meaning the BPF branch is not taken. 712 + * JUMP skips directly to the emit_jump, meaning 713 + * the BPF branch is taken. 714 + * 715 + * The fallthrough case results in the BPF branch being taken. 716 + */ 717 + #define NO_JUMP(idx) (2 + (idx) - 1) 718 + #define JUMP(idx) (0 + (idx) - 1) 719 + 720 + switch (op) { 721 + case BPF_JEQ: 722 + emit(hppa_bne(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); 723 + emit(hppa_bne(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); 724 + break; 725 + case BPF_JGT: 726 + emit(hppa_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx); 727 + emit(hppa_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); 728 + emit(hppa_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); 729 + break; 730 + case BPF_JLT: 731 + emit(hppa_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx); 732 + emit(hppa_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); 733 + emit(hppa_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); 734 + break; 735 + case BPF_JGE: 736 + emit(hppa_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx); 737 + emit(hppa_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); 738 + emit(hppa_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); 739 + break; 740 + case BPF_JLE: 741 + emit(hppa_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx); 742 + emit(hppa_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); 743 + emit(hppa_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); 744 + break; 745 + case BPF_JNE: 746 + emit(hppa_bne(hi(rs1), hi(rs2), JUMP(1)), ctx); 747 + emit(hppa_beq(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); 748 + break; 749 + case BPF_JSGT: 750 + emit(hppa_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx); 751 + emit(hppa_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); 752 + emit(hppa_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); 753 + break; 754 + case BPF_JSLT: 755 + emit(hppa_blt(hi(rs1), hi(rs2), JUMP(2)), ctx); 756 + emit(hppa_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); 757 + emit(hppa_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); 758 + break; 759 + case 
BPF_JSGE: 760 + emit(hppa_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx); 761 + emit(hppa_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); 762 + emit(hppa_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); 763 + break; 764 + case BPF_JSLE: 765 + emit(hppa_blt(hi(rs1), hi(rs2), JUMP(2)), ctx); 766 + emit(hppa_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); 767 + emit(hppa_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); 768 + break; 769 + case BPF_JSET: 770 + emit(hppa_and(hi(rs1), hi(rs2), HPPA_REG_T0), ctx); 771 + emit(hppa_and(lo(rs1), lo(rs2), HPPA_REG_T1), ctx); 772 + emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, JUMP(1)), ctx); 773 + emit(hppa_beq(HPPA_REG_T1, HPPA_REG_ZERO, NO_JUMP(0)), ctx); 774 + break; 775 + default: 776 + WARN_ON(1); 777 + } 778 + 779 + #undef NO_JUMP 780 + #undef JUMP 781 + 782 + e = ctx->ninsns; 783 + /* Adjust for extra insns. */ 784 + paoff -= (e - s); 785 + emit_jump(paoff, true, ctx); 786 + return 0; 787 + } 788 + 789 + static int emit_bcc(u8 op, u8 rd, u8 rs, int paoff, struct hppa_jit_context *ctx) 790 + { 791 + int e, s; 792 + bool far = false; 793 + int off; 794 + 795 + if (op == BPF_JSET) { 796 + /* 797 + * BPF_JSET is a special case: it has no inverse so we always 798 + * treat it as a far branch. 799 + */ 800 + emit(hppa_and(rd, rs, HPPA_REG_T0), ctx); 801 + paoff -= 1; /* reduce offset due to hppa_and() above */ 802 + rd = HPPA_REG_T0; 803 + rs = HPPA_REG_ZERO; 804 + op = BPF_JNE; 805 + } 806 + 807 + s = ctx->ninsns; 808 + 809 + if (!relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 12)) { 810 + op = invert_bpf_cond(op); 811 + far = true; 812 + } 813 + 814 + /* 815 + * For a far branch, the condition is negated and we jump over the 816 + * branch itself, and the three instructions from emit_jump. 817 + * For a near branch, just use paoff. 818 + */ 819 + off = far ? 
(HPPA_BRANCH_DISPLACEMENT - 1) : paoff - HPPA_BRANCH_DISPLACEMENT; 820 + 821 + switch (op) { 822 + /* IF (dst COND src) JUMP off */ 823 + case BPF_JEQ: 824 + emit(hppa_beq(rd, rs, off), ctx); 825 + break; 826 + case BPF_JGT: 827 + emit(hppa_bgtu(rd, rs, off), ctx); 828 + break; 829 + case BPF_JLT: 830 + emit(hppa_bltu(rd, rs, off), ctx); 831 + break; 832 + case BPF_JGE: 833 + emit(hppa_bgeu(rd, rs, off), ctx); 834 + break; 835 + case BPF_JLE: 836 + emit(hppa_bleu(rd, rs, off), ctx); 837 + break; 838 + case BPF_JNE: 839 + emit(hppa_bne(rd, rs, off), ctx); 840 + break; 841 + case BPF_JSGT: 842 + emit(hppa_bgt(rd, rs, off), ctx); 843 + break; 844 + case BPF_JSLT: 845 + emit(hppa_blt(rd, rs, off), ctx); 846 + break; 847 + case BPF_JSGE: 848 + emit(hppa_bge(rd, rs, off), ctx); 849 + break; 850 + case BPF_JSLE: 851 + emit(hppa_ble(rd, rs, off), ctx); 852 + break; 853 + default: 854 + WARN_ON(1); 855 + } 856 + 857 + if (far) { 858 + e = ctx->ninsns; 859 + /* Adjust for extra insns. */ 860 + paoff -= (e - s); 861 + emit_jump(paoff, true, ctx); 862 + } 863 + return 0; 864 + } 865 + 866 + static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 paoff, 867 + struct hppa_jit_context *ctx, const u8 op) 868 + { 869 + int e, s = ctx->ninsns; 870 + const s8 *tmp1 = regmap[TMP_REG_1]; 871 + const s8 *tmp2 = regmap[TMP_REG_2]; 872 + 873 + const s8 *rs1 = bpf_get_reg32(src1, tmp1, ctx); 874 + const s8 *rs2 = bpf_get_reg32(src2, tmp2, ctx); 875 + 876 + e = ctx->ninsns; 877 + /* Adjust for extra insns. 
*/ 878 + paoff -= (e - s); 879 + 880 + if (emit_bcc(op, lo(rs1), lo(rs2), paoff, ctx)) 881 + return -1; 882 + 883 + return 0; 884 + } 885 + 886 + static void emit_call(bool fixed, u64 addr, struct hppa_jit_context *ctx) 887 + { 888 + const s8 *tmp = regmap[TMP_REG_1]; 889 + const s8 *r0 = regmap[BPF_REG_0]; 890 + const s8 *reg; 891 + const int offset_sp = 2 * STACK_ALIGN; 892 + 893 + /* prepare stack */ 894 + emit(hppa_ldo(offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx); 895 + 896 + /* load R1 & R2 in registers, R3-R5 to stack. */ 897 + reg = bpf_get_reg64_offset(regmap[BPF_REG_5], tmp, offset_sp, ctx); 898 + emit(hppa_stw(hi(reg), -0x48, HPPA_REG_SP), ctx); 899 + emit(hppa_stw(lo(reg), -0x44, HPPA_REG_SP), ctx); 900 + 901 + reg = bpf_get_reg64_offset(regmap[BPF_REG_4], tmp, offset_sp, ctx); 902 + emit(hppa_stw(hi(reg), -0x40, HPPA_REG_SP), ctx); 903 + emit(hppa_stw(lo(reg), -0x3c, HPPA_REG_SP), ctx); 904 + 905 + reg = bpf_get_reg64_offset(regmap[BPF_REG_3], tmp, offset_sp, ctx); 906 + emit(hppa_stw(hi(reg), -0x38, HPPA_REG_SP), ctx); 907 + emit(hppa_stw(lo(reg), -0x34, HPPA_REG_SP), ctx); 908 + 909 + reg = bpf_get_reg64_offset(regmap[BPF_REG_2], tmp, offset_sp, ctx); 910 + emit_hppa_copy(hi(reg), HPPA_REG_ARG3, ctx); 911 + emit_hppa_copy(lo(reg), HPPA_REG_ARG2, ctx); 912 + 913 + reg = bpf_get_reg64_offset(regmap[BPF_REG_1], tmp, offset_sp, ctx); 914 + emit_hppa_copy(hi(reg), HPPA_REG_ARG1, ctx); 915 + emit_hppa_copy(lo(reg), HPPA_REG_ARG0, ctx); 916 + 917 + /* backup TCC */ 918 + if (REG_WAS_SEEN(ctx, HPPA_REG_TCC)) 919 + emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_SAVED), ctx); 920 + 921 + /* 922 + * Use ldil() to load absolute address. Don't use emit_imm as the 923 + * number of emitted instructions should not depend on the value of 924 + * addr. 
	 */
	emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
	emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx);
	/* set return address in delay slot */
	emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx);

	/* restore TCC */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_TCC), ctx);

	/* restore stack */
	emit(hppa_ldo(-offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);

	/* set return value. */
	emit_hppa_copy(HPPA_REG_RET0, hi(r0), ctx);
	emit_hppa_copy(HPPA_REG_RET1, lo(r0), ctx);
}

/*
 * Emit the instruction sequence for the BPF tail-call pseudo-call:
 * bounds-check the index against array->map.max_entries, decrement the
 * tail-call counter (TCC), load the target prog and jump into it past
 * its TCC-initialization instruction.  All failure paths exit through
 * the epilogue pointer stored on the stack (EXIT_PTR_LOAD).
 *
 * Always returns 0; emitted code handles the runtime error paths.
 */
static int emit_bpf_tail_call(int insn, struct hppa_jit_context *ctx)
{
	/*
	 * R1 -> &ctx
	 * R2 -> &array
	 * R3 -> index
	 */
	int off;
	const s8 *arr_reg = regmap[BPF_REG_2];
	const s8 *idx_reg = regmap[BPF_REG_3];
	struct bpf_array bpfa;
	struct bpf_prog bpfp;

	/* get address of TCC main exit function for error case into rp */
	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);

	/* max_entries = array->map.max_entries; */
	off = offsetof(struct bpf_array, map.max_entries);
	BUILD_BUG_ON(sizeof(bpfa.map.max_entries) != 4);
	emit(hppa_ldw(off, lo(arr_reg), HPPA_REG_T1), ctx);

	/*
	 * if (index >= max_entries)
	 *	goto out;
	 *
	 * The conditional branch skips the following exit jump when the
	 * bounds check passes (hence the 2-instruction displacement).
	 */
	emit(hppa_bltu(lo(idx_reg), HPPA_REG_T1, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * if (--tcc < 0)
	 *	goto out;
	 */
	REG_FORCE_SEEN(ctx, HPPA_REG_TCC);
	emit(hppa_ldo(-1, HPPA_REG_TCC, HPPA_REG_TCC), ctx);
	emit(hppa_bge(HPPA_REG_TCC, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * prog = array->ptrs[index];
	 * if (!prog)
	 *	goto out;
	 *
	 * sh2add scales the index by 4 (pointer size on 32-bit).
	 */
	BUILD_BUG_ON(sizeof(bpfa.ptrs[0]) != 4);
	emit(hppa_sh2add(lo(idx_reg), lo(arr_reg), HPPA_REG_T0), ctx);
	off = offsetof(struct bpf_array, ptrs);
	BUILD_BUG_ON(!relative_bits_ok(off, 11));
	emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
	emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * tcc = temp_tcc;
	 * goto *(prog->bpf_func + 4);
	 */
	off = offsetof(struct bpf_prog, bpf_func);
	BUILD_BUG_ON(!relative_bits_ok(off, 11));
	BUILD_BUG_ON(sizeof(bpfp.bpf_func) != 4);
	emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
	/* Epilogue jumps to *(t0 + 4). */
	__build_epilogue(true, ctx);
	return 0;
}

/*
 * LDX: dst = *(size *)(src + off)
 *
 * Emit a load of @size (BPF_B/H/W/DW) from src+off into the 64-bit
 * register pair @dst.  Sub-word loads zero the high word unless the
 * verifier already guarantees zero-extension (verifier_zext).
 * Always returns 0.
 */
static int emit_load_r64(const s8 *dst, const s8 *src, s16 off,
			 struct hppa_jit_context *ctx, const u8 size)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd = bpf_get_reg64_ref(dst, tmp1, ctx->prog->aux->verifier_zext, ctx);
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
	s8 srcreg;

	/* need to calculate address since offset does not fit in 14 bits? */
	if (relative_bits_ok(off, 14))
		srcreg = lo(rs);
	else {
		/* need to use R1 here, since addil puts result into R1 */
		srcreg = HPPA_REG_R1;
		emit(hppa_addil(off, lo(rs)), ctx);
		off = im11(off);
	}

	/* LDX: dst = *(size *)(src + off) */
	switch (size) {
	case BPF_B:
		emit(hppa_ldb(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_H:
		emit(hppa_ldh(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_W:
		emit(hppa_ldw(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_DW:
		/* 64-bit load as two 32-bit loads: high word first (big-endian). */
		emit(hppa_ldw(off + 0, srcreg, hi(rd)), ctx);
		emit(hppa_ldw(off + 4, srcreg, lo(rd)), ctx);
		break;
	}

	bpf_put_reg64(dst, rd, ctx);
	return 0;
}

/*
 * ST/STX: *(size *)(dst + off) = src
 *
 * Emit a store of @size (BPF_B/H/W/DW) from the register pair @src to
 * dst+off.  @mode is currently unused by the emitted code.
 * Always returns 0.
 */
static int emit_store_r64(const s8 *dst, const s8 *src, s16 off,
			  struct hppa_jit_context *ctx, const u8 size,
			  const u8 mode)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
	s8 dstreg;

	/* need to calculate address since offset does not fit in 14 bits? */
	if (relative_bits_ok(off, 14))
		dstreg = lo(rd);
	else {
		/* need to use R1 here, since addil puts result into R1 */
		dstreg = HPPA_REG_R1;
		emit(hppa_addil(off, lo(rd)), ctx);
		off = im11(off);
	}

	/* ST: *(size *)(dst + off) = imm */
	switch (size) {
	case BPF_B:
		emit(hppa_stb(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_H:
		emit(hppa_sth(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_W:
		emit(hppa_stw(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_DW:
		/* 64-bit store as two 32-bit stores: high word at off+0. */
		emit(hppa_stw(hi(rs), off + 0, dstreg), ctx);
		emit(hppa_stw(lo(rs), off + 4, dstreg), ctx);
		break;
	}

	return 0;
}

/*
 * Byte-swap the low 16 bits of @rd in place (used for BSWAP16);
 * the upper 16 bits of @rd end up zeroed by the final extru.
 * Clobbers HPPA_REG_T1.
 */
static void emit_rev16(const s8 rd, struct hppa_jit_context *ctx)
{
	emit(hppa_extru(rd, 23, 8, HPPA_REG_T1), ctx);
	emit(hppa_depwz(rd, 23, 8, HPPA_REG_T1), ctx);
	emit(hppa_extru(HPPA_REG_T1, 31, 16, rd), ctx);
}

/*
 * Byte-swap the 32-bit value in @rs into @rd (BSWAP32).
 * @rs and @rd may be the same register.  Clobbers HPPA_REG_T1.
 */
static void emit_rev32(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
{
	emit(hppa_shrpw(rs, rs, 16, HPPA_REG_T1), ctx);
	emit(hppa_depwz(HPPA_REG_T1, 15, 8, HPPA_REG_T1), ctx);
	emit(hppa_shrpw(rs, HPPA_REG_T1, 8, rd), ctx);
}

/*
 * Zero-extend the 64-bit BPF register @dst by clearing its high word.
 */
static void emit_zext64(const s8 *dst, struct hppa_jit_context *ctx)
{
	const s8 *rd;
	const s8 *tmp1 = regmap[TMP_REG_1];

	rd = bpf_get_reg64(dst, tmp1, ctx);
	emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
	bpf_put_reg64(dst, rd, ctx);
}

/*
 * Translate a single eBPF instruction into PA-RISC machine code.
 *
 * @insn:	instruction to translate
 * @ctx:	JIT context (emission buffer, register-seen state, offsets)
 * @extra_pass:	true on the second pass, when helper-call addresses are final
 *
 * Returns 0 on success, 1 when a second instruction slot was consumed
 * (BPF_LD | BPF_IMM | BPF_DW), or a negative value on error.
 */
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct hppa_jit_context *ctx,
		      bool extra_pass)
{
	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
		BPF_CLASS(insn->code) == BPF_JMP;
	int s, e, paoff, i = insn - ctx->prog->insnsi;
	u8 code = insn->code;
	s16 off = insn->off;
	s32 imm = insn->imm;

	const s8 *dst = regmap[insn->dst_reg];
	const s8 *src = regmap[insn->src_reg];
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];

	/* Disabled debug trace; flip "if (0)" to enable during bring-up. */
	if (0) printk("CLASS %03d CODE %#02x ALU64:%d BPF_SIZE %#02x "
		"BPF_CODE %#02x src_reg %d dst_reg %d\n",
		BPF_CLASS(code), code, (code & BPF_ALU64) ? 1:0, BPF_SIZE(code),
		BPF_OP(code), insn->src_reg, insn->dst_reg);

	switch (code) {
	/* dst = src */
	case BPF_ALU64 | BPF_MOV | BPF_X:

	case BPF_ALU64 | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_K:

	case BPF_ALU64 | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_K:

	case BPF_ALU64 | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:

	case BPF_ALU64 | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_K:

	case BPF_ALU64 | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_K:

	case BPF_ALU64 | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_K:

	case BPF_ALU64 | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		/* BPF_K variants materialize imm into tmp2 and reuse the r64 path. */
		if (BPF_SRC(code) == BPF_K) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}
		emit_alu_r64(dst, src, ctx, BPF_OP(code));
		break;

	/* dst = -dst */
	case BPF_ALU64 | BPF_NEG:
		/* src operand is ignored for NEG; tmp2 serves as a dummy. */
		emit_alu_r64(dst, tmp2, ctx, BPF_OP(code));
		break;

	case BPF_ALU64 | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit_alu_i64(dst, imm, ctx, BPF_OP(code));
		break;

	case BPF_ALU | BPF_MOV | BPF_X:
		if (imm == 1) {
			/* Special mov32 for zext. */
			emit_zext64(dst, ctx);
			break;
		}
		fallthrough;
	/* dst = dst OP src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU | BPF_XOR | BPF_X:

	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU | BPF_MUL | BPF_K:

	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU | BPF_DIV | BPF_K:

	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU | BPF_MOD | BPF_K:

	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU | BPF_ARSH | BPF_X:
		if (BPF_SRC(code) == BPF_K) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}
		emit_alu_r32(dst, src, ctx, BPF_OP(code));
		break;

	/* dst = dst OP imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU | BPF_ARSH | BPF_K:
		/*
		 * mul,div,mod are handled in the BPF_X case.
		 */
		emit_alu_i32(dst, imm, ctx, BPF_OP(code));
		break;

	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
		/*
		 * src is ignored---choose tmp2 as a dummy register since it
		 * is not on the stack.
		 */
		emit_alu_r32(dst, tmp2, ctx, BPF_OP(code));
		break;

	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_BE:
	{
		const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);

		/* Host is big-endian, so FROM_BE only truncates/zero-extends. */
		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit(hppa_extru(lo(rd), 31, 16, lo(rd)), ctx);
			fallthrough;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 64:
			/* Do nothing. */
			break;
		default:
			pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
			return -1;
		}

		bpf_put_reg64(dst, rd, ctx);
		break;
	}

	case BPF_ALU | BPF_END | BPF_FROM_LE:
	{
		const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);

		switch (imm) {
		case 16:
			emit_rev16(lo(rd), ctx);
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 32:
			emit_rev32(lo(rd), lo(rd), ctx);
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 64:
			/* Swap upper and lower halves, then each half. */
			emit_hppa_copy(hi(rd), HPPA_REG_T0, ctx);
			emit_rev32(lo(rd), hi(rd), ctx);
			emit_rev32(HPPA_REG_T0, lo(rd), ctx);
			break;
		default:
			pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
			return -1;
		}

		bpf_put_reg64(dst, rd, ctx);
		break;
	}
	/* JUMP off */
	case BPF_JMP | BPF_JA:
		paoff = hppa_offset(i, off, ctx);
		emit_jump(paoff, false, ctx);
		break;
	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		bool fixed;
		int ret;
		u64 addr;

		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr,
					    &fixed);
		if (ret < 0)
			return ret;
		emit_call(fixed, addr, ctx);
		break;
	}
	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		/* Tail calls reuse the full frame: mark every register as used. */
		REG_SET_SEEN_ALL(ctx);
		if (emit_bpf_tail_call(i, ctx))
			return -1;
		break;
	/* IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_K:

	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_K:

	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_K:

	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_K:

	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_K:

	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_K:

	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_K:

	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_K:

	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_K:

	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_K:

	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		paoff = hppa_offset(i, off, ctx);
		if (BPF_SRC(code) == BPF_K) {
			/*
			 * Loading the immediate emits extra instructions, so
			 * the branch displacement must be shrunk accordingly.
			 */
			s = ctx->ninsns;
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
			e = ctx->ninsns;
			paoff -= (e - s);
		}
		if (is64)
			emit_branch_r64(dst, src, paoff, ctx, BPF_OP(code));
		else
			emit_branch_r32(dst, src, paoff, ctx, BPF_OP(code));
		break;
	/* function return */
	case BPF_JMP | BPF_EXIT:
		/* The last insn falls through into the epilogue directly. */
		if (i == ctx->prog->len - 1)
			break;
		/* load epilogue function pointer and jump to it. */
		emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
		emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		struct bpf_insn insn1 = insn[1];
		u32 upper = insn1.imm;
		u32 lower = imm;
		const s8 *rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);

		/* NOTE(review): dead branch ("if (0)") — presumably kept for
		 * a future function-descriptor use case; confirm intent. */
		if (0 && bpf_pseudo_func(insn)) {
			WARN_ON(upper); /* we are 32-bit! */
			upper = 0;
			lower = (uintptr_t) dereference_function_descriptor(lower);
		}

		emit_imm64(rd, upper, lower, ctx);
		bpf_put_reg64(dst, rd, ctx);
		/* Consumed two instruction slots. */
		return 1;
	}

	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_DW:
		if (emit_load_r64(dst, src, off, ctx, BPF_SIZE(code)))
			return -1;
		break;

	/* speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;

	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_DW:

	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_DW:
		if (BPF_CLASS(code) == BPF_ST) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}

		if (emit_store_r64(dst, src, off, ctx, BPF_SIZE(code),
				   BPF_MODE(code)))
			return -1;
		break;

	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		pr_info_once(
			"bpf-jit: not supported: atomic operation %02x ***\n",
			insn->imm);
		return -EFAULT;

	default:
		pr_err("bpf-jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}

/*
 * Emit the program prologue: allocate the (upward-growing) stack frame,
 * initialize and branch over the tail-call-counter setup, save
 * callee-saved registers, store the epilogue pointer for tail-call
 * chains, and move the BPF argument registers R1-R5 and the frame
 * pointer into their JIT locations.
 */
void bpf_jit_build_prologue(struct hppa_jit_context *ctx)
{
	const s8 *tmp = regmap[TMP_REG_1];
	const s8 *dst, *reg;
	int stack_adjust = 0;
	int i;
	unsigned long addr;
	int bpf_stack_adjust;

	/*
	 * stack on hppa grows up, so if tail calls are used we need to
	 * allocate the maximum stack size
	 */
	if (REG_ALL_SEEN(ctx))
		bpf_stack_adjust = MAX_BPF_STACK;
	else
		bpf_stack_adjust = ctx->prog->aux->stack_depth;
	bpf_stack_adjust = round_up(bpf_stack_adjust, STACK_ALIGN);

	/* make space for callee-saved registers. */
	stack_adjust += NR_SAVED_REGISTERS * REG_SIZE;
	/* make space for BPF registers on stack. */
	stack_adjust += BPF_JIT_SCRATCH_REGS * REG_SIZE;
	/* make space for BPF stack. */
	stack_adjust += bpf_stack_adjust;
	/* round up for stack alignment. */
	stack_adjust = round_up(stack_adjust, STACK_ALIGN);

	/*
	 * The first instruction sets the tail-call-counter (TCC) register.
	 * This instruction is skipped by tail calls.
	 * Use a temporary register instead of a caller-saved register initially.
	 */
	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC_IN_INIT), ctx);

	/*
	 * skip all initializations when called as BPF TAIL call.
	 */
	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_R1), ctx);
	emit(hppa_bne(HPPA_REG_TCC_IN_INIT, HPPA_REG_R1, ctx->prologue_len - 2 - HPPA_BRANCH_DISPLACEMENT), ctx);

	/* set up hppa stack frame. */
	emit_hppa_copy(HPPA_REG_SP, HPPA_REG_R1, ctx);			// copy sp,r1 (=prev_sp)
	emit(hppa_ldo(stack_adjust, HPPA_REG_SP, HPPA_REG_SP), ctx);	// ldo stack_adjust(sp),sp (increase stack)
	emit(hppa_stw(HPPA_REG_R1, -REG_SIZE, HPPA_REG_SP), ctx);	// stw prev_sp,-0x04(sp)
	emit(hppa_stw(HPPA_REG_RP, -0x14, HPPA_REG_SP), ctx);		// stw rp,-0x14(sp)

	/* Temporaries are always considered live so they get saved below. */
	REG_FORCE_SEEN(ctx, HPPA_REG_T0);
	REG_FORCE_SEEN(ctx, HPPA_REG_T1);
	REG_FORCE_SEEN(ctx, HPPA_REG_T2);
	REG_FORCE_SEEN(ctx, HPPA_REG_T3);
	REG_FORCE_SEEN(ctx, HPPA_REG_T4);
	REG_FORCE_SEEN(ctx, HPPA_REG_T5);

	/* save callee-save registers. */
	for (i = 3; i <= 18; i++) {
		if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
			continue;
		emit(hppa_stw(HPPA_R(i), -REG_SIZE * (8 + (i-3)), HPPA_REG_SP), ctx);	// stw ri,-save_area(sp)
	}

	/*
	 * now really set the tail call counter (TCC) register.
	 */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC), ctx);

	/*
	 * save epilogue function pointer for outer TCC call chain.
	 * The main TCC call stores the final RP on stack.
	 */
	addr = (uintptr_t) &ctx->insns[ctx->epilogue_offset];
	/* skip first two instructions of exit function, which jump to exit */
	addr += 2 * HPPA_INSN_SIZE;
	emit(hppa_ldil(addr, HPPA_REG_T2), ctx);
	emit(hppa_ldo(im11(addr), HPPA_REG_T2, HPPA_REG_T2), ctx);
	emit(EXIT_PTR_STORE(HPPA_REG_T2), ctx);

	/* load R1 & R2 from registers, R3-R5 from stack. */
	/* use HPPA_REG_R1 which holds the old stack value */
	/* NOTE(review): "|" is a deliberate non-short-circuit OR of two
	 * 0/1 values here; both REG_WAS_SEEN() calls are side-effect free. */
	dst = regmap[BPF_REG_5];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit(hppa_ldw(-0x48, HPPA_REG_R1, hi(reg)), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldw(-0x44, HPPA_REG_R1, lo(reg)), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_4];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit(hppa_ldw(-0x40, HPPA_REG_R1, hi(reg)), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldw(-0x3c, HPPA_REG_R1, lo(reg)), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_3];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit(hppa_ldw(-0x38, HPPA_REG_R1, hi(reg)), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldw(-0x34, HPPA_REG_R1, lo(reg)), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_2];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit_hppa_copy(HPPA_REG_ARG3, hi(reg), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit_hppa_copy(HPPA_REG_ARG2, lo(reg), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_1];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit_hppa_copy(HPPA_REG_ARG1, hi(reg), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit_hppa_copy(HPPA_REG_ARG0, lo(reg), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	/* Set up BPF frame pointer. */
	dst = regmap[BPF_REG_FP];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldo(-REG_SIZE * (NR_SAVED_REGISTERS + BPF_JIT_SCRATCH_REGS),
			     HPPA_REG_SP, lo(reg)), ctx);
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	emit(hppa_nop(), ctx);
}

/*
 * Emit the shared program epilogue (non-tail-call variant).
 */
void bpf_jit_build_epilogue(struct hppa_jit_context *ctx)
{
	__build_epilogue(false, ctx);
}