Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

riscv, bpf: Implement more atomic operations for RV64

This patch implements more BPF atomic operations for RV64. The newly
added operations are shown below:

atomic[64]_[fetch_]add
atomic[64]_[fetch_]and
atomic[64]_[fetch_]or
atomic[64]_xchg
atomic[64]_cmpxchg

Since the RISC-V specification does not provide an AMO instruction for the
CAS operation, we use lr/sc instructions for the cmpxchg operation, and AMO
instructions for the rest of the ops.

Tests "test_bpf.ko" and "test_progs -t atomic" have passed, as well
as "test_verifier" with no new failure cases.

Signed-off-by: Pu Lehui <pulehui@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Acked-by: Björn Töpel <bjorn@kernel.org>
Link: https://lore.kernel.org/bpf/20220410101246.232875-1-pulehui@huawei.com

authored by

Pu Lehui and committed by
Daniel Borkmann
dd642ccb 33fc250c

+153 -24
+67
arch/riscv/net/bpf_jit.h
··· 535 535 return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f); 536 536 } 537 537 538 + static inline u32 rv_amoand_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 539 + { 540 + return rv_amo_insn(0xc, aq, rl, rs2, rs1, 2, rd, 0x2f); 541 + } 542 + 543 + static inline u32 rv_amoor_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 544 + { 545 + return rv_amo_insn(0x8, aq, rl, rs2, rs1, 2, rd, 0x2f); 546 + } 547 + 548 + static inline u32 rv_amoxor_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 549 + { 550 + return rv_amo_insn(0x4, aq, rl, rs2, rs1, 2, rd, 0x2f); 551 + } 552 + 553 + static inline u32 rv_amoswap_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 554 + { 555 + return rv_amo_insn(0x1, aq, rl, rs2, rs1, 2, rd, 0x2f); 556 + } 557 + 558 + static inline u32 rv_lr_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 559 + { 560 + return rv_amo_insn(0x2, aq, rl, rs2, rs1, 2, rd, 0x2f); 561 + } 562 + 563 + static inline u32 rv_sc_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 564 + { 565 + return rv_amo_insn(0x3, aq, rl, rs2, rs1, 2, rd, 0x2f); 566 + } 567 + 568 + static inline u32 rv_fence(u8 pred, u8 succ) 569 + { 570 + u16 imm11_0 = pred << 4 | succ; 571 + 572 + return rv_i_insn(imm11_0, 0, 0, 0, 0xf); 573 + } 574 + 538 575 /* RVC instrutions. 
*/ 539 576 540 577 static inline u16 rvc_addi4spn(u8 rd, u32 imm10) ··· 788 751 static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 789 752 { 790 753 return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f); 754 + } 755 + 756 + static inline u32 rv_amoand_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 757 + { 758 + return rv_amo_insn(0xc, aq, rl, rs2, rs1, 3, rd, 0x2f); 759 + } 760 + 761 + static inline u32 rv_amoor_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 762 + { 763 + return rv_amo_insn(0x8, aq, rl, rs2, rs1, 3, rd, 0x2f); 764 + } 765 + 766 + static inline u32 rv_amoxor_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 767 + { 768 + return rv_amo_insn(0x4, aq, rl, rs2, rs1, 3, rd, 0x2f); 769 + } 770 + 771 + static inline u32 rv_amoswap_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 772 + { 773 + return rv_amo_insn(0x1, aq, rl, rs2, rs1, 3, rd, 0x2f); 774 + } 775 + 776 + static inline u32 rv_lr_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 777 + { 778 + return rv_amo_insn(0x2, aq, rl, rs2, rs1, 3, rd, 0x2f); 779 + } 780 + 781 + static inline u32 rv_sc_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) 782 + { 783 + return rv_amo_insn(0x3, aq, rl, rs2, rs1, 3, rd, 0x2f); 791 784 } 792 785 793 786 /* RV64-only RVC instructions. */
+86 -24
arch/riscv/net/bpf_jit_comp64.c
··· 455 455 return 0; 456 456 } 457 457 458 + static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, 459 + struct rv_jit_context *ctx) 460 + { 461 + u8 r0; 462 + int jmp_offset; 463 + 464 + if (off) { 465 + if (is_12b_int(off)) { 466 + emit_addi(RV_REG_T1, rd, off, ctx); 467 + } else { 468 + emit_imm(RV_REG_T1, off, ctx); 469 + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 470 + } 471 + rd = RV_REG_T1; 472 + } 473 + 474 + switch (imm) { 475 + /* lock *(u32/u64 *)(dst_reg + off16) <op>= src_reg */ 476 + case BPF_ADD: 477 + emit(is64 ? rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0) : 478 + rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); 479 + break; 480 + case BPF_AND: 481 + emit(is64 ? rv_amoand_d(RV_REG_ZERO, rs, rd, 0, 0) : 482 + rv_amoand_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); 483 + break; 484 + case BPF_OR: 485 + emit(is64 ? rv_amoor_d(RV_REG_ZERO, rs, rd, 0, 0) : 486 + rv_amoor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); 487 + break; 488 + case BPF_XOR: 489 + emit(is64 ? rv_amoxor_d(RV_REG_ZERO, rs, rd, 0, 0) : 490 + rv_amoxor_w(RV_REG_ZERO, rs, rd, 0, 0), ctx); 491 + break; 492 + /* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */ 493 + case BPF_ADD | BPF_FETCH: 494 + emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) : 495 + rv_amoadd_w(rs, rs, rd, 0, 0), ctx); 496 + if (!is64) 497 + emit_zext_32(rs, ctx); 498 + break; 499 + case BPF_AND | BPF_FETCH: 500 + emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) : 501 + rv_amoand_w(rs, rs, rd, 0, 0), ctx); 502 + if (!is64) 503 + emit_zext_32(rs, ctx); 504 + break; 505 + case BPF_OR | BPF_FETCH: 506 + emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) : 507 + rv_amoor_w(rs, rs, rd, 0, 0), ctx); 508 + if (!is64) 509 + emit_zext_32(rs, ctx); 510 + break; 511 + case BPF_XOR | BPF_FETCH: 512 + emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) : 513 + rv_amoxor_w(rs, rs, rd, 0, 0), ctx); 514 + if (!is64) 515 + emit_zext_32(rs, ctx); 516 + break; 517 + /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */ 518 + case BPF_XCHG: 519 + emit(is64 ? 
rv_amoswap_d(rs, rs, rd, 0, 0) : 520 + rv_amoswap_w(rs, rs, rd, 0, 0), ctx); 521 + if (!is64) 522 + emit_zext_32(rs, ctx); 523 + break; 524 + /* r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg); */ 525 + case BPF_CMPXCHG: 526 + r0 = bpf_to_rv_reg(BPF_REG_0, ctx); 527 + emit(is64 ? rv_addi(RV_REG_T2, r0, 0) : 528 + rv_addiw(RV_REG_T2, r0, 0), ctx); 529 + emit(is64 ? rv_lr_d(r0, 0, rd, 0, 0) : 530 + rv_lr_w(r0, 0, rd, 0, 0), ctx); 531 + jmp_offset = ninsns_rvoff(8); 532 + emit(rv_bne(RV_REG_T2, r0, jmp_offset >> 1), ctx); 533 + emit(is64 ? rv_sc_d(RV_REG_T3, rs, rd, 0, 0) : 534 + rv_sc_w(RV_REG_T3, rs, rd, 0, 0), ctx); 535 + jmp_offset = ninsns_rvoff(-6); 536 + emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx); 537 + emit(rv_fence(0x3, 0x3), ctx); 538 + break; 539 + } 540 + } 541 + 458 542 #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) 459 543 #define BPF_FIXUP_REG_MASK GENMASK(31, 27) 460 544 ··· 1230 1146 break; 1231 1147 case BPF_STX | BPF_ATOMIC | BPF_W: 1232 1148 case BPF_STX | BPF_ATOMIC | BPF_DW: 1233 - if (insn->imm != BPF_ADD) { 1234 - pr_err("bpf-jit: not supported: atomic operation %02x ***\n", 1235 - insn->imm); 1236 - return -EINVAL; 1237 - } 1238 - 1239 - /* atomic_add: lock *(u32 *)(dst + off) += src 1240 - * atomic_add: lock *(u64 *)(dst + off) += src 1241 - */ 1242 - 1243 - if (off) { 1244 - if (is_12b_int(off)) { 1245 - emit_addi(RV_REG_T1, rd, off, ctx); 1246 - } else { 1247 - emit_imm(RV_REG_T1, off, ctx); 1248 - emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); 1249 - } 1250 - 1251 - rd = RV_REG_T1; 1252 - } 1253 - 1254 - emit(BPF_SIZE(code) == BPF_W ? 1255 - rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0) : 1256 - rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0), ctx); 1149 + emit_atomic(rd, rs, off, imm, 1150 + BPF_SIZE(code) == BPF_DW, ctx); 1257 1151 break; 1258 1152 default: 1259 1153 pr_err("bpf-jit: unknown opcode %02x\n", code);