Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2019-12-27

The following pull-request contains BPF updates for your *net-next* tree.

We've added 127 non-merge commits during the last 17 day(s) which contain
a total of 110 files changed, 6901 insertions(+), 2721 deletions(-).

There are three merge conflicts. The conflicts and their resolutions look as follows:

1) Merge conflict in net/bpf/test_run.c:

There was a tree-wide cleanup c593642c8be0 ("treewide: Use sizeof_field() macro")
which collides with b590cb5f802d ("bpf: Switch to offsetofend in
BPF_PROG_TEST_RUN"):

<<<<<<< HEAD
if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) +
sizeof_field(struct __sk_buff, priority),
=======
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
>>>>>>> 7c8dce4b166113743adad131b5a24c4acc12f92c

There are a few occurrences that look similar to this. Always take the chunk with
offsetofend(). Note that there is one spot where the fields differ:

<<<<<<< HEAD
if (!range_is_zero(__skb, offsetof(struct __sk_buff, tstamp) +
sizeof_field(struct __sk_buff, tstamp),
=======
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs),
>>>>>>> 7c8dce4b166113743adad131b5a24c4acc12f92c

Just take the one with offsetofend() /and/ gso_segs. The latter is correct due to
850a88cc4096 ("bpf: Expose __sk_buff wire_len/gso_segs to BPF_PROG_TEST_RUN").

2) Merge conflict in arch/riscv/net/bpf_jit_comp.c:

(I'm keeping Bjorn in Cc here for a double-check in case I got it wrong.)

<<<<<<< HEAD
if (is_13b_check(off, insn))
return -1;
emit(rv_blt(tcc, RV_REG_ZERO, off >> 1), ctx);
=======
emit_branch(BPF_JSLT, RV_REG_T1, RV_REG_ZERO, off, ctx);
>>>>>>> 7c8dce4b166113743adad131b5a24c4acc12f92c

Result should look like:

emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);

3) Merge conflict in arch/riscv/include/asm/pgtable.h:

<<<<<<< HEAD
=======
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END (PAGE_OFFSET - 1)
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)

#define BPF_JIT_REGION_SIZE (SZ_128M)
#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
#define BPF_JIT_REGION_END (VMALLOC_END)

/*
* Roughly size the vmemmap space to be large enough to fit enough
* struct pages to map half the virtual address space. Then
* position vmemmap directly below the VMALLOC region.
*/
#define VMEMMAP_SHIFT \
(CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
#define VMEMMAP_END (VMALLOC_START - 1)
#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)

#define vmemmap ((struct page *)VMEMMAP_START)

>>>>>>> 7c8dce4b166113743adad131b5a24c4acc12f92c

Only take the BPF_* defines from there and move them higher up in the
same file. Remove the rest from the chunk. The VMALLOC_* etc defines
got moved via 01f52e16b868 ("riscv: define vmemmap before pfn_to_page
calls"). Result:

[...]
#define __S101 PAGE_READ_EXEC
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC

#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END (PAGE_OFFSET - 1)
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)

#define BPF_JIT_REGION_SIZE (SZ_128M)
#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
#define BPF_JIT_REGION_END (VMALLOC_END)

/*
* Roughly size the vmemmap space to be large enough to fit enough
* struct pages to map half the virtual address space. Then
* position vmemmap directly below the VMALLOC region.
*/
#define VMEMMAP_SHIFT \
(CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
#define VMEMMAP_END (VMALLOC_START - 1)
#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)

[...]

Let me know if there are any other issues.

Anyway, the main changes are:

1) Extend bpftool to produce a struct (aka "skeleton") tailored and specific
to a provided BPF object file. This provides an alternative, simplified API
compared to standard libbpf interaction. Also, add libbpf extern variable
resolution for .kconfig section to import Kconfig data, from Andrii Nakryiko.

2) Add BPF dispatcher for XDP which is a mechanism to avoid indirect calls by
generating a branch funnel as discussed back in bpfconf'19 at LSF/MM. Also,
add various BPF riscv JIT improvements, from Björn Töpel.

3) Extend bpftool to allow matching BPF programs and maps by name,
from Paul Chaignon.

4) Support for replacing cgroup BPF programs attached with BPF_F_ALLOW_MULTI
flag for allowing updates without service interruption, from Andrey Ignatov.

5) Cleanup and simplification of ring access functions for AF_XDP with a
bonus of 0-5% performance improvement, from Magnus Karlsson.

6) Enable BPF JITs for x86-64 and arm64 by default. Also, final version of
   audit support for BPF, from Daniel Borkmann, the latter with Jiri Olsa.

7) Move and extend test_select_reuseport into BPF program tests under
BPF selftests, from Jakub Sitnicki.

8) Various BPF sample improvements for xdpsock for customizing parameters
to set up and benchmark AF_XDP, from Jay Jayatheerthan.

9) Improve libbpf to provide a ulimit hint on permission denied errors.
Also change XDP sample programs to attach in driver mode by default,
from Toke Høiland-Jørgensen.

10) Extend BPF test infrastructure to allow changing skb mark from tc BPF
programs, from Nikita V. Shirokov.

11) Optimize prologue code sequence in BPF arm32 JIT, from Russell King.

12) Fix xdp_redirect_cpu BPF sample to manually attach to tracepoints after
libbpf conversion, from Jesper Dangaard Brouer.

13) Minor misc improvements from various others.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+6924 -2744
+13 -17
arch/arm/net/bpf_jit_32.c
···
 
 static void build_prologue(struct jit_ctx *ctx)
 {
-	const s8 r0 = bpf2a32[BPF_REG_0][1];
-	const s8 r2 = bpf2a32[BPF_REG_1][1];
-	const s8 r3 = bpf2a32[BPF_REG_1][0];
-	const s8 r4 = bpf2a32[BPF_REG_6][1];
-	const s8 fplo = bpf2a32[BPF_REG_FP][1];
-	const s8 fphi = bpf2a32[BPF_REG_FP][0];
+	const s8 arm_r0 = bpf2a32[BPF_REG_0][1];
+	const s8 *bpf_r1 = bpf2a32[BPF_REG_1];
+	const s8 *bpf_fp = bpf2a32[BPF_REG_FP];
 	const s8 *tcc = bpf2a32[TCALL_CNT];
 
 	/* Save callee saved registers. */
···
 	emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
 	emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
 #endif
-	/* Save frame pointer for later */
-	emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
+	/* mov r3, #0 */
+	/* sub r2, sp, #SCRATCH_SIZE */
+	emit(ARM_MOV_I(bpf_r1[0], 0), ctx);
+	emit(ARM_SUB_I(bpf_r1[1], ARM_SP, SCRATCH_SIZE), ctx);
 
 	ctx->stack_size = imm8m(STACK_SIZE);
 
···
 	emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
 
 	/* Set up BPF prog stack base register */
-	emit_a32_mov_r(fplo, ARM_IP, ctx);
-	emit_a32_mov_i(fphi, 0, ctx);
+	emit_a32_mov_r64(true, bpf_fp, bpf_r1, ctx);
 
-	/* mov r4, 0 */
-	emit(ARM_MOV_I(r4, 0), ctx);
+	/* Initialize Tail Count */
+	emit(ARM_MOV_I(bpf_r1[1], 0), ctx);
+	emit_a32_mov_r64(true, tcc, bpf_r1, ctx);
 
 	/* Move BPF_CTX to BPF_R1 */
-	emit(ARM_MOV_R(r3, r4), ctx);
-	emit(ARM_MOV_R(r2, r0), ctx);
-	/* Initialize Tail Count */
-	emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx);
-	emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx);
+	emit(ARM_MOV_R(bpf_r1[1], arm_r0), ctx);
+
 	/* end of prologue */
 }
+1
arch/arm64/Kconfig
···
 	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
+	select ARCH_WANT_DEFAULT_BPF_JIT
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
+4
arch/riscv/include/asm/perf_event.h
···
 	int irq;
 };
 
+#ifdef CONFIG_PERF_EVENTS
+#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
+#endif
+
 #endif /* _ASM_RISCV_PERF_EVENT_H */
+4
arch/riscv/include/asm/pgtable.h
···
 #define VMALLOC_END      (PAGE_OFFSET - 1)
 #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
 
+#define BPF_JIT_REGION_SIZE	(SZ_128M)
+#define BPF_JIT_REGION_START	(PAGE_OFFSET - BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_END	(VMALLOC_END)
+
 /*
  * Roughly size the vmemmap space to be large enough to fit enough
  * struct pages to map half the virtual address space. Then
+9
arch/riscv/include/uapi/asm/bpf_perf_event.h
···
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_regs_struct bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
+295 -240
arch/riscv/net/bpf_jit_comp.c
···
 	return false;
 }
 
+static void mark_fp(struct rv_jit_context *ctx)
+{
+	__set_bit(RV_CTX_F_SEEN_S5, &ctx->flags);
+}
+
 static void mark_call(struct rv_jit_context *ctx)
 {
 	__set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
···
 	return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
 }
 
+static u32 rv_auipc(u8 rd, u32 imm31_12)
+{
+	return rv_u_insn(imm31_12, rd, 0x17);
+}
+
 static bool is_12b_int(s64 val)
 {
 	return -(1 << 11) <= val && val < (1 << 11);
···
 static int is_12b_check(int off, int insn)
 {
 	if (!is_12b_int(off)) {
-		pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
-		       insn, (int)off);
-		return -1;
-	}
-	return 0;
-}
-
-static int is_13b_check(int off, int insn)
-{
-	if (!is_13b_int(off)) {
-		pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
-		       insn, (int)off);
-		return -1;
-	}
-	return 0;
-}
-
-static int is_21b_check(int off, int insn)
-{
-	if (!is_21b_int(off)) {
-		pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
+		pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n",
 		       insn, (int)off);
 		return -1;
 	}
···
 	emit(rv_addi(rd, rd, lower), ctx);
 }
 
-static int rv_offset(int bpf_to, int bpf_from, struct rv_jit_context *ctx)
+static int rv_offset(int insn, int off, struct rv_jit_context *ctx)
 {
-	int from = ctx->offset[bpf_from] - 1, to = ctx->offset[bpf_to];
+	int from, to;
 
+	off++; /* BPF branch is from PC+1, RV is from PC */
+	from = (insn > 0) ? ctx->offset[insn - 1] : 0;
+	to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
 	return (to - from) << 2;
 }
···
 	return (to - from) << 2;
 }
 
-static void __build_epilogue(u8 reg, struct rv_jit_context *ctx)
+static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
 {
 	int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
···
 
 	emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
 	/* Set return value. */
-	emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
-	emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx);
+	if (!is_tail_call)
+		emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
+	emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
+		     is_tail_call ? 4 : 0), /* skip TCC init */
+	     ctx);
+}
+
+/* return -1 or inverted cond */
+static int invert_bpf_cond(u8 cond)
+{
+	switch (cond) {
+	case BPF_JEQ:
+		return BPF_JNE;
+	case BPF_JGT:
+		return BPF_JLE;
+	case BPF_JLT:
+		return BPF_JGE;
+	case BPF_JGE:
+		return BPF_JLT;
+	case BPF_JLE:
+		return BPF_JGT;
+	case BPF_JNE:
+		return BPF_JEQ;
+	case BPF_JSGT:
+		return BPF_JSLE;
+	case BPF_JSLT:
+		return BPF_JSGE;
+	case BPF_JSGE:
+		return BPF_JSLT;
+	case BPF_JSLE:
+		return BPF_JSGT;
+	}
+	return -1;
+}
+
+static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
+		     struct rv_jit_context *ctx)
+{
+	switch (cond) {
+	case BPF_JEQ:
+		emit(rv_beq(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JGT:
+		emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
+		return;
+	case BPF_JLT:
+		emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JGE:
+		emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JLE:
+		emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
+		return;
+	case BPF_JNE:
+		emit(rv_bne(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JSGT:
+		emit(rv_blt(rs, rd, rvoff >> 1), ctx);
+		return;
+	case BPF_JSLT:
+		emit(rv_blt(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JSGE:
+		emit(rv_bge(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JSLE:
+		emit(rv_bge(rs, rd, rvoff >> 1), ctx);
+	}
+}
+
+static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
+			struct rv_jit_context *ctx)
+{
+	s64 upper, lower;
+
+	if (is_13b_int(rvoff)) {
+		emit_bcc(cond, rd, rs, rvoff, ctx);
+		return;
+	}
+
+	/* Adjust for jal */
+	rvoff -= 4;
+
+	/* Transform, e.g.:
+	 *   bne rd,rs,foo
+	 * to
+	 *   beq rd,rs,<.L1>
+	 *   (auipc foo)
+	 *   jal(r) foo
+	 * .L1
+	 */
+	cond = invert_bpf_cond(cond);
+	if (is_21b_int(rvoff)) {
+		emit_bcc(cond, rd, rs, 8, ctx);
+		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
+		return;
+	}
+
+	/* 32b No need for an additional rvoff adjustment, since we
+	 * get that from the auipc at PC', where PC = PC' + 4.
+	 */
+	upper = (rvoff + (1 << 11)) >> 12;
+	lower = rvoff & 0xfff;
+
+	emit_bcc(cond, rd, rs, 12, ctx);
+	emit(rv_auipc(RV_REG_T1, upper), ctx);
+	emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx);
 }
 
 static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
···
 		return -1;
 	emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
 	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
-	if (is_13b_check(off, insn))
-		return -1;
-	emit(rv_bgeu(RV_REG_A2, RV_REG_T1, off >> 1), ctx);
+	emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);
 
 	/* if (TCC-- < 0)
 	 *     goto out;
 	 */
 	emit(rv_addi(RV_REG_T1, tcc, -1), ctx);
 	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
-	if (is_13b_check(off, insn))
-		return -1;
-	emit(rv_blt(tcc, RV_REG_ZERO, off >> 1), ctx);
+	emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);
 
 	/* prog = array->ptrs[index];
 	 * if (!prog)
···
 		return -1;
 	emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx);
 	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
-	if (is_13b_check(off, insn))
-		return -1;
-	emit(rv_beq(RV_REG_T2, RV_REG_ZERO, off >> 1), ctx);
+	emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);
 
 	/* goto *(prog->bpf_func + 4); */
 	off = offsetof(struct bpf_prog, bpf_func);
 	if (is_12b_check(off, insn))
 		return -1;
 	emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx);
-	emit(rv_addi(RV_REG_T3, RV_REG_T3, 4), ctx);
 	emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
-	__build_epilogue(RV_REG_T3, ctx);
+	__build_epilogue(true, ctx);
 	return 0;
 }
···
 	    code & (BPF_JMP | BPF_X) || code & (BPF_JMP32 | BPF_X) ||
 	    code & BPF_LDX || code & BPF_STX)
 		*rs = bpf_to_rv_reg(insn->src_reg, ctx);
-}
-
-static int rv_offset_check(int *rvoff, s16 off, int insn,
-			   struct rv_jit_context *ctx)
-{
-	*rvoff = rv_offset(insn + off, insn, ctx);
-	return is_13b_check(*rvoff, insn);
 }
 
 static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
···
 	*rd = RV_REG_T2;
 }
 
+static void emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr,
+			       struct rv_jit_context *ctx)
+{
+	s64 upper, lower;
+
+	if (rvoff && is_21b_int(rvoff) && !force_jalr) {
+		emit(rv_jal(rd, rvoff >> 1), ctx);
+		return;
+	}
+
+	upper = (rvoff + (1 << 11)) >> 12;
+	lower = rvoff & 0xfff;
+	emit(rv_auipc(RV_REG_T1, upper), ctx);
+	emit(rv_jalr(rd, RV_REG_T1, lower), ctx);
+}
+
+static bool is_signed_bpf_cond(u8 cond)
+{
+	return cond == BPF_JSGT || cond == BPF_JSLT ||
+	       cond == BPF_JSGE || cond == BPF_JSLE;
+}
+
+static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
+{
+	s64 off = 0;
+	u64 ip;
+	u8 rd;
+
+	if (addr && ctx->insns) {
+		ip = (u64)(long)(ctx->insns + ctx->ninsns);
+		off = addr - ip;
+		if (!is_32b_int(off)) {
+			pr_err("bpf-jit: target call addr %pK is out of range\n",
+			       (void *)addr);
+			return -ERANGE;
+		}
+	}
+
+	emit_jump_and_link(RV_REG_RA, off, !fixed, ctx);
+	rd = bpf_to_rv_reg(BPF_REG_0, ctx);
+	emit(rv_addi(rd, RV_REG_A0, 0), ctx);
+	return 0;
+}
+
 static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 		     bool extra_pass)
 {
 	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
 		    BPF_CLASS(insn->code) == BPF_JMP;
+	int s, e, rvoff, i = insn - ctx->prog->insnsi;
 	struct bpf_prog_aux *aux = ctx->prog->aux;
-	int rvoff, i = insn - ctx->prog->insnsi;
 	u8 rd = -1, rs = -1, code = insn->code;
 	s16 off = insn->off;
 	s32 imm = insn->imm;
···
 
 	/* JUMP off */
 	case BPF_JMP | BPF_JA:
-		rvoff = rv_offset(i + off, i, ctx);
-		if (!is_21b_int(rvoff)) {
-			pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
-			       i, rvoff);
-			return -1;
-		}
-
-		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
+		rvoff = rv_offset(i, off, ctx);
+		emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
 		break;
 
 	/* IF (dst COND src) JUMP off */
 	case BPF_JMP | BPF_JEQ | BPF_X:
 	case BPF_JMP32 | BPF_JEQ | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_beq(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JGT | BPF_X:
 	case BPF_JMP32 | BPF_JGT | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JLT | BPF_X:
 	case BPF_JMP32 | BPF_JLT | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JGE | BPF_X:
 	case BPF_JMP32 | BPF_JGE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JLE | BPF_X:
 	case BPF_JMP32 | BPF_JLE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JNE | BPF_X:
 	case BPF_JMP32 | BPF_JNE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bne(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSGT | BPF_X:
 	case BPF_JMP32 | BPF_JSGT | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_sext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_blt(rs, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSLT | BPF_X:
 	case BPF_JMP32 | BPF_JSLT | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_sext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_blt(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSGE | BPF_X:
 	case BPF_JMP32 | BPF_JSGE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_sext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bge(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSLE | BPF_X:
 	case BPF_JMP32 | BPF_JSLE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_sext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bge(rs, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSET | BPF_X:
 	case BPF_JMP32 | BPF_JSET | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_and(RV_REG_T1, rd, rs), ctx);
-		emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx);
+		rvoff = rv_offset(i, off, ctx);
+		if (!is64) {
+			s = ctx->ninsns;
+			if (is_signed_bpf_cond(BPF_OP(code)))
+				emit_sext_32_rd_rs(&rd, &rs, ctx);
+			else
+				emit_zext_32_rd_rs(&rd, &rs, ctx);
+			e = ctx->ninsns;
+
+			/* Adjust for extra insns */
+			rvoff -= (e - s) << 2;
+		}
+
+		if (BPF_OP(code) == BPF_JSET) {
+			/* Adjust for and */
+			rvoff -= 4;
+			emit(rv_and(RV_REG_T1, rd, rs), ctx);
+			emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
+				    ctx);
+		} else {
+			emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
+		}
 		break;
 
 	/* IF (dst COND imm) JUMP off */
 	case BPF_JMP | BPF_JEQ | BPF_K:
 	case BPF_JMP32 | BPF_JEQ | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_beq(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JGT | BPF_K:
 	case BPF_JMP32 | BPF_JGT | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bltu(RV_REG_T1, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JLT | BPF_K:
 	case BPF_JMP32 | BPF_JLT | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bltu(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JGE | BPF_K:
 	case BPF_JMP32 | BPF_JGE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bgeu(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JLE | BPF_K:
 	case BPF_JMP32 | BPF_JLE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bgeu(RV_REG_T1, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JNE | BPF_K:
 	case BPF_JMP32 | BPF_JNE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bne(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSGT | BPF_K:
 	case BPF_JMP32 | BPF_JSGT | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_sext_32_rd(&rd, ctx);
-		emit(rv_blt(RV_REG_T1, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSLT | BPF_K:
 	case BPF_JMP32 | BPF_JSLT | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_sext_32_rd(&rd, ctx);
-		emit(rv_blt(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSGE | BPF_K:
 	case BPF_JMP32 | BPF_JSGE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_sext_32_rd(&rd, ctx);
-		emit(rv_bge(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSLE | BPF_K:
 	case BPF_JMP32 | BPF_JSLE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_sext_32_rd(&rd, ctx);
-		emit(rv_bge(RV_REG_T1, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSET | BPF_K:
 	case BPF_JMP32 | BPF_JSET | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
+		rvoff = rv_offset(i, off, ctx);
+		s = ctx->ninsns;
 		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
-		emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx);
+		if (!is64) {
+			if (is_signed_bpf_cond(BPF_OP(code)))
+				emit_sext_32_rd(&rd, ctx);
+			else
+				emit_zext_32_rd_t1(&rd, ctx);
+		}
+		e = ctx->ninsns;
+
+		/* Adjust for extra insns */
+		rvoff -= (e - s) << 2;
+
+		if (BPF_OP(code) == BPF_JSET) {
+			/* Adjust for and */
+			rvoff -= 4;
+			emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
+			emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
+				    ctx);
+		} else {
+			emit_branch(BPF_OP(code), rd, RV_REG_T1, rvoff, ctx);
+		}
 		break;
 
 	/* function call */
 	case BPF_JMP | BPF_CALL:
 	{
 		bool fixed;
-		int i, ret;
+		int ret;
 		u64 addr;
 
 		mark_call(ctx);
···
 					    &fixed);
 		if (ret < 0)
 			return ret;
-		if (fixed) {
-			emit_imm(RV_REG_T1, addr, ctx);
-		} else {
-			i = ctx->ninsns;
-			emit_imm(RV_REG_T1, addr, ctx);
-			for (i = ctx->ninsns - i; i < 8; i++) {
-				/* nop */
-				emit(rv_addi(RV_REG_ZERO, RV_REG_ZERO, 0),
-				     ctx);
-			}
-		}
-		emit(rv_jalr(RV_REG_RA, RV_REG_T1, 0), ctx);
-		rd = bpf_to_rv_reg(BPF_REG_0, ctx);
-		emit(rv_addi(rd, RV_REG_A0, 0), ctx);
+		ret = emit_call(fixed, addr, ctx);
+		if (ret)
+			return ret;
 		break;
 	}
 	/* tail call */
···
 			break;
 
 		rvoff = epilogue_offset(ctx);
-		if (is_21b_check(rvoff, i))
-			return -1;
-		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
+		emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
 		break;
 
 	/* dst = imm64 */
···
 {
 	int stack_adjust = 0, store_offset, bpf_stack_adjust;
 
+	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
+	if (bpf_stack_adjust)
+		mark_fp(ctx);
+
 	if (seen_reg(RV_REG_RA, ctx))
 		stack_adjust += 8;
 	stack_adjust += 8; /* RV_REG_FP */
···
 		stack_adjust += 8;
 
 	stack_adjust = round_up(stack_adjust, 16);
-	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
 	stack_adjust += bpf_stack_adjust;
 
 	store_offset = stack_adjust - 8;
···
 
 static void build_epilogue(struct rv_jit_context *ctx)
 {
-	__build_epilogue(RV_REG_RA, ctx);
+	__build_epilogue(false, ctx);
 }
 
-static int build_body(struct rv_jit_context *ctx, bool extra_pass)
+static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset)
 {
 	const struct bpf_prog *prog = ctx->prog;
 	int i;
···
 		ret = emit_insn(insn, ctx, extra_pass);
 		if (ret > 0) {
 			i++;
-			if (ctx->insns == NULL)
-				ctx->offset[i] = ctx->ninsns;
+			if (offset)
+				offset[i] = ctx->ninsns;
 			continue;
 		}
-		if (ctx->insns == NULL)
-			ctx->offset[i] = ctx->ninsns;
+		if (offset)
+			offset[i] = ctx->ninsns;
 		if (ret)
 			return ret;
 	}
···
 {
 	bool tmp_blinded = false, extra_pass = false;
 	struct bpf_prog *tmp, *orig_prog = prog;
+	int pass = 0, prev_ninsns = 0, i;
 	struct rv_jit_data *jit_data;
+	unsigned int image_size = 0;
 	struct rv_jit_context *ctx;
-	unsigned int image_size;
 
 	if (!prog->jit_requested)
 		return orig_prog;
···
 		prog = orig_prog;
 		goto out_offset;
 	}
+	for (i = 0; i < prog->len; i++) {
+		prev_ninsns += 32;
+		ctx->offset[i] = prev_ninsns;
+	}
 
-	/* First pass generates the ctx->offset, but does not emit an image. */
-	if (build_body(ctx, extra_pass)) {
+	for (i = 0; i < 16; i++) {
+		pass++;
+		ctx->ninsns = 0;
+		if (build_body(ctx, extra_pass, ctx->offset)) {
+			prog = orig_prog;
+			goto out_offset;
+		}
+		build_prologue(ctx);
+		ctx->epilogue_offset = ctx->ninsns;
+		build_epilogue(ctx);
+
+		if (ctx->ninsns == prev_ninsns) {
+			if (jit_data->header)
+				break;
+
+			image_size = sizeof(u32) * ctx->ninsns;
+			jit_data->header =
+				bpf_jit_binary_alloc(image_size,
+						     &jit_data->image,
+						     sizeof(u32),
+						     bpf_fill_ill_insns);
+			if (!jit_data->header) {
+				prog = orig_prog;
+				goto out_offset;
+			}
+
+			ctx->insns = (u32 *)jit_data->image;
+			/* Now, when the image is allocated, the image
+			 * can potentially shrink more (auipc/jalr ->
+			 * jal).
+			 */
+		}
+		prev_ninsns = ctx->ninsns;
+	}
+
+	if (i == 16) {
+		pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
+		bpf_jit_binary_free(jit_data->header);
 		prog = orig_prog;
 		goto out_offset;
 	}
-	build_prologue(ctx);
-	ctx->epilogue_offset = ctx->ninsns;
-	build_epilogue(ctx);
 
-	/* Allocate image, now that we know the size. */
-	image_size = sizeof(u32) * ctx->ninsns;
-	jit_data->header = bpf_jit_binary_alloc(image_size, &jit_data->image,
-						sizeof(u32),
-						bpf_fill_ill_insns);
-	if (!jit_data->header) {
-		prog = orig_prog;
-		goto out_offset;
-	}
-
-	/* Second, real pass, that acutally emits the image. */
-	ctx->insns = (u32 *)jit_data->image;
 skip_init_ctx:
+	pass++;
 	ctx->ninsns = 0;
 
 	build_prologue(ctx);
-	if (build_body(ctx, extra_pass)) {
+	if (build_body(ctx, extra_pass, NULL)) {
 		bpf_jit_binary_free(jit_data->header);
 		prog = orig_prog;
 		goto out_offset;
···
 	build_epilogue(ctx);
 
 	if (bpf_jit_enable > 1)
-		bpf_jit_dump(prog->len, image_size, 2, ctx->insns);
+		bpf_jit_dump(prog->len, image_size, pass, ctx->insns);
 
 	prog->bpf_func = (void *)ctx->insns;
 	prog->jited = 1;
···
 		bpf_jit_prog_release_other(prog, prog == orig_prog ?
 					   tmp : orig_prog);
 	return prog;
+}
+
+void *bpf_jit_alloc_exec(unsigned long size)
+{
+	return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
+				    BPF_JIT_REGION_END, GFP_KERNEL,
+				    PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+				    __builtin_return_address(0));
+}
+
+void bpf_jit_free_exec(void *addr)
+{
+	return vfree(addr);
 }
+1
arch/x86/Kconfig
···
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+	select ARCH_WANT_DEFAULT_BPF_JIT	if X86_64
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANT_HUGE_PMD_SHARE
 	select ARCH_WANTS_THP_SWAP		if X86_64
+150
arch/x86/net/bpf_jit_comp.c
···
 #include <linux/if_vlan.h>
 #include <linux/bpf.h>
 #include <linux/memory.h>
+#include <linux/sort.h>
 #include <asm/extable.h>
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
 #include <asm/text-patching.h>
+#include <asm/asm-prototypes.h>
 
 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
···
 	if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY))
 		return -EFAULT;
 	return 0;
+}
+
+static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+	s64 offset;
+
+	offset = func - (ip + 2 + 4);
+	if (!is_simm32(offset)) {
+		pr_err("Target %p is out of range\n", func);
+		return -EINVAL;
+	}
+	EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
+	*pprog = prog;
+	return 0;
+}
+
+static void emit_nops(u8 **pprog, unsigned int len)
+{
+	unsigned int i, noplen;
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	while (len > 0) {
+		noplen = len;
+
+		if (noplen > ASM_NOP_MAX)
+			noplen = ASM_NOP_MAX;
+
+		for (i = 0; i < noplen; i++)
+			EMIT1(ideal_nops[noplen][i]);
+		len -= noplen;
+	}
+
+	*pprog = prog;
+}
+
+static int emit_fallback_jump(u8 **pprog)
+{
+	u8 *prog = *pprog;
+	int err = 0;
+
+#ifdef CONFIG_RETPOLINE
+	/* Note that this assumes the the compiler uses external
+	 * thunks for indirect calls. Both clang and GCC use the same
+	 * naming convention for external thunks.
+	 */
+	err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
+#else
+	int cnt = 0;
+
+	EMIT2(0xFF, 0xE2);	/* jmp rdx */
+#endif
+	*pprog = prog;
+	return err;
+}
+
+static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
+{
+	u8 *jg_reloc, *jg_target, *prog = *pprog;
+	int pivot, err, jg_bytes = 1, cnt = 0;
+	s64 jg_offset;
+
+	if (a == b) {
+		/* Leaf node of recursion, i.e. not a range of indices
+		 * anymore.
+		 */
+		EMIT1(add_1mod(0x48, BPF_REG_3));	/* cmp rdx,func */
+		if (!is_simm32(progs[a]))
+			return -1;
+		EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3),
+			    progs[a]);
+		err = emit_cond_near_jump(&prog,	/* je func */
+					  (void *)progs[a], prog,
+					  X86_JE);
+		if (err)
+			return err;
+
+		err = emit_fallback_jump(&prog);	/* jmp thunk/indirect */
+		if (err)
+			return err;
+
+		*pprog = prog;
+		return 0;
+	}
+
+	/* Not a leaf node, so we pivot, and recursively descend into
+	 * the lower and upper ranges.
+	 */
+	pivot = (b - a) / 2;
+	EMIT1(add_1mod(0x48, BPF_REG_3));	/* cmp rdx,func */
+	if (!is_simm32(progs[a + pivot]))
+		return -1;
+	EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a + pivot]);
+
+	if (pivot > 2) {	/* jg upper_part */
+		/* Require near jump. */
+		jg_bytes = 4;
+		EMIT2_off32(0x0F, X86_JG + 0x10, 0);
+	} else {
+		EMIT2(X86_JG, 0);
+	}
+	jg_reloc = prog;
+
+	err = emit_bpf_dispatcher(&prog, a, a + pivot,	/* emit lower_part */
+				  progs);
+	if (err)
+		return err;
+
+	/* From Intel 64 and IA-32 Architectures Optimization
+	 * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
+	 * Coding Rule 11: All branch targets should be 16-byte
+	 * aligned.
+	 */
+	jg_target = PTR_ALIGN(prog, 16);
+	if (jg_target != prog)
+		emit_nops(&prog, jg_target - prog);
+	jg_offset = prog - jg_reloc;
+	emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);
+
+	err = emit_bpf_dispatcher(&prog, a + pivot + 1,	/* emit upper_part */
+				  b, progs);
+	if (err)
+		return err;
+
+	*pprog = prog;
+	return 0;
+}
+
+static int cmp_ips(const void *a, const void *b)
+{
+	const s64 *ipa = a;
+	const s64 *ipb = b;
+
+	if (*ipa > *ipb)
+		return 1;
+	if (*ipa < *ipb)
+		return -1;
+	return 0;
+}
+
+int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
+{
+	u8 *prog = image;
+
+	sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL);
+	return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs);
 }
 
 struct x64_jit_data {
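The emitted dispatcher is a binary search over the sorted program addresses: inner nodes compare the target in rdx against the pivot address and branch, leaves compare for equality and either jump to the program or fall back to an indirect jump. The C sketch below mirrors only the search *shape* (same pivot and range split as `emit_bpf_dispatcher`); it is a hedged illustration, not the code generator, and `dispatch` is an invented name.

```c
#include <assert.h>

/* Mirror of the emitted compare/branch structure: an inner node
 * compares against progs[a + pivot] and descends ("jg upper_part"),
 * a leaf compares for equality ("je func") and otherwise takes the
 * fallback path, modelled here as -1. */
int dispatch(const long long *progs, int a, int b, long long ip)
{
	int pivot;

	if (a == b)
		return (progs[a] == ip) ? a : -1;	/* je func / fallback */

	pivot = (b - a) / 2;
	if (ip > progs[a + pivot])			/* jg upper_part */
		return dispatch(progs, a + pivot + 1, b, ip);
	return dispatch(progs, a, a + pivot, ip);	/* lower_part */
}
```

Sorting the addresses first (as `arch_prepare_bpf_dispatcher` does with `sort()`) is what makes this O(log n) comparison tree valid.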
+2 -2
drivers/net/ethernet/intel/i40e/i40e_xsk.c
···
 
 	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
 
-	xsk_umem_discard_addr(umem);
+	xsk_umem_release_addr(umem);
 	return true;
 }
 
···
 
 	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
 
-	xsk_umem_discard_addr_rq(umem);
+	xsk_umem_release_addr_rq(umem);
 	return true;
 }
 
+2 -2
drivers/net/ethernet/intel/ice/ice_xsk.c
···
 
 	rx_buf->handle = handle + umem->headroom;
 
-	xsk_umem_discard_addr(umem);
+	xsk_umem_release_addr(umem);
 	return true;
 }
 
···
 
 	rx_buf->handle = handle + umem->headroom;
 
-	xsk_umem_discard_addr_rq(umem);
+	xsk_umem_release_addr_rq(umem);
 	return true;
 }
 
+2 -2
drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
···
 
 	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
 
-	xsk_umem_discard_addr(umem);
+	xsk_umem_release_addr(umem);
 	return true;
 }
 
···
 
 	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
 
-	xsk_umem_discard_addr_rq(umem);
+	xsk_umem_release_addr_rq(umem);
 	return true;
 }
 
+1 -1
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
···
 	 */
 	dma_info->addr = xdp_umem_get_dma(umem, handle);
 
-	xsk_umem_discard_addr_rq(umem);
+	xsk_umem_release_addr_rq(umem);
 
 	dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
 				   DMA_BIDIRECTIONAL);
+3 -1
include/linux/bpf-cgroup.h
···
 void cgroup_bpf_offline(struct cgroup *cgrp);
 
 int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+			struct bpf_prog *replace_prog,
 			enum bpf_attach_type type, u32 flags);
 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 			enum bpf_attach_type type);
···
 
 /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
 int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags);
+		      struct bpf_prog *replace_prog, enum bpf_attach_type type,
+		      u32 flags);
 int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 		      enum bpf_attach_type type, u32 flags);
 int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
+76 -4
include/linux/bpf.h
···
 	void *image;
 	u64 selector;
 };
+
+#define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
+
+struct bpf_dispatcher_prog {
+	struct bpf_prog *prog;
+	refcount_t users;
+};
+
+struct bpf_dispatcher {
+	/* dispatcher mutex */
+	struct mutex mutex;
+	void *func;
+	struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX];
+	int num_progs;
+	void *image;
+	u32 image_off;
+};
+
+static __always_inline unsigned int bpf_dispatcher_nopfunc(
+	const void *ctx,
+	const struct bpf_insn *insnsi,
+	unsigned int (*bpf_func)(const void *,
+				 const struct bpf_insn *))
+{
+	return bpf_func(ctx, insnsi);
+}
 #ifdef CONFIG_BPF_JIT
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
 int bpf_trampoline_link_prog(struct bpf_prog *prog);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
+void *bpf_jit_alloc_exec_page(void);
+#define BPF_DISPATCHER_INIT(name) {			\
+	.mutex = __MUTEX_INITIALIZER(name.mutex),	\
+	.func = &name##func,				\
+	.progs = {},					\
+	.num_progs = 0,					\
+	.image = NULL,					\
+	.image_off = 0					\
+}
+
+#define DEFINE_BPF_DISPATCHER(name)					\
+	noinline unsigned int name##func(				\
+		const void *ctx,					\
+		const struct bpf_insn *insnsi,				\
+		unsigned int (*bpf_func)(const void *,			\
+					 const struct bpf_insn *))	\
+	{								\
+		return bpf_func(ctx, insnsi);				\
+	}								\
+	EXPORT_SYMBOL(name##func);					\
+	struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name);
+#define DECLARE_BPF_DISPATCHER(name)					\
+	unsigned int name##func(					\
+		const void *ctx,					\
+		const struct bpf_insn *insnsi,				\
+		unsigned int (*bpf_func)(const void *,			\
+					 const struct bpf_insn *));	\
+	extern struct bpf_dispatcher name;
+#define BPF_DISPATCHER_FUNC(name) name##func
+#define BPF_DISPATCHER_PTR(name) (&name)
+void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
+				struct bpf_prog *to);
 #else
 static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
···
 	return -ENOTSUPP;
 }
 static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
+#define DEFINE_BPF_DISPATCHER(name)
+#define DECLARE_BPF_DISPATCHER(name)
+#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc
+#define BPF_DISPATCHER_PTR(name) NULL
+static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
+					      struct bpf_prog *from,
+					      struct bpf_prog *to) {}
 #endif
 
 struct bpf_func_info_aux {
···
 
 struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
-void __dev_map_flush(struct bpf_map *map);
+void __dev_map_flush(void);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
 
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
-void __cpu_map_flush(struct bpf_map *map);
+void __cpu_map_flush(void);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
 
···
 			 struct btf_func_model *m);
 
 int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog);
+
+struct bpf_prog *bpf_prog_by_id(u32 id);
 
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
···
 	return NULL;
 }
 
-static inline void __dev_map_flush(struct bpf_map *map)
+static inline void __dev_map_flush(void)
 {
 }
···
 	return NULL;
 }
 
-static inline void __cpu_map_flush(struct bpf_map *map)
+static inline void __cpu_map_flush(void)
 {
 }
 
···
 
 static inline void bpf_map_put(struct bpf_map *map)
 {
+}
+
+static inline struct bpf_prog *bpf_prog_by_id(u32 id)
+{
+	return ERR_PTR(-ENOTSUPP);
 }
 #endif /* CONFIG_BPF_SYSCALL */
 
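The key design point above is that `bpf_dispatcher_nopfunc` and the `DEFINE_BPF_DISPATCHER`/`BPF_DISPATCHER_FUNC` macros let callers pass the dispatch routine as an ordinary function argument, so builds without the JIT dispatcher fall back to a direct call with no extra branches at the call site. The sketch below reproduces only that calling convention in plain C; `struct insn`, `double_ctx`, and `prog_run` are invented stand-ins, not kernel APIs.

```c
#include <assert.h>
#include <stddef.h>

struct insn;	/* stand-in for struct bpf_insn */

typedef unsigned int (*run_fn)(const void *ctx, const struct insn *insnsi);
typedef unsigned int (*dispatch_fn)(const void *ctx,
				    const struct insn *insnsi,
				    run_fn bpf_func);

/* The "nop" dispatcher: just call straight into the program, exactly
 * what bpf_dispatcher_nopfunc does. A JITed dispatcher would instead
 * branch to a specialized direct call. */
unsigned int dispatcher_nopfunc(const void *ctx, const struct insn *insnsi,
				run_fn bpf_func)
{
	return bpf_func(ctx, insnsi);
}

/* A toy program body; in the kernel this would be prog->bpf_func. */
unsigned int double_ctx(const void *ctx, const struct insn *insnsi)
{
	(void)insnsi;
	return 2 * *(const unsigned int *)ctx;
}

/* Shape of __BPF_PROG_RUN minus the stats bookkeeping: the dispatch
 * function is a parameter, the program is an argument to it. */
unsigned int prog_run(const void *ctx, const struct insn *insnsi,
		      dispatch_fn dfunc, run_fn bpf_func)
{
	return dfunc(ctx, insnsi, bpf_func);
}
```

Swapping `dispatcher_nopfunc` for a generated dispatcher changes the indirect call into a predictable branch tree without touching any caller.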
+24 -17
include/linux/filter.h
···
 
 DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 
-#define BPF_PROG_RUN(prog, ctx)	({				\
-	u32 ret;						\
-	cant_sleep();						\
-	if (static_branch_unlikely(&bpf_stats_enabled_key)) {	\
-		struct bpf_prog_stats *stats;			\
-		u64 start = sched_clock();			\
-		ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);	\
-		stats = this_cpu_ptr(prog->aux->stats);		\
-		u64_stats_update_begin(&stats->syncp);		\
-		stats->cnt++;					\
-		stats->nsecs += sched_clock() - start;		\
-		u64_stats_update_end(&stats->syncp);		\
-	} else {						\
-		ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);	\
-	}							\
+#define __BPF_PROG_RUN(prog, ctx, dfunc)	({			\
+	u32 ret;							\
+	cant_sleep();							\
+	if (static_branch_unlikely(&bpf_stats_enabled_key)) {		\
+		struct bpf_prog_stats *stats;				\
+		u64 start = sched_clock();				\
+		ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);	\
+		stats = this_cpu_ptr(prog->aux->stats);			\
+		u64_stats_update_begin(&stats->syncp);			\
+		stats->cnt++;						\
+		stats->nsecs += sched_clock() - start;			\
+		u64_stats_update_end(&stats->syncp);			\
+	} else {							\
+		ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);	\
+	}								\
 	ret; })
+
+#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx,		\
+					       bpf_dispatcher_nopfunc)
 
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 
···
 	u32 tgt_index;
 	void *tgt_value;
 	struct bpf_map *map;
-	struct bpf_map *map_to_flush;
 	u32 kern_flags;
 };
 
···
 	return res;
 }
 
+DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp)
+
 static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 					    struct xdp_buff *xdp)
 {
···
 	 * already takes rcu_read_lock() when fetching the program, so
 	 * it's not necessary here anymore.
 	 */
-	return BPF_PROG_RUN(prog, xdp);
+	return __BPF_PROG_RUN(prog, xdp,
+			      BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp));
 }
+
+void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
 
 static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
 {
+11 -14
include/net/xdp_sock.h
···
 
 struct xsk_map {
 	struct bpf_map map;
-	struct list_head __percpu *flush_list;
 	spinlock_t lock; /* Synchronize map updates */
 	struct xdp_sock *xsk_map[];
 };
···
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
 /* Used from netdev driver */
 bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
-u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
-void xsk_umem_discard_addr(struct xdp_umem *umem);
+bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
+void xsk_umem_release_addr(struct xdp_umem *umem);
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
 bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
 void xsk_umem_consume_tx_done(struct xdp_umem *umem);
···
 		     struct xdp_sock **map_entry);
 int xsk_map_inc(struct xsk_map *map);
 void xsk_map_put(struct xsk_map *map);
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-		       struct xdp_sock *xs);
-void __xsk_map_flush(struct bpf_map *map);
+int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
+void __xsk_map_flush(void);
 
 static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
 						     u32 key)
···
 	return xsk_umem_has_addrs(umem, cnt - rq->length);
 }
 
-static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
+static inline bool xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
 {
 	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
 
···
 	return addr;
 }
 
-static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
+static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
 {
 	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
 
 	if (!rq->length)
-		xsk_umem_discard_addr(umem);
+		xsk_umem_release_addr(umem);
 	else
 		rq->length--;
 }
···
 	return NULL;
 }
 
-static inline void xsk_umem_discard_addr(struct xdp_umem *umem)
+static inline void xsk_umem_release_addr(struct xdp_umem *umem)
 {
 }
 
···
 	return NULL;
 }
 
-static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
+static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
 {
 }
 
···
 	return 0;
 }
 
-static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-				     struct xdp_sock *xs)
+static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	return -EOPNOTSUPP;
 }
 
-static inline void __xsk_map_flush(struct bpf_map *map)
+static inline void __xsk_map_flush(void)
 {
 }
 
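The `_rq` variants above consult a reuse queue before the fill queue: while `rq->length` is nonzero, peeks are served from recycled addresses and a release merely decrements the count; only when the reuse queue is empty do the operations fall through to the underlying fill queue. The toy below models that split; the `struct umem` layout and function names are invented for illustration and do not match the kernel's ring implementation.

```c
#include <assert.h>
#include <stdbool.h>

#define RQ_CAP 8

/* Toy fill queue plus reuse stack, modelling the peek/release split
 * of xsk_umem_peek_addr_rq() / xsk_umem_release_addr_rq(). */
struct umem {
	unsigned long long fill[RQ_CAP];	/* pretend fill ring */
	int fill_head, fill_len;
	unsigned long long reuse[RQ_CAP];	/* recycled addresses */
	int rq_len;
};

bool peek_addr(struct umem *u, unsigned long long *addr)
{
	if (!u->fill_len)
		return false;
	*addr = u->fill[u->fill_head];
	return true;
}

void release_addr(struct umem *u)
{
	u->fill_head++;
	u->fill_len--;
}

/* Prefer recycled addresses; fall through to the fill queue. */
bool peek_addr_rq(struct umem *u, unsigned long long *addr)
{
	if (u->rq_len) {
		*addr = u->reuse[u->rq_len - 1];
		return true;
	}
	return peek_addr(u, addr);
}

void release_addr_rq(struct umem *u)
{
	if (!u->rq_len)
		release_addr(u);
	else
		u->rq_len--;
}
```

Note that peek and release are separate steps so a driver can inspect an address, attempt DMA setup, and only consume the slot once it is sure the buffer will be used.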
+1
include/uapi/linux/audit.h
···
 #define AUDIT_FANOTIFY		1331	/* Fanotify access decision */
 #define AUDIT_TIME_INJOFFSET	1332	/* Timekeeping offset injected */
 #define AUDIT_TIME_ADJNTPVAL	1333	/* NTP value adjustment */
+#define AUDIT_BPF		1334	/* BPF subsystem */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
+10
include/uapi/linux/bpf.h
···
  * When children program makes decision (like picking TCP CA or sock bind)
  * parent program has a chance to override it.
  *
+ * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
+ * programs for a cgroup. Though it's possible to replace an old program at
+ * any position by also specifying BPF_F_REPLACE flag and position itself in
+ * replace_bpf_fd attribute. Old program at this position will be released.
+ *
  * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
  * A cgroup with NONE doesn't allow any programs in sub-cgroups.
  * Ex1:
···
  */
 #define BPF_F_ALLOW_OVERRIDE	(1U << 0)
 #define BPF_F_ALLOW_MULTI	(1U << 1)
+#define BPF_F_REPLACE		(1U << 2)
 
 /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
  * verifier will perform strict alignment checking as if the kernel
···
 		__u32		attach_bpf_fd;	/* eBPF program to attach */
 		__u32		attach_type;
 		__u32		attach_flags;
+		__u32		replace_bpf_fd;	/* previously attached eBPF
+						 * program to replace if
+						 * BPF_F_REPLACE is used
+						 */
 	};
 
 	struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+2 -1
include/uapi/linux/btf.h
···
 
 enum {
 	BTF_VAR_STATIC = 0,
-	BTF_VAR_GLOBAL_ALLOCATED,
+	BTF_VAR_GLOBAL_ALLOCATED = 1,
+	BTF_VAR_GLOBAL_EXTERN = 2,
 };
 
 /* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
+7
init/Kconfig
···
 	  Enable the bpf() system call that allows to manipulate eBPF
 	  programs and maps via file descriptors.
 
+config ARCH_WANT_DEFAULT_BPF_JIT
+	bool
+
 config BPF_JIT_ALWAYS_ON
 	bool "Permanently enable BPF JIT and remove BPF interpreter"
 	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
 	help
 	  Enables BPF JIT and removes BPF interpreter to avoid
 	  speculative execution of BPF instructions by the interpreter
+
+config BPF_JIT_DEFAULT_ON
+	def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
+	depends on HAVE_EBPF_JIT && BPF_JIT
 
 config USERFAULTFD
 	bool "Enable userfaultfd() system call"
+1
kernel/bpf/Makefile
···
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
+obj-$(CONFIG_BPF_JIT) += dispatcher.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
+51 -46
kernel/bpf/cgroup.c
···
  * if parent has overridable or multi-prog, allow attaching
  */
 static bool hierarchy_allows_attach(struct cgroup *cgrp,
-				    enum bpf_attach_type type,
-				    u32 new_flags)
+				    enum bpf_attach_type type)
 {
 	struct cgroup *p;
 
···
  * propagate the change to descendants
  * @cgrp: The cgroup which descendants to traverse
  * @prog: A program to attach
+ * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
  * @type: Type of attach operation
  * @flags: Option flags
  *
  * Must be called with cgroup_mutex held.
  */
 int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+			struct bpf_prog *replace_prog,
 			enum bpf_attach_type type, u32 flags)
 {
+	u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	struct bpf_prog *old_prog = NULL;
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
 		*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
+	struct bpf_prog_list *pl, *replace_pl = NULL;
 	enum bpf_cgroup_storage_type stype;
-	struct bpf_prog_list *pl;
-	bool pl_was_allocated;
 	int err;
 
-	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
+	if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
+	    ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
 		/* invalid combination */
 		return -EINVAL;
 
-	if (!hierarchy_allows_attach(cgrp, type, flags))
+	if (!hierarchy_allows_attach(cgrp, type))
 		return -EPERM;
 
-	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
+	if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)
 		/* Disallow attaching non-overridable on top
 		 * of existing overridable in this cgroup.
 		 * Disallow attaching multi-prog if overridable or none
···
 
 	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
 		return -E2BIG;
+
+	if (flags & BPF_F_ALLOW_MULTI) {
+		list_for_each_entry(pl, progs, node) {
+			if (pl->prog == prog)
+				/* disallow attaching the same prog twice */
+				return -EINVAL;
+			if (pl->prog == replace_prog)
+				replace_pl = pl;
+		}
+		if ((flags & BPF_F_REPLACE) && !replace_pl)
+			/* prog to replace not found for cgroup */
+			return -ENOENT;
+	} else if (!list_empty(progs)) {
+		replace_pl = list_first_entry(progs, typeof(*pl), node);
+	}
 
 	for_each_cgroup_storage_type(stype) {
 		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
···
 		}
 	}
 
-	if (flags & BPF_F_ALLOW_MULTI) {
-		list_for_each_entry(pl, progs, node) {
-			if (pl->prog == prog) {
-				/* disallow attaching the same prog twice */
-				for_each_cgroup_storage_type(stype)
-					bpf_cgroup_storage_free(storage[stype]);
-				return -EINVAL;
-			}
+	if (replace_pl) {
+		pl = replace_pl;
+		old_prog = pl->prog;
+		for_each_cgroup_storage_type(stype) {
+			old_storage[stype] = pl->storage[stype];
+			bpf_cgroup_storage_unlink(old_storage[stype]);
 		}
-
+	} else {
 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
 		if (!pl) {
 			for_each_cgroup_storage_type(stype)
 				bpf_cgroup_storage_free(storage[stype]);
 			return -ENOMEM;
 		}
-
-		pl_was_allocated = true;
-		pl->prog = prog;
-		for_each_cgroup_storage_type(stype)
-			pl->storage[stype] = storage[stype];
 		list_add_tail(&pl->node, progs);
-	} else {
-		if (list_empty(progs)) {
-			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-			if (!pl) {
-				for_each_cgroup_storage_type(stype)
-					bpf_cgroup_storage_free(storage[stype]);
-				return -ENOMEM;
-			}
-			pl_was_allocated = true;
-			list_add_tail(&pl->node, progs);
-		} else {
-			pl = list_first_entry(progs, typeof(*pl), node);
-			old_prog = pl->prog;
-			for_each_cgroup_storage_type(stype) {
-				old_storage[stype] = pl->storage[stype];
-				bpf_cgroup_storage_unlink(old_storage[stype]);
-			}
-			pl_was_allocated = false;
-		}
-		pl->prog = prog;
-		for_each_cgroup_storage_type(stype)
-			pl->storage[stype] = storage[stype];
 	}
 
-	cgrp->bpf.flags[type] = flags;
+	pl->prog = prog;
+	for_each_cgroup_storage_type(stype)
+		pl->storage[stype] = storage[stype];
+
+	cgrp->bpf.flags[type] = saved_flags;
 
 	err = update_effective_progs(cgrp, type);
 	if (err)
···
 		pl->storage[stype] = old_storage[stype];
 		bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
 	}
-	if (pl_was_allocated) {
+	if (!replace_pl) {
 		list_del(&pl->node);
 		kfree(pl);
 	}
···
 int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 			   enum bpf_prog_type ptype, struct bpf_prog *prog)
 {
+	struct bpf_prog *replace_prog = NULL;
 	struct cgroup *cgrp;
 	int ret;
 
···
 	if (IS_ERR(cgrp))
 		return PTR_ERR(cgrp);
 
-	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+	if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
+	    (attr->attach_flags & BPF_F_REPLACE)) {
+		replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
+		if (IS_ERR(replace_prog)) {
+			cgroup_put(cgrp);
+			return PTR_ERR(replace_prog);
+		}
+	}
+
+	ret = cgroup_bpf_attach(cgrp, prog, replace_prog, attr->attach_type,
 				attr->attach_flags);
+
+	if (replace_prog)
+		bpf_prog_put(replace_prog);
 	cgroup_put(cgrp);
 	return ret;
 }
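The reworked attach path does a single scan of the multi-prog list that serves three purposes: reject attaching the same program twice, locate the entry to replace when `BPF_F_REPLACE` is given, and fail with `-ENOENT` if that entry is missing; a result of "no slot" means append to the tail. The toy below captures that outcome logic with plain arrays and integer program ids; `find_attach_slot` and its error encoding (-22 for EINVAL, -2 for ENOENT, -1 for append) are invented for illustration.

```c
#include <assert.h>

/* Outcome of scanning the multi-prog list, as in __cgroup_bpf_attach():
 * same prog twice -> -EINVAL-like; BPF_F_REPLACE with a missing target
 * -> -ENOENT-like; otherwise the index to replace, or -1 for append. */
int find_attach_slot(const int *progs, int n, int new_prog,
		     int replace_prog, int do_replace)
{
	int i, replace_idx = -1;

	for (i = 0; i < n; i++) {
		if (progs[i] == new_prog)
			return -22;	/* -EINVAL: attached twice */
		if (progs[i] == replace_prog)
			replace_idx = i;
	}
	if (do_replace && replace_idx < 0)
		return -2;		/* -ENOENT: target not found */
	return replace_idx;		/* -1 means append to tail */
}
```

Doing the duplicate/replace checks before any allocation is what lets the real code drop the old interleaved error-cleanup paths.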
+2 -4
kernel/bpf/core.c
···
 	u32 pages, delta;
 	int ret;
 
-	BUG_ON(fp_old == NULL);
-
 	size = round_up(size, PAGE_SIZE);
 	pages = size / PAGE_SIZE;
 	if (pages <= fp_old->pages)
···
 
 #ifdef CONFIG_BPF_JIT
 /* All BPF JIT sysctl knobs here. */
-int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
+int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
+int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
 int bpf_jit_harden   __read_mostly;
-int bpf_jit_kallsyms __read_mostly;
 long bpf_jit_limit   __read_mostly;
 
 static __always_inline void
+26 -50
kernel/bpf/cpumap.c
···
 	struct bpf_map map;
 	/* Below members specific for map type */
 	struct bpf_cpu_map_entry **cpu_map;
-	struct list_head __percpu *flush_list;
 };
 
-static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx);
+static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list);
+
+static int bq_flush_to_queue(struct xdp_bulk_queue *bq);
 
 static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_cpu_map *cmap;
 	int err = -ENOMEM;
-	int ret, cpu;
 	u64 cost;
+	int ret;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);
···
 
 	/* make sure page count doesn't overflow */
 	cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
-	cost += sizeof(struct list_head) * num_possible_cpus();
 
 	/* Notice returns -EPERM on if map size is larger than memlock limit */
 	ret = bpf_map_charge_init(&cmap->map.memory, cost);
···
 		goto free_cmap;
 	}
 
-	cmap->flush_list = alloc_percpu(struct list_head);
-	if (!cmap->flush_list)
-		goto free_charge;
-
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(per_cpu_ptr(cmap->flush_list, cpu));
-
 	/* Alloc array for possible remote "destination" CPUs */
 	cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
 					   sizeof(struct bpf_cpu_map_entry *),
 					   cmap->map.numa_node);
 	if (!cmap->cpu_map)
-		goto free_percpu;
+		goto free_charge;
 
 	return &cmap->map;
-free_percpu:
-	free_percpu(cmap->flush_list);
 free_charge:
 	bpf_map_charge_finish(&cmap->map.memory);
 free_cmap:
···
 static void __cpu_map_entry_free(struct rcu_head *rcu)
 {
 	struct bpf_cpu_map_entry *rcpu;
-	int cpu;
 
 	/* This cpu_map_entry have been disconnected from map and one
-	 * RCU graze-period have elapsed. Thus, XDP cannot queue any
+	 * RCU grace-period have elapsed. Thus, XDP cannot queue any
 	 * new packets and cannot change/set flush_needed that can
 	 * find this entry.
 	 */
 	rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu);
 
-	/* Flush remaining packets in percpu bulkq */
-	for_each_online_cpu(cpu) {
-		struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
-
-		/* No concurrent bq_enqueue can run at this point */
-		bq_flush_to_queue(bq, false);
-	}
 	free_percpu(rcpu->bulkq);
 	/* Cannot kthread_stop() here, last put free rcpu resources */
 	put_cpu_map_entry(rcpu);
···
  * percpu bulkq to queue. Due to caller map_delete_elem() disable
  * preemption, cannot call kthread_stop() to make sure queue is empty.
  * Instead a work_queue is started for stopping kthread,
- * cpu_map_kthread_stop, which waits for an RCU graze period before
+ * cpu_map_kthread_stop, which waits for an RCU grace period before
  * stopping kthread, emptying the queue.
  */
 static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
···
 static void cpu_map_free(struct bpf_map *map)
 {
 	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
-	int cpu;
 	u32 i;
 
 	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
···
 	bpf_clear_redirect_map(map);
 	synchronize_rcu();
 
-	/* To ensure all pending flush operations have completed wait for flush
-	 * list be empty on _all_ cpus. Because the above synchronize_rcu()
-	 * ensures the map is disconnected from the program we can assume no new
-	 * items will be added to the list.
-	 */
-	for_each_online_cpu(cpu) {
-		struct list_head *flush_list = per_cpu_ptr(cmap->flush_list, cpu);
-
-		while (!list_empty(flush_list))
-			cond_resched();
-	}
-
 	/* For cpu_map the remote CPUs can still be using the entries
 	 * (struct bpf_cpu_map_entry).
 	 */
···
 		if (!rcpu)
 			continue;
 
-		/* bq flush and cleanup happens after RCU graze-period */
+		/* bq flush and cleanup happens after RCU grace-period */
 		__cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */
 	}
-	free_percpu(cmap->flush_list);
 	bpf_map_area_free(cmap->cpu_map);
 	kfree(cmap);
 }
···
 	.map_check_btf		= map_check_no_btf,
 };
 
-static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx)
+static int bq_flush_to_queue(struct xdp_bulk_queue *bq)
 {
 	struct bpf_cpu_map_entry *rcpu = bq->obj;
 	unsigned int processed = 0, drops = 0;
···
 		err = __ptr_ring_produce(q, xdpf);
 		if (err) {
 			drops++;
-			if (likely(in_napi_ctx))
-				xdp_return_frame_rx_napi(xdpf);
-			else
-				xdp_return_frame(xdpf);
+			xdp_return_frame_rx_napi(xdpf);
 		}
 		processed++;
 	}
···
  */
 static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
 {
-	struct list_head *flush_list = this_cpu_ptr(rcpu->cmap->flush_list);
+	struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
 	struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
 
 	if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
-		bq_flush_to_queue(bq, true);
+		bq_flush_to_queue(bq);
 
 	/* Notice, xdp_buff/page MUST be queued here, long enough for
 	 * driver to code invoking us to finished, due to driver
···
 	return 0;
 }
 
-void __cpu_map_flush(struct bpf_map *map)
+void __cpu_map_flush(void)
 {
-	struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
-	struct list_head *flush_list = this_cpu_ptr(cmap->flush_list);
+	struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
 	struct xdp_bulk_queue *bq, *tmp;
 
 	list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
-		bq_flush_to_queue(bq, true);
+		bq_flush_to_queue(bq);
 
 		/* If already running, costs spin_lock_irqsave + smb_mb */
 		wake_up_process(bq->obj->kthread);
 	}
 }
+
+static int __init cpu_map_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu));
+	return 0;
+}
+
+subsys_initcall(cpu_map_init);
kernel/bpf/devmap.c (+18 -60)
```diff
···
 struct bpf_dtab {
 	struct bpf_map map;
 	struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */
-	struct list_head __percpu *flush_list;
 	struct list_head list;
 
 	/* these are only used for DEVMAP_HASH type maps */
···
 	u32 n_buckets;
 };
 
+static DEFINE_PER_CPU(struct list_head, dev_map_flush_list);
 static DEFINE_SPINLOCK(dev_map_lock);
 static LIST_HEAD(dev_map_list);
···
 static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 {
-	int err, cpu;
-	u64 cost;
+	u64 cost = 0;
+	int err;
 
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
···
 	bpf_map_init_from_attr(&dtab->map, attr);
-
-	/* make sure page count doesn't overflow */
-	cost = (u64) sizeof(struct list_head) * num_possible_cpus();
 
 	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
 		dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
···
 	if (err)
 		return -EINVAL;
 
-	dtab->flush_list = alloc_percpu(struct list_head);
-	if (!dtab->flush_list)
-		goto free_charge;
-
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(per_cpu_ptr(dtab->flush_list, cpu));
-
 	if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
 		dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
 		if (!dtab->dev_index_head)
-			goto free_percpu;
+			goto free_charge;
 
 		spin_lock_init(&dtab->index_lock);
 	} else {
···
 					      sizeof(struct bpf_dtab_netdev *),
 					      dtab->map.numa_node);
 		if (!dtab->netdev_map)
-			goto free_percpu;
+			goto free_charge;
 	}
 
 	return 0;
 
-free_percpu:
-	free_percpu(dtab->flush_list);
 free_charge:
 	bpf_map_charge_finish(&dtab->map.memory);
 	return -ENOMEM;
···
 static void dev_map_free(struct bpf_map *map)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	int i, cpu;
+	int i;
 
 	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
 	 * so the programs (can be more than one that used this map) were
···
 	/* Make sure prior __dev_map_entry_free() have completed. */
 	rcu_barrier();
-
-	/* To ensure all pending flush operations have completed wait for flush
-	 * list to empty on _all_ cpus.
-	 * Because the above synchronize_rcu() ensures the map is disconnected
-	 * from the program we can assume no new items will be added.
-	 */
-	for_each_online_cpu(cpu) {
-		struct list_head *flush_list = per_cpu_ptr(dtab->flush_list, cpu);
-
-		while (!list_empty(flush_list))
-			cond_resched();
-	}
 
 	if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
 		for (i = 0; i < dtab->n_buckets; i++) {
···
 		bpf_map_area_free(dtab->netdev_map);
 	}
 
-	free_percpu(dtab->flush_list);
 	kfree(dtab);
 }
···
 	return -ENOENT;
 }
 
-static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
-		       bool in_napi_ctx)
+static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
 {
 	struct bpf_dtab_netdev *obj = bq->obj;
 	struct net_device *dev = obj->dev;
···
 	for (i = 0; i < bq->count; i++) {
 		struct xdp_frame *xdpf = bq->q[i];
 
-		/* RX path under NAPI protection, can return frames faster */
-		if (likely(in_napi_ctx))
-			xdp_return_frame_rx_napi(xdpf);
-		else
-			xdp_return_frame(xdpf);
+		xdp_return_frame_rx_napi(xdpf);
 		drops++;
 	}
 	goto out;
···
  * net device can be torn down. On devmap tear down we ensure the flush list
  * is empty before completing to ensure all flush operations have completed.
  */
-void __dev_map_flush(struct bpf_map *map)
+void __dev_map_flush(void)
 {
-	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	struct list_head *flush_list = this_cpu_ptr(dtab->flush_list);
+	struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
 	struct xdp_bulk_queue *bq, *tmp;
 
 	rcu_read_lock();
 	list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
-		bq_xmit_all(bq, XDP_XMIT_FLUSH, true);
+		bq_xmit_all(bq, XDP_XMIT_FLUSH);
 	rcu_read_unlock();
 }
···
 		      struct net_device *dev_rx)
 
 {
-	struct list_head *flush_list = this_cpu_ptr(obj->dtab->flush_list);
+	struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
 	struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
 
 	if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
-		bq_xmit_all(bq, 0, true);
+		bq_xmit_all(bq, 0);
 
 	/* Ingress dev_rx will be the same for all xdp_frame's in
 	 * bulk_queue, because bq stored per-CPU and must be flushed
···
 	return dev ? &dev->ifindex : NULL;
 }
 
-static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
-{
-	if (dev->dev->netdev_ops->ndo_xdp_xmit) {
-		struct xdp_bulk_queue *bq;
-		int cpu;
-
-		rcu_read_lock();
-		for_each_online_cpu(cpu) {
-			bq = per_cpu_ptr(dev->bulkq, cpu);
-			bq_xmit_all(bq, XDP_XMIT_FLUSH, false);
-		}
-		rcu_read_unlock();
-	}
-}
-
 static void __dev_map_entry_free(struct rcu_head *rcu)
 {
 	struct bpf_dtab_netdev *dev;
 
 	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
-	dev_map_flush_old(dev);
 	free_percpu(dev->bulkq);
 	dev_put(dev->dev);
 	kfree(dev);
···
 static int __init dev_map_init(void)
 {
+	int cpu;
+
 	/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
 	BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
 		     offsetof(struct _bpf_dtab_netdev, dev));
 	register_netdevice_notifier(&dev_map_notifier);
+
+	for_each_possible_cpu(cpu)
+		INIT_LIST_HEAD(&per_cpu(dev_map_flush_list, cpu));
 	return 0;
 }
```
kernel/bpf/dispatcher.c (+158)
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright(c) 2019 Intel Corporation. */ 3 + 4 + #include <linux/hash.h> 5 + #include <linux/bpf.h> 6 + #include <linux/filter.h> 7 + 8 + /* The BPF dispatcher is a multiway branch code generator. The 9 + * dispatcher is a mechanism to avoid the performance penalty of an 10 + * indirect call, which is expensive when retpolines are enabled. A 11 + * dispatch client registers a BPF program into the dispatcher, and if 12 + * there is available room in the dispatcher a direct call to the BPF 13 + * program will be generated. All calls to the BPF programs called via 14 + * the dispatcher will then be a direct call, instead of an 15 + * indirect. The dispatcher hijacks a trampoline function it via the 16 + * __fentry__ of the trampoline. The trampoline function has the 17 + * following signature: 18 + * 19 + * unsigned int trampoline(const void *ctx, const struct bpf_insn *insnsi, 20 + * unsigned int (*bpf_func)(const void *, 21 + * const struct bpf_insn *)); 22 + */ 23 + 24 + static struct bpf_dispatcher_prog *bpf_dispatcher_find_prog( 25 + struct bpf_dispatcher *d, struct bpf_prog *prog) 26 + { 27 + int i; 28 + 29 + for (i = 0; i < BPF_DISPATCHER_MAX; i++) { 30 + if (prog == d->progs[i].prog) 31 + return &d->progs[i]; 32 + } 33 + return NULL; 34 + } 35 + 36 + static struct bpf_dispatcher_prog *bpf_dispatcher_find_free( 37 + struct bpf_dispatcher *d) 38 + { 39 + return bpf_dispatcher_find_prog(d, NULL); 40 + } 41 + 42 + static bool bpf_dispatcher_add_prog(struct bpf_dispatcher *d, 43 + struct bpf_prog *prog) 44 + { 45 + struct bpf_dispatcher_prog *entry; 46 + 47 + if (!prog) 48 + return false; 49 + 50 + entry = bpf_dispatcher_find_prog(d, prog); 51 + if (entry) { 52 + refcount_inc(&entry->users); 53 + return false; 54 + } 55 + 56 + entry = bpf_dispatcher_find_free(d); 57 + if (!entry) 58 + return false; 59 + 60 + bpf_prog_inc(prog); 61 + entry->prog = prog; 62 + refcount_set(&entry->users, 1); 63 + d->num_progs++; 
64 + return true; 65 + } 66 + 67 + static bool bpf_dispatcher_remove_prog(struct bpf_dispatcher *d, 68 + struct bpf_prog *prog) 69 + { 70 + struct bpf_dispatcher_prog *entry; 71 + 72 + if (!prog) 73 + return false; 74 + 75 + entry = bpf_dispatcher_find_prog(d, prog); 76 + if (!entry) 77 + return false; 78 + 79 + if (refcount_dec_and_test(&entry->users)) { 80 + entry->prog = NULL; 81 + bpf_prog_put(prog); 82 + d->num_progs--; 83 + return true; 84 + } 85 + return false; 86 + } 87 + 88 + int __weak arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs) 89 + { 90 + return -ENOTSUPP; 91 + } 92 + 93 + static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image) 94 + { 95 + s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0]; 96 + int i; 97 + 98 + for (i = 0; i < BPF_DISPATCHER_MAX; i++) { 99 + if (d->progs[i].prog) 100 + *ipsp++ = (s64)(uintptr_t)d->progs[i].prog->bpf_func; 101 + } 102 + return arch_prepare_bpf_dispatcher(image, &ips[0], d->num_progs); 103 + } 104 + 105 + static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) 106 + { 107 + void *old, *new; 108 + u32 noff; 109 + int err; 110 + 111 + if (!prev_num_progs) { 112 + old = NULL; 113 + noff = 0; 114 + } else { 115 + old = d->image + d->image_off; 116 + noff = d->image_off ^ (PAGE_SIZE / 2); 117 + } 118 + 119 + new = d->num_progs ? 
d->image + noff : NULL; 120 + if (new) { 121 + if (bpf_dispatcher_prepare(d, new)) 122 + return; 123 + } 124 + 125 + err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new); 126 + if (err || !new) 127 + return; 128 + 129 + d->image_off = noff; 130 + } 131 + 132 + void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, 133 + struct bpf_prog *to) 134 + { 135 + bool changed = false; 136 + int prev_num_progs; 137 + 138 + if (from == to) 139 + return; 140 + 141 + mutex_lock(&d->mutex); 142 + if (!d->image) { 143 + d->image = bpf_jit_alloc_exec_page(); 144 + if (!d->image) 145 + goto out; 146 + } 147 + 148 + prev_num_progs = d->num_progs; 149 + changed |= bpf_dispatcher_remove_prog(d, from); 150 + changed |= bpf_dispatcher_add_prog(d, to); 151 + 152 + if (!changed) 153 + goto out; 154 + 155 + bpf_dispatcher_update(d, prev_num_progs); 156 + out: 157 + mutex_unlock(&d->mutex); 158 + }
kernel/bpf/syscall.c (+53 -10)
··· 23 23 #include <linux/timekeeping.h> 24 24 #include <linux/ctype.h> 25 25 #include <linux/nospec.h> 26 + #include <linux/audit.h> 26 27 #include <uapi/linux/btf.h> 27 28 28 29 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ ··· 1307 1306 return 0; 1308 1307 } 1309 1308 1309 + enum bpf_audit { 1310 + BPF_AUDIT_LOAD, 1311 + BPF_AUDIT_UNLOAD, 1312 + BPF_AUDIT_MAX, 1313 + }; 1314 + 1315 + static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { 1316 + [BPF_AUDIT_LOAD] = "LOAD", 1317 + [BPF_AUDIT_UNLOAD] = "UNLOAD", 1318 + }; 1319 + 1320 + static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) 1321 + { 1322 + struct audit_context *ctx = NULL; 1323 + struct audit_buffer *ab; 1324 + 1325 + if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) 1326 + return; 1327 + if (audit_enabled == AUDIT_OFF) 1328 + return; 1329 + if (op == BPF_AUDIT_LOAD) 1330 + ctx = audit_context(); 1331 + ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); 1332 + if (unlikely(!ab)) 1333 + return; 1334 + audit_log_format(ab, "prog-id=%u op=%s", 1335 + prog->aux->id, bpf_audit_str[op]); 1336 + audit_log_end(ab); 1337 + } 1338 + 1310 1339 int __bpf_prog_charge(struct user_struct *user, u32 pages) 1311 1340 { 1312 1341 unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; ··· 1452 1421 { 1453 1422 if (atomic64_dec_and_test(&prog->aux->refcnt)) { 1454 1423 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); 1424 + bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); 1455 1425 /* bpf_prog_free_id() must be called first */ 1456 1426 bpf_prog_free_id(prog, do_idr_lock); 1457 1427 __bpf_prog_put_noref(prog, true); ··· 1862 1830 */ 1863 1831 bpf_prog_kallsyms_add(prog); 1864 1832 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); 1833 + bpf_audit_prog(prog, BPF_AUDIT_LOAD); 1865 1834 1866 1835 err = bpf_prog_new_fd(prog); 1867 1836 if (err < 0) ··· 2073 2040 } 2074 2041 } 2075 2042 2076 - #define BPF_PROG_ATTACH_LAST_FIELD attach_flags 2043 + #define 
BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd 2077 2044 2078 2045 #define BPF_F_ATTACH_MASK \ 2079 - (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) 2046 + (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE) 2080 2047 2081 2048 static int bpf_prog_attach(const union bpf_attr *attr) 2082 2049 { ··· 2338 2305 2339 2306 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id 2340 2307 2308 + struct bpf_prog *bpf_prog_by_id(u32 id) 2309 + { 2310 + struct bpf_prog *prog; 2311 + 2312 + if (!id) 2313 + return ERR_PTR(-ENOENT); 2314 + 2315 + spin_lock_bh(&prog_idr_lock); 2316 + prog = idr_find(&prog_idr, id); 2317 + if (prog) 2318 + prog = bpf_prog_inc_not_zero(prog); 2319 + else 2320 + prog = ERR_PTR(-ENOENT); 2321 + spin_unlock_bh(&prog_idr_lock); 2322 + return prog; 2323 + } 2324 + 2341 2325 static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) 2342 2326 { 2343 2327 struct bpf_prog *prog; ··· 2367 2317 if (!capable(CAP_SYS_ADMIN)) 2368 2318 return -EPERM; 2369 2319 2370 - spin_lock_bh(&prog_idr_lock); 2371 - prog = idr_find(&prog_idr, id); 2372 - if (prog) 2373 - prog = bpf_prog_inc_not_zero(prog); 2374 - else 2375 - prog = ERR_PTR(-ENOENT); 2376 - spin_unlock_bh(&prog_idr_lock); 2377 - 2320 + prog = bpf_prog_by_id(id); 2378 2321 if (IS_ERR(prog)) 2379 2322 return PTR_ERR(prog); 2380 2323
kernel/bpf/trampoline.c (+17 -7)
```diff
···
 /* serializes access to trampoline_table */
 static DEFINE_MUTEX(trampoline_mutex);
 
+void *bpf_jit_alloc_exec_page(void)
+{
+	void *image;
+
+	image = bpf_jit_alloc_exec(PAGE_SIZE);
+	if (!image)
+		return NULL;
+
+	set_vm_flush_reset_perms(image);
+	/* Keep image as writeable. The alternative is to keep flipping ro/rw
+	 * everytime new program is attached or detached.
+	 */
+	set_memory_x((long)image, 1);
+	return image;
+}
+
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
 	struct bpf_trampoline *tr;
···
 		goto out;
 
 	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
-	image = bpf_jit_alloc_exec(PAGE_SIZE);
+	image = bpf_jit_alloc_exec_page();
 	if (!image) {
 		kfree(tr);
 		tr = NULL;
···
 	mutex_init(&tr->mutex);
 	for (i = 0; i < BPF_TRAMP_MAX; i++)
 		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
-
-	set_vm_flush_reset_perms(image);
-	/* Keep image as writeable. The alternative is to keep flipping ro/rw
-	 * everytime new program is attached or detached.
-	 */
-	set_memory_x((long)image, 1);
 	tr->image = image;
 out:
 	mutex_unlock(&trampoline_mutex);
```
kernel/bpf/xskmap.c (+3 -15)
```diff
···
 static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_map_memory mem;
-	int cpu, err, numa_node;
+	int err, numa_node;
 	struct xsk_map *m;
-	u64 cost, size;
+	u64 size;
 
 	if (!capable(CAP_NET_ADMIN))
 		return ERR_PTR(-EPERM);
···
 	numa_node = bpf_map_attr_numa_node(attr);
 	size = struct_size(m, xsk_map, attr->max_entries);
-	cost = size + array_size(sizeof(*m->flush_list), num_possible_cpus());
 
-	err = bpf_map_charge_init(&mem, cost);
+	err = bpf_map_charge_init(&mem, size);
 	if (err < 0)
 		return ERR_PTR(err);
···
 	bpf_map_charge_move(&m->map.memory, &mem);
 	spin_lock_init(&m->lock);
 
-	m->flush_list = alloc_percpu(struct list_head);
-	if (!m->flush_list) {
-		bpf_map_charge_finish(&m->map.memory);
-		bpf_map_area_free(m);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	for_each_possible_cpu(cpu)
-		INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
-
 	return &m->map;
 }
···
 	bpf_clear_redirect_map(map);
 	synchronize_net();
-	free_percpu(m->flush_list);
 	bpf_map_area_free(m);
 }
```
kernel/cgroup/cgroup.c (+3 -2)
```diff
···
 #ifdef CONFIG_CGROUP_BPF
 int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags)
+		      struct bpf_prog *replace_prog, enum bpf_attach_type type,
+		      u32 flags)
 {
 	int ret;
 
 	mutex_lock(&cgroup_mutex);
-	ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
+	ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, type, flags);
 	mutex_unlock(&cgroup_mutex);
 	return ret;
 }
```
net/bpf/test_run.c (+40 -14)
··· 15 15 #include <trace/events/bpf_test_run.h> 16 16 17 17 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, 18 - u32 *retval, u32 *time) 18 + u32 *retval, u32 *time, bool xdp) 19 19 { 20 20 struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL }; 21 21 enum bpf_cgroup_storage_type stype; ··· 41 41 time_start = ktime_get_ns(); 42 42 for (i = 0; i < repeat; i++) { 43 43 bpf_cgroup_storage_set(storage); 44 - *retval = BPF_PROG_RUN(prog, ctx); 44 + 45 + if (xdp) 46 + *retval = bpf_prog_run_xdp(prog, ctx); 47 + else 48 + *retval = BPF_PROG_RUN(prog, ctx); 45 49 46 50 if (signal_pending(current)) { 47 51 ret = -EINTR; ··· 251 247 return 0; 252 248 253 249 /* make sure the fields we don't use are zeroed */ 254 - if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority))) 250 + if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, mark))) 251 + return -EINVAL; 252 + 253 + /* mark is allowed */ 254 + 255 + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, mark), 256 + offsetof(struct __sk_buff, priority))) 255 257 return -EINVAL; 256 258 257 259 /* priority is allowed */ 258 260 259 - if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) + 260 - sizeof_field(struct __sk_buff, priority), 261 + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority), 261 262 offsetof(struct __sk_buff, cb))) 262 263 return -EINVAL; 263 264 264 265 /* cb is allowed */ 265 266 266 - if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) + 267 - sizeof_field(struct __sk_buff, cb), 267 + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb), 268 268 offsetof(struct __sk_buff, tstamp))) 269 269 return -EINVAL; 270 270 271 271 /* tstamp is allowed */ 272 + /* wire_len is allowed */ 273 + /* gso_segs is allowed */ 272 274 273 - if (!range_is_zero(__skb, offsetof(struct __sk_buff, tstamp) + 274 - sizeof_field(struct __sk_buff, tstamp), 275 + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs), 275 276 
sizeof(struct __sk_buff))) 276 277 return -EINVAL; 277 278 279 + skb->mark = __skb->mark; 278 280 skb->priority = __skb->priority; 279 281 skb->tstamp = __skb->tstamp; 280 282 memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN); 283 + 284 + if (__skb->wire_len == 0) { 285 + cb->pkt_len = skb->len; 286 + } else { 287 + if (__skb->wire_len < skb->len || 288 + __skb->wire_len > GSO_MAX_SIZE) 289 + return -EINVAL; 290 + cb->pkt_len = __skb->wire_len; 291 + } 292 + 293 + if (__skb->gso_segs > GSO_MAX_SEGS) 294 + return -EINVAL; 295 + skb_shinfo(skb)->gso_segs = __skb->gso_segs; 281 296 282 297 return 0; 283 298 } ··· 308 285 if (!__skb) 309 286 return; 310 287 288 + __skb->mark = skb->mark; 311 289 __skb->priority = skb->priority; 312 290 __skb->tstamp = skb->tstamp; 313 291 memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN); 292 + __skb->wire_len = cb->pkt_len; 293 + __skb->gso_segs = skb_shinfo(skb)->gso_segs; 314 294 } 315 295 316 296 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, ··· 385 359 ret = convert___skb_to_skb(skb, ctx); 386 360 if (ret) 387 361 goto out; 388 - ret = bpf_test_run(prog, skb, repeat, &retval, &duration); 362 + ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false); 389 363 if (ret) 390 364 goto out; 391 365 if (!is_l2) { ··· 442 416 443 417 rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); 444 418 xdp.rxq = &rxqueue->xdp_rxq; 445 - 446 - ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration); 419 + bpf_prog_change_xdp(NULL, prog); 420 + ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); 447 421 if (ret) 448 422 goto out; 449 423 if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN || ··· 451 425 size = xdp.data_end - xdp.data; 452 426 ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration); 453 427 out: 428 + bpf_prog_change_xdp(prog, NULL); 454 429 kfree(data); 455 430 return ret; 456 431 } ··· 464 437 465 438 /* flags is allowed */ 466 439 467 - 
if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) + 468 - sizeof_field(struct bpf_flow_keys, flags), 440 + if (!range_is_zero(ctx, offsetofend(struct bpf_flow_keys, flags), 469 441 sizeof(struct bpf_flow_keys))) 470 442 return -EINVAL; 471 443
net/core/dev.c (+18 -1)
```diff
···
 			   struct netlink_ext_ack *extack, u32 flags,
 			   struct bpf_prog *prog)
 {
+	bool non_hw = !(flags & XDP_FLAGS_HW_MODE);
+	struct bpf_prog *prev_prog = NULL;
 	struct netdev_bpf xdp;
+	int err;
+
+	if (non_hw) {
+		prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op,
+							   XDP_QUERY_PROG));
+		if (IS_ERR(prev_prog))
+			prev_prog = NULL;
+	}
 
 	memset(&xdp, 0, sizeof(xdp));
 	if (flags & XDP_FLAGS_HW_MODE)
···
 	xdp.flags = flags;
 	xdp.prog = prog;
 
-	return bpf_op(dev, &xdp);
+	err = bpf_op(dev, &xdp);
+	if (!err && non_hw)
+		bpf_prog_change_xdp(prev_prog, prog);
+
+	if (prev_prog)
+		bpf_prog_put(prev_prog);
+
+	return err;
 }
 
 static void dev_xdp_uninstall(struct net_device *dev)
```
net/core/filter.c (+19 -52)
```diff
···
 }
 
 static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
-			    struct bpf_map *map,
-			    struct xdp_buff *xdp,
-			    u32 index)
+			    struct bpf_map *map, struct xdp_buff *xdp)
 {
-	int err;
-
 	switch (map->map_type) {
 	case BPF_MAP_TYPE_DEVMAP:
-	case BPF_MAP_TYPE_DEVMAP_HASH: {
-		struct bpf_dtab_netdev *dst = fwd;
-
-		err = dev_map_enqueue(dst, xdp, dev_rx);
-		if (unlikely(err))
-			return err;
-		break;
-	}
-	case BPF_MAP_TYPE_CPUMAP: {
-		struct bpf_cpu_map_entry *rcpu = fwd;
-
-		err = cpu_map_enqueue(rcpu, xdp, dev_rx);
-		if (unlikely(err))
-			return err;
-		break;
-	}
-	case BPF_MAP_TYPE_XSKMAP: {
-		struct xdp_sock *xs = fwd;
-
-		err = __xsk_map_redirect(map, xdp, xs);
-		return err;
-	}
+	case BPF_MAP_TYPE_DEVMAP_HASH:
+		return dev_map_enqueue(fwd, xdp, dev_rx);
+	case BPF_MAP_TYPE_CPUMAP:
+		return cpu_map_enqueue(fwd, xdp, dev_rx);
+	case BPF_MAP_TYPE_XSKMAP:
+		return __xsk_map_redirect(fwd, xdp);
 	default:
 		break;
 	}
···
 
 void xdp_do_flush_map(void)
 {
-	struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
-	struct bpf_map *map = ri->map_to_flush;
-
-	ri->map_to_flush = NULL;
-	if (map) {
-		switch (map->map_type) {
-		case BPF_MAP_TYPE_DEVMAP:
-		case BPF_MAP_TYPE_DEVMAP_HASH:
-			__dev_map_flush(map);
-			break;
-		case BPF_MAP_TYPE_CPUMAP:
-			__cpu_map_flush(map);
-			break;
-		case BPF_MAP_TYPE_XSKMAP:
-			__xsk_map_flush(map);
-			break;
-		default:
-			break;
-		}
-	}
+	__dev_map_flush();
+	__cpu_map_flush();
+	__xsk_map_flush();
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush_map);
···
 	ri->tgt_value = NULL;
 	WRITE_ONCE(ri->map, NULL);
 
-	if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
-		xdp_do_flush_map();
-
-	err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
+	err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
 	if (unlikely(err))
 		goto err;
 
-	ri->map_to_flush = map;
 	_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
 	return 0;
 err:
···
 const struct bpf_prog_ops sk_reuseport_prog_ops = {
 };
 #endif /* CONFIG_INET */
+
+DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp)
+
+void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
+{
+	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp),
+				   prev_prog, prog);
+}
```
net/xdp/xsk.c (+45 -34)
··· 31 31 32 32 #define TX_BATCH_SIZE 16 33 33 34 + static DEFINE_PER_CPU(struct list_head, xskmap_flush_list); 35 + 34 36 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) 35 37 { 36 38 return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) && ··· 41 39 42 40 bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt) 43 41 { 44 - return xskq_has_addrs(umem->fq, cnt); 42 + return xskq_cons_has_entries(umem->fq, cnt); 45 43 } 46 44 EXPORT_SYMBOL(xsk_umem_has_addrs); 47 45 48 - u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) 46 + bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) 49 47 { 50 - return xskq_peek_addr(umem->fq, addr, umem); 48 + return xskq_cons_peek_addr(umem->fq, addr, umem); 51 49 } 52 50 EXPORT_SYMBOL(xsk_umem_peek_addr); 53 51 54 - void xsk_umem_discard_addr(struct xdp_umem *umem) 52 + void xsk_umem_release_addr(struct xdp_umem *umem) 55 53 { 56 - xskq_discard_addr(umem->fq); 54 + xskq_cons_release(umem->fq); 57 55 } 58 - EXPORT_SYMBOL(xsk_umem_discard_addr); 56 + EXPORT_SYMBOL(xsk_umem_release_addr); 59 57 60 58 void xsk_set_rx_need_wakeup(struct xdp_umem *umem) 61 59 { ··· 126 124 void *to_buf = xdp_umem_get_data(umem, addr); 127 125 128 126 addr = xsk_umem_add_offset_to_addr(addr); 129 - if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) { 127 + if (xskq_cons_crosses_non_contig_pg(umem, addr, len + metalen)) { 130 128 void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr; 131 129 u64 page_start = addr & ~(PAGE_SIZE - 1); 132 130 u64 first_len = PAGE_SIZE - (addr - page_start); ··· 148 146 u32 metalen; 149 147 int err; 150 148 151 - if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) || 149 + if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) || 152 150 len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { 153 151 xs->rx_dropped++; 154 152 return -ENOSPC; ··· 167 165 168 166 offset += metalen; 169 167 addr = xsk_umem_adjust_offset(xs->umem, addr, offset); 170 - err = xskq_produce_batch_desc(xs->rx, addr, len); 168 
+ err = xskq_prod_reserve_desc(xs->rx, addr, len); 171 169 if (!err) { 172 - xskq_discard_addr(xs->umem->fq); 170 + xskq_cons_release(xs->umem->fq); 173 171 xdp_return_buff(xdp); 174 172 return 0; 175 173 } ··· 180 178 181 179 static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) 182 180 { 183 - int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len); 181 + int err = xskq_prod_reserve_desc(xs->rx, xdp->handle, len); 184 182 185 183 if (err) 186 184 xs->rx_dropped++; ··· 216 214 217 215 static void xsk_flush(struct xdp_sock *xs) 218 216 { 219 - xskq_produce_flush_desc(xs->rx); 217 + xskq_prod_submit(xs->rx); 220 218 xs->sk.sk_data_ready(&xs->sk); 221 219 } 222 220 ··· 236 234 goto out_unlock; 237 235 } 238 236 239 - if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) || 237 + if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) || 240 238 len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { 241 239 err = -ENOSPC; 242 240 goto out_drop; ··· 247 245 memcpy(buffer, xdp->data_meta, len + metalen); 248 246 249 247 addr = xsk_umem_adjust_offset(xs->umem, addr, metalen); 250 - err = xskq_produce_batch_desc(xs->rx, addr, len); 248 + err = xskq_prod_reserve_desc(xs->rx, addr, len); 251 249 if (err) 252 250 goto out_drop; 253 251 254 - xskq_discard_addr(xs->umem->fq); 255 - xskq_produce_flush_desc(xs->rx); 252 + xskq_cons_release(xs->umem->fq); 253 + xskq_prod_submit(xs->rx); 256 254 257 255 spin_unlock_bh(&xs->rx_lock); 258 256 ··· 266 264 return err; 267 265 } 268 266 269 - int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, 270 - struct xdp_sock *xs) 267 + int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) 271 268 { 272 - struct xsk_map *m = container_of(map, struct xsk_map, map); 273 - struct list_head *flush_list = this_cpu_ptr(m->flush_list); 269 + struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list); 274 270 int err; 275 271 276 272 err = xsk_rcv(xs, xdp); ··· 281 281 return 0; 282 282 } 283 
-void __xsk_map_flush(struct bpf_map *map)
+void __xsk_map_flush(void)
 {
-    struct xsk_map *m = container_of(map, struct xsk_map, map);
-    struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+    struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
     struct xdp_sock *xs, *tmp;

     list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
···
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
 {
-    xskq_produce_flush_addr_n(umem->cq, nb_entries);
+    xskq_prod_submit_n(umem->cq, nb_entries);
 }
 EXPORT_SYMBOL(xsk_umem_complete_tx);
···
     rcu_read_lock();
     list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
-        if (!xskq_peek_desc(xs->tx, desc, umem))
+        if (!xskq_cons_peek_desc(xs->tx, desc, umem))
             continue;

-        if (xskq_produce_addr_lazy(umem->cq, desc->addr))
+        /* This is the backpressure mechanism for the Tx path.
+         * Reserve space in the completion queue and only proceed
+         * if there is space in it. This avoids having to implement
+         * any buffering in the Tx path.
+         */
+        if (xskq_prod_reserve_addr(umem->cq, desc->addr))
             goto out;

-        xskq_discard_desc(xs->tx);
+        xskq_cons_release(xs->tx);
         rcu_read_unlock();
         return true;
     }
···
     unsigned long flags;

     spin_lock_irqsave(&xs->tx_completion_lock, flags);
-    WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
+    xskq_prod_submit_addr(xs->umem->cq, addr);
     spin_unlock_irqrestore(&xs->tx_completion_lock, flags);

     sock_wfree(skb);
···
     if (xs->queue_id >= xs->dev->real_num_tx_queues)
         goto out;

-    while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
+    while (xskq_cons_peek_desc(xs->tx, &desc, xs->umem)) {
         char *buffer;
         u64 addr;
         u32 len;
···
         addr = desc.addr;
         buffer = xdp_umem_get_data(xs->umem, addr);
         err = skb_store_bits(skb, 0, buffer, len);
-        if (unlikely(err) || xskq_reserve_addr(xs->umem->cq)) {
+        /* This is the backpressure mechanism for the Tx path.
+         * Reserve space in the completion queue and only proceed
+         * if there is space in it. This avoids having to implement
+         * any buffering in the Tx path.
+         */
+        if (unlikely(err) || xskq_prod_reserve(xs->umem->cq)) {
             kfree_skb(skb);
             goto out;
         }
···
         skb->destructor = xsk_destruct_skb;

         err = dev_direct_xmit(skb, xs->queue_id);
-        xskq_discard_desc(xs->tx);
+        xskq_cons_release(xs->tx);
         /* Ignore NET_XMIT_CN as packet might have been sent */
         if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
             /* SKB completed but not sent */
···
         __xsk_sendmsg(sk);
     }

-    if (xs->rx && !xskq_empty_desc(xs->rx))
+    if (xs->rx && !xskq_prod_is_empty(xs->rx))
         mask |= EPOLLIN | EPOLLRDNORM;
-    if (xs->tx && !xskq_full_desc(xs->tx))
+    if (xs->tx && !xskq_cons_is_full(xs->tx))
         mask |= EPOLLOUT | EPOLLWRNORM;

     return mask;
···
 static int __init xsk_init(void)
 {
-    int err;
+    int err, cpu;

     err = proto_register(&xsk_proto, 0 /* no slab */);
     if (err)
···
     if (err)
         goto out_pernet;

+    for_each_possible_cpu(cpu)
+        INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
     return 0;

 out_pernet:
net/xdp/xsk_queue.c (+7 -8)
···
     q->chunk_mask = chunk_mask;
 }

-static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
+static size_t xskq_get_ring_size(struct xsk_queue *q, bool umem_queue)
 {
-    return sizeof(struct xdp_umem_ring) + q->nentries * sizeof(u64);
-}
+    struct xdp_umem_ring *umem_ring;
+    struct xdp_rxtx_ring *rxtx_ring;

-static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
-{
-    return sizeof(struct xdp_ring) + q->nentries * sizeof(struct xdp_desc);
+    if (umem_queue)
+        return struct_size(umem_ring, desc, q->nentries);
+    return struct_size(rxtx_ring, desc, q->nentries);
 }

 struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
···
     gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN |
                 __GFP_COMP | __GFP_NORETRY;
-    size = umem_queue ? xskq_umem_get_ring_size(q) :
-           xskq_rxtx_get_ring_size(q);
+    size = xskq_get_ring_size(q, umem_queue);

     q->ring = (struct xdp_ring *)__get_free_pages(gfp_flags,
                                                   get_order(size));
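The struct_size() conversion above replaces two open-coded `sizeof(header) + n * sizeof(element)` helpers with one call on the flexible-array member. A userspace sketch of the same layout arithmetic (toy struct and helper names, not from the patch; the kernel macro additionally saturates on overflow):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Toy stand-in for the kernel's xdp_umem_ring: a fixed header
 * followed by a flexible array of 64-bit descriptors. */
struct umem_ring_hdr {
    uint32_t producer;
    uint32_t consumer;
    uint64_t desc[];            /* flexible array member */
};

/* Equivalent of struct_size(ring, desc, nentries): offsetof() of the
 * flexible array plus n elements. Using offsetof rather than
 * sizeof(struct) avoids counting trailing padding before desc[]. */
static size_t ring_bytes(size_t nentries)
{
    return offsetof(struct umem_ring_hdr, desc) +
           nentries * sizeof(uint64_t);
}
```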
net/xdp/xsk_queue.h (+204 -197)
···
 #include <linux/if_xdp.h>
 #include <net/xdp_sock.h>

-#define RX_BATCH_SIZE 16
-#define LAZY_UPDATE_THRESHOLD 128
-
 struct xdp_ring {
     u32 producer ____cacheline_aligned_in_smp;
     u32 consumer ____cacheline_aligned_in_smp;
···
     u64 size;
     u32 ring_mask;
     u32 nentries;
-    u32 prod_head;
-    u32 prod_tail;
-    u32 cons_head;
-    u32 cons_tail;
+    u32 cached_prod;
+    u32 cached_cons;
     struct xdp_ring *ring;
     u64 invalid_descs;
 };
···
  * now and again after circling through the ring.
  */

-/* Common functions operating for both RXTX and umem queues */
+/* The operations on the rings are the following:
+ *
+ *           producer                      consumer
+ *
+ *           RESERVE entries               PEEK in the ring for entries
+ *           WRITE data into the ring      READ data from the ring
+ *           SUBMIT entries                RELEASE entries
+ *
+ * The producer reserves one or more entries in the ring. It can then
+ * fill in these entries and finally submit them so that they can be
+ * seen and read by the consumer.
+ *
+ * The consumer peeks into the ring to see if the producer has written
+ * any new entries. If so, the consumer can then read these entries
+ * and when it is done reading them release them back to the producer
+ * so that the producer can use these slots to fill in new entries.
+ *
+ * The function names below reflect these operations.
+ */

-static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
-{
-    return q ? q->invalid_descs : 0;
-}
+/* Functions that read and validate content from consumer rings. */

-static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
-{
-    u32 entries = q->prod_tail - q->cons_tail;
-
-    if (entries == 0) {
-        /* Refresh the local pointer */
-        q->prod_tail = READ_ONCE(q->ring->producer);
-        entries = q->prod_tail - q->cons_tail;
-    }
-
-    return (entries > dcnt) ? dcnt : entries;
-}
-
-static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
-{
-    u32 free_entries = q->nentries - (producer - q->cons_tail);
-
-    if (free_entries >= dcnt)
-        return free_entries;
-
-    /* Refresh the local tail pointer */
-    q->cons_tail = READ_ONCE(q->ring->consumer);
-    return q->nentries - (producer - q->cons_tail);
-}
-
-static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
-{
-    u32 entries = q->prod_tail - q->cons_tail;
-
-    if (entries >= cnt)
-        return true;
-
-    /* Refresh the local pointer. */
-    q->prod_tail = READ_ONCE(q->ring->producer);
-    entries = q->prod_tail - q->cons_tail;
-
-    return entries >= cnt;
-}
-
-/* UMEM queue */
-
-static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
-                                              u64 length)
+static inline bool xskq_cons_crosses_non_contig_pg(struct xdp_umem *umem,
+                                                   u64 addr,
+                                                   u64 length)
 {
     bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
     bool next_pg_contig =
···
     return cross_pg && !next_pg_contig;
 }

-static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
+static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q,
+                                                u64 addr,
+                                                u64 length,
+                                                struct xdp_umem *umem)
+{
+    u64 base_addr = xsk_umem_extract_addr(addr);
+
+    addr = xsk_umem_add_offset_to_addr(addr);
+    if (base_addr >= q->size || addr >= q->size ||
+        xskq_cons_crosses_non_contig_pg(umem, addr, length)) {
+        q->invalid_descs++;
+        return false;
+    }
+
+    return true;
+}
+
+static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr)
 {
     if (addr >= q->size) {
         q->invalid_descs++;
···
     return true;
 }

-static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
-                                                u64 length,
-                                                struct xdp_umem *umem)
+static inline bool xskq_cons_read_addr(struct xsk_queue *q, u64 *addr,
+                                       struct xdp_umem *umem)
 {
-    u64 base_addr = xsk_umem_extract_addr(addr);
+    struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;

-    addr = xsk_umem_add_offset_to_addr(addr);
-    if (base_addr >= q->size || addr >= q->size ||
-        xskq_crosses_non_contig_pg(umem, addr, length)) {
-        q->invalid_descs++;
-        return false;
-    }
+    while (q->cached_cons != q->cached_prod) {
+        u32 idx = q->cached_cons & q->ring_mask;

-    return true;
-}
-
-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
-                                      struct xdp_umem *umem)
-{
-    while (q->cons_tail != q->cons_head) {
-        struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
-        unsigned int idx = q->cons_tail & q->ring_mask;
-
-        *addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
+        *addr = ring->desc[idx] & q->chunk_mask;

         if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
-            if (xskq_is_valid_addr_unaligned(q, *addr,
+            if (xskq_cons_is_valid_unaligned(q, *addr,
                                              umem->chunk_size_nohr,
                                              umem))
-                return addr;
+                return true;
             goto out;
         }

-        if (xskq_is_valid_addr(q, *addr))
-            return addr;
+        if (xskq_cons_is_valid_addr(q, *addr))
+            return true;

 out:
-        q->cons_tail++;
+        q->cached_cons++;
     }

-    return NULL;
+    return false;
 }

-static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,
-                                  struct xdp_umem *umem)
-{
-    if (q->cons_tail == q->cons_head) {
-        smp_mb(); /* D, matches A */
-        WRITE_ONCE(q->ring->consumer, q->cons_tail);
-        q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
-
-        /* Order consumer and data */
-        smp_rmb();
-    }
-
-    return xskq_validate_addr(q, addr, umem);
-}
-
-static inline void xskq_discard_addr(struct xsk_queue *q)
-{
-    q->cons_tail++;
-}
-
-static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
-{
-    struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
-
-    if (xskq_nb_free(q, q->prod_tail, 1) == 0)
-        return -ENOSPC;
-
-    /* A, matches D */
-    ring->desc[q->prod_tail++ & q->ring_mask] = addr;
-
-    /* Order producer and data */
-    smp_wmb(); /* B, matches C */
-
-    WRITE_ONCE(q->ring->producer, q->prod_tail);
-    return 0;
-}
-
-static inline int xskq_produce_addr_lazy(struct xsk_queue *q, u64 addr)
-{
-    struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
-
-    if (xskq_nb_free(q, q->prod_head, LAZY_UPDATE_THRESHOLD) == 0)
-        return -ENOSPC;
-
-    /* A, matches D */
-    ring->desc[q->prod_head++ & q->ring_mask] = addr;
-    return 0;
-}
-
-static inline void xskq_produce_flush_addr_n(struct xsk_queue *q,
-                                             u32 nb_entries)
-{
-    /* Order producer and data */
-    smp_wmb(); /* B, matches C */
-
-    q->prod_tail += nb_entries;
-    WRITE_ONCE(q->ring->producer, q->prod_tail);
-}
-
-static inline int xskq_reserve_addr(struct xsk_queue *q)
-{
-    if (xskq_nb_free(q, q->prod_head, 1) == 0)
-        return -ENOSPC;
-
-    /* A, matches D */
-    q->prod_head++;
-    return 0;
-}
-
-/* Rx/Tx queue */
-
-static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
-                                      struct xdp_umem *umem)
+static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
+                                           struct xdp_desc *d,
+                                           struct xdp_umem *umem)
 {
     if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
-        if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))
+        if (!xskq_cons_is_valid_unaligned(q, d->addr, d->len, umem))
             return false;

         if (d->len > umem->chunk_size_nohr || d->options) {
···
         return true;
     }

-    if (!xskq_is_valid_addr(q, d->addr))
+    if (!xskq_cons_is_valid_addr(q, d->addr))
         return false;

     if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) ||
···
     return true;
 }

-static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
-                                                  struct xdp_desc *desc,
-                                                  struct xdp_umem *umem)
+static inline bool xskq_cons_read_desc(struct xsk_queue *q,
+                                       struct xdp_desc *desc,
+                                       struct xdp_umem *umem)
 {
-    while (q->cons_tail != q->cons_head) {
+    while (q->cached_cons != q->cached_prod) {
         struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
-        unsigned int idx = q->cons_tail & q->ring_mask;
+        u32 idx = q->cached_cons & q->ring_mask;

-        *desc = READ_ONCE(ring->desc[idx]);
-        if (xskq_is_valid_desc(q, desc, umem))
-            return desc;
+        *desc = ring->desc[idx];
+        if (xskq_cons_is_valid_desc(q, desc, umem))
+            return true;

-        q->cons_tail++;
+        q->cached_cons++;
     }

-    return NULL;
+    return false;
 }

-static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
-                                              struct xdp_desc *desc,
-                                              struct xdp_umem *umem)
+/* Functions for consumers */
+
+static inline void __xskq_cons_release(struct xsk_queue *q)
 {
-    if (q->cons_tail == q->cons_head) {
-        smp_mb(); /* D, matches A */
-        WRITE_ONCE(q->ring->consumer, q->cons_tail);
-        q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
-
-        /* Order consumer and data */
-        smp_rmb(); /* C, matches B */
-    }
-
-    return xskq_validate_desc(q, desc, umem);
+    smp_mb(); /* D, matches A */
+    WRITE_ONCE(q->ring->consumer, q->cached_cons);
 }

-static inline void xskq_discard_desc(struct xsk_queue *q)
+static inline void __xskq_cons_peek(struct xsk_queue *q)
 {
-    q->cons_tail++;
+    /* Refresh the local pointer */
+    q->cached_prod = READ_ONCE(q->ring->producer);
+    smp_rmb(); /* C, matches B */
 }

-static inline int xskq_produce_batch_desc(struct xsk_queue *q,
-                                          u64 addr, u32 len)
+static inline void xskq_cons_get_entries(struct xsk_queue *q)
 {
-    struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
-    unsigned int idx;
+    __xskq_cons_release(q);
+    __xskq_cons_peek(q);
+}

-    if (xskq_nb_free(q, q->prod_head, 1) == 0)
+static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
+{
+    u32 entries = q->cached_prod - q->cached_cons;
+
+    if (entries >= cnt)
+        return true;
+
+    __xskq_cons_peek(q);
+    entries = q->cached_prod - q->cached_cons;
+
+    return entries >= cnt;
+}
+
+static inline bool xskq_cons_peek_addr(struct xsk_queue *q, u64 *addr,
+                                       struct xdp_umem *umem)
+{
+    if (q->cached_prod == q->cached_cons)
+        xskq_cons_get_entries(q);
+    return xskq_cons_read_addr(q, addr, umem);
+}
+
+static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
+                                       struct xdp_desc *desc,
+                                       struct xdp_umem *umem)
+{
+    if (q->cached_prod == q->cached_cons)
+        xskq_cons_get_entries(q);
+    return xskq_cons_read_desc(q, desc, umem);
+}
+
+static inline void xskq_cons_release(struct xsk_queue *q)
+{
+    /* To improve performance, only update local state here.
+     * Reflect this to global state when we get new entries
+     * from the ring in xskq_cons_get_entries().
+     */
+    q->cached_cons++;
+}
+
+static inline bool xskq_cons_is_full(struct xsk_queue *q)
+{
+    /* No barriers needed since data is not accessed */
+    return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) ==
+        q->nentries;
+}
+
+/* Functions for producers */
+
+static inline bool xskq_prod_is_full(struct xsk_queue *q)
+{
+    u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons);
+
+    if (free_entries)
+        return false;
+
+    /* Refresh the local tail pointer */
+    q->cached_cons = READ_ONCE(q->ring->consumer);
+    free_entries = q->nentries - (q->cached_prod - q->cached_cons);
+
+    return !free_entries;
+}
+
+static inline int xskq_prod_reserve(struct xsk_queue *q)
+{
+    if (xskq_prod_is_full(q))
         return -ENOSPC;

     /* A, matches D */
-    idx = (q->prod_head++) & q->ring_mask;
+    q->cached_prod++;
+    return 0;
+}
+
+static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
+{
+    struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+    if (xskq_prod_is_full(q))
+        return -ENOSPC;
+
+    /* A, matches D */
+    ring->desc[q->cached_prod++ & q->ring_mask] = addr;
+    return 0;
+}
+
+static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
+                                         u64 addr, u32 len)
+{
+    struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
+    u32 idx;
+
+    if (xskq_prod_is_full(q))
+        return -ENOSPC;
+
+    /* A, matches D */
+    idx = q->cached_prod++ & q->ring_mask;
     ring->desc[idx].addr = addr;
     ring->desc[idx].len = len;

     return 0;
 }

-static inline void xskq_produce_flush_desc(struct xsk_queue *q)
+static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
 {
-    /* Order producer and data */
     smp_wmb(); /* B, matches C */

-    q->prod_tail = q->prod_head;
-    WRITE_ONCE(q->ring->producer, q->prod_tail);
+    WRITE_ONCE(q->ring->producer, idx);
 }

-static inline bool xskq_full_desc(struct xsk_queue *q)
+static inline void xskq_prod_submit(struct xsk_queue *q)
 {
-    return xskq_nb_avail(q, q->nentries) == q->nentries;
+    __xskq_prod_submit(q, q->cached_prod);
 }

-static inline bool xskq_empty_desc(struct xsk_queue *q)
+static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr)
 {
-    return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries;
+    struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+    u32 idx = q->ring->producer;
+
+    ring->desc[idx++ & q->ring_mask] = addr;
+
+    __xskq_prod_submit(q, idx);
+}
+
+static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
+{
+    __xskq_prod_submit(q, q->ring->producer + nb_entries);
+}
+
+static inline bool xskq_prod_is_empty(struct xsk_queue *q)
+{
+    /* No barriers needed since data is not accessed */
+    return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer);
+}
+
+/* For both producers and consumers */
+
+static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
+{
+    return q ? q->invalid_descs : 0;
 }

 void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);
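The RESERVE/WRITE/SUBMIT and PEEK/READ/RELEASE protocol described in the comment above can be sketched as a toy single-producer/single-consumer ring in plain C. This is illustrative, not the kernel code: C11 acquire/release atomics stand in for smp_rmb()/smp_wmb(), and the names are invented. It also shows the cached-index trick — each side trusts its local copies and rereads the shared index only when the cached view looks empty or full.

```c
#include <stdatomic.h>
#include <stdint.h>

#define RING_SIZE 8                 /* power of two, as in the xsk rings */
#define RING_MASK (RING_SIZE - 1)

/* Shared ring state (cache-line separated in the real thing). */
struct toy_ring {
    _Atomic uint32_t producer;
    _Atomic uint32_t consumer;
    uint64_t desc[RING_SIZE];
};

/* Per-side private caches, like cached_prod/cached_cons in xsk_queue. */
struct toy_view {
    struct toy_ring *r;
    uint32_t cached_prod;
    uint32_t cached_cons;
};

/* RESERVE + WRITE: claim a slot; reread the shared consumer index
 * only when the cached view looks full (the xskq_prod_is_full trick). */
static int prod_reserve(struct toy_view *p, uint64_t val)
{
    if (p->cached_prod - p->cached_cons == RING_SIZE) {
        p->cached_cons = atomic_load_explicit(&p->r->consumer,
                                              memory_order_acquire);
        if (p->cached_prod - p->cached_cons == RING_SIZE)
            return -1;              /* really full */
    }
    p->r->desc[p->cached_prod++ & RING_MASK] = val;
    return 0;
}

/* SUBMIT: publish all reserved entries to the consumer. */
static void prod_submit(struct toy_view *p)
{
    atomic_store_explicit(&p->r->producer, p->cached_prod,
                          memory_order_release);
}

/* PEEK + READ: refresh the cached producer index when the view is
 * empty, then read the next entry without consuming it. */
static int cons_peek(struct toy_view *c, uint64_t *val)
{
    if (c->cached_cons == c->cached_prod)
        c->cached_prod = atomic_load_explicit(&c->r->producer,
                                              memory_order_acquire);
    if (c->cached_cons == c->cached_prod)
        return 0;                   /* nothing published yet */
    *val = c->r->desc[c->cached_cons & RING_MASK];
    return 1;
}

/* RELEASE: hand the slot back to the producer. The real
 * xskq_cons_release() defers this publication for performance. */
static void cons_release(struct toy_view *c)
{
    atomic_store_explicit(&c->r->consumer, ++c->cached_cons,
                          memory_order_release);
}
```

Note how an entry that has only been reserved and written is invisible to the consumer until prod_submit() publishes the producer index — exactly the property the Tx backpressure code relies on.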
samples/bpf/Makefile (+4 -1)
···
 tprogs-y += lwt_len_hist
 tprogs-y += xdp_tx_iptunnel
 tprogs-y += test_map_in_map
+tprogs-y += per_socket_stats_example
+tprogs-y += xdp_redirect
 tprogs-y += xdp_redirect_map
 tprogs-y += xdp_redirect_cpu
 tprogs-y += xdp_monitor
···
 TPROGCFLAGS_bpf_load.o += -Wno-unused-variable

-TPROGS_LDLIBS += $(LIBBPF) -lelf
+TPROGS_LDLIBS += $(LIBBPF) -lelf -lz
 TPROGLDLIBS_tracex4 += -lrt
 TPROGLDLIBS_trace_output += -lrt
 TPROGLDLIBS_map_perf_test += -lrt
···
     readelf -S ./llvm_btf_verify.o | grep BTF; \
     /bin/rm -f ./llvm_btf_verify.o)

+BPF_EXTRA_CFLAGS += -fno-stack-protector
 ifneq ($(BTF_LLVM_PROBE),)
 BPF_EXTRA_CFLAGS += -g
 else
samples/bpf/xdp1_user.c (+4 -1)
···
             xdp_flags |= XDP_FLAGS_SKB_MODE;
             break;
         case 'N':
-            xdp_flags |= XDP_FLAGS_DRV_MODE;
+            /* default, set below */
             break;
         case 'F':
             xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
···
             return 1;
         }
     }
+
+    if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+        xdp_flags |= XDP_FLAGS_DRV_MODE;

     if (optind == argc) {
         usage(basename(argv[0]));
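This and the following sample changes all apply the same idiom: native (driver) XDP mode stops being opt-in via `-N` and instead becomes the default whenever generic (SKB) mode was not requested. A minimal sketch of that flag-defaulting logic (flag values from the `if_link.h` UAPI; the wrapper function is invented for illustration):

```c
/* Values from include/uapi/linux/if_link.h */
#define XDP_FLAGS_SKB_MODE (1U << 1)
#define XDP_FLAGS_DRV_MODE (1U << 2)

/* Resolve the attach mode the way the updated samples do: 'S' selects
 * generic (SKB) mode during option parsing; afterwards, native driver
 * mode is forced on for everyone who did not ask for SKB mode. */
static unsigned int resolve_xdp_flags(int skb_mode_requested)
{
    unsigned int xdp_flags = 0;

    if (skb_mode_requested)
        xdp_flags |= XDP_FLAGS_SKB_MODE;

    /* default, set after option parsing */
    if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
        xdp_flags |= XDP_FLAGS_DRV_MODE;

    return xdp_flags;
}
```

The effect is that `bpf_set_link_xdp_fd()` is always called with an explicit mode bit, rather than 0 meaning "whatever the kernel picks".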
samples/bpf/xdp_adjust_tail_user.c (+4 -1)
···
             xdp_flags |= XDP_FLAGS_SKB_MODE;
             break;
         case 'N':
-            xdp_flags |= XDP_FLAGS_DRV_MODE;
+            /* default, set below */
             break;
         case 'F':
             xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
···
         }
         opt_flags[opt] = 0;
     }
+
+    if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+        xdp_flags |= XDP_FLAGS_DRV_MODE;

     for (i = 0; i < strlen(optstr); i++) {
         if (opt_flags[(unsigned int)optstr[i]]) {
samples/bpf/xdp_fwd_user.c (+14 -3)
···
 #include "libbpf.h"
 #include <bpf/bpf.h>

+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+
 static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
 {
     int err;

-    err = bpf_set_link_xdp_fd(idx, prog_fd, 0);
+    err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags);
     if (err < 0) {
         printf("ERROR: failed to attach program to %s\n", name);
         return err;
···
 {
     int err;

-    err = bpf_set_link_xdp_fd(idx, -1, 0);
+    err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
     if (err < 0)
         printf("ERROR: failed to detach program from %s\n", name);

···
     int attach = 1;
     int ret = 0;

-    while ((opt = getopt(argc, argv, ":dD")) != -1) {
+    while ((opt = getopt(argc, argv, ":dDSF")) != -1) {
         switch (opt) {
         case 'd':
             attach = 0;
+            break;
+        case 'S':
+            xdp_flags |= XDP_FLAGS_SKB_MODE;
+            break;
+        case 'F':
+            xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
             break;
         case 'D':
             prog_name = "xdp_fwd_direct";
···
             return 1;
         }
     }
+
+    if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+        xdp_flags |= XDP_FLAGS_DRV_MODE;

     if (optind == argc) {
         usage(basename(argv[0]));
samples/bpf/xdp_redirect_cpu_user.c (+59 -4)
···
 #include <getopt.h>
 #include <net/if.h>
 #include <time.h>
+#include <linux/limits.h>
+
+#define __must_check
+#include <linux/err.h>

 #include <arpa/inet.h>
 #include <linux/if_link.h>
···
 static int cpus_count_map_fd;
 static int cpus_iterator_map_fd;
 static int exception_cnt_map_fd;
+
+#define NUM_TP 5
+struct bpf_link *tp_links[NUM_TP] = { 0 };
+static int tp_cnt = 0;

 /* Exit return codes */
 #define EXIT_OK 0
···
             printf("program on interface changed, not removing\n");
         }
     }
+    /* Detach tracepoints */
+    while (tp_cnt)
+        bpf_link__destroy(tp_links[--tp_cnt]);
+
     exit(EXIT_OK);
 }
···
         free_stats_record(prev);
 }

+static struct bpf_link * attach_tp(struct bpf_object *obj,
+                                   const char *tp_category,
+                                   const char* tp_name)
+{
+    struct bpf_program *prog;
+    struct bpf_link *link;
+    char sec_name[PATH_MAX];
+    int len;
+
+    len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s",
+                   tp_category, tp_name);
+    if (len < 0)
+        exit(EXIT_FAIL);
+
+    prog = bpf_object__find_program_by_title(obj, sec_name);
+    if (!prog) {
+        fprintf(stderr, "ERR: finding progsec: %s\n", sec_name);
+        exit(EXIT_FAIL_BPF);
+    }
+
+    link = bpf_program__attach_tracepoint(prog, tp_category, tp_name);
+    if (IS_ERR(link))
+        exit(EXIT_FAIL_BPF);
+
+    return link;
+}
+
+static void init_tracepoints(struct bpf_object *obj) {
+    tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err");
+    tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err");
+    tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception");
+    tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue");
+    tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread");
+}
+
 static int init_map_fds(struct bpf_object *obj)
 {
-    cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
-    rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
+    /* Maps updated by tracepoints */
     redirect_err_cnt_map_fd =
         bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt");
+    exception_cnt_map_fd =
+        bpf_object__find_map_fd_by_name(obj, "exception_cnt");
     cpumap_enqueue_cnt_map_fd =
         bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt");
     cpumap_kthread_cnt_map_fd =
         bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt");
+
+    /* Maps used by XDP */
+    rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
+    cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
     cpus_available_map_fd =
         bpf_object__find_map_fd_by_name(obj, "cpus_available");
     cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count");
     cpus_iterator_map_fd =
         bpf_object__find_map_fd_by_name(obj, "cpus_iterator");
-    exception_cnt_map_fd =
-        bpf_object__find_map_fd_by_name(obj, "exception_cnt");

     if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 ||
         redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 ||
···
             strerror(errno));
         return EXIT_FAIL;
     }
+    init_tracepoints(obj);
     if (init_map_fds(obj) < 0) {
         fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
         return EXIT_FAIL;
···
             return EXIT_FAIL_OPTION;
         }
     }
+
+    if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+        xdp_flags |= XDP_FLAGS_DRV_MODE;
+
     /* Required option */
     if (ifindex == -1) {
         fprintf(stderr, "ERR: required option --dev missing\n");
samples/bpf/xdp_redirect_map_user.c (+4 -1)
···
             xdp_flags |= XDP_FLAGS_SKB_MODE;
             break;
         case 'N':
-            xdp_flags |= XDP_FLAGS_DRV_MODE;
+            /* default, set below */
             break;
         case 'F':
             xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
···
             return 1;
         }
     }
+
+    if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+        xdp_flags |= XDP_FLAGS_DRV_MODE;

     if (optind == argc) {
         printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
samples/bpf/xdp_redirect_user.c (+4 -1)
···
             xdp_flags |= XDP_FLAGS_SKB_MODE;
             break;
         case 'N':
-            xdp_flags |= XDP_FLAGS_DRV_MODE;
+            /* default, set below */
             break;
         case 'F':
             xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
···
             return 1;
         }
     }
+
+    if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+        xdp_flags |= XDP_FLAGS_DRV_MODE;

     if (optind == argc) {
         printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
samples/bpf/xdp_router_ipv4_user.c (+3)
···
         }
     }

+    if (!(flags & XDP_FLAGS_SKB_MODE))
+        flags |= XDP_FLAGS_DRV_MODE;
+
     if (optind == ac) {
         usage(basename(argv[0]));
         return 1;
samples/bpf/xdp_rxq_info_user.c (+4)
···
             return EXIT_FAIL_OPTION;
         }
     }
+
+    if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+        xdp_flags |= XDP_FLAGS_DRV_MODE;
+
     /* Required option */
     if (ifindex == -1) {
         fprintf(stderr, "ERR: required option --dev missing\n");
samples/bpf/xdp_sample_pkts_user.c (+9 -3)
···
     __u32 curr_prog_id = 0;
     int err = 0;

-    err = bpf_get_link_xdp_id(idx, &curr_prog_id, 0);
+    err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags);
     if (err) {
         printf("bpf_get_link_xdp_id failed\n");
         return err;
     }
     if (prog_id == curr_prog_id) {
-        err = bpf_set_link_xdp_fd(idx, -1, 0);
+        err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
         if (err < 0)
             printf("ERROR: failed to detach prog from %s\n", name);
     } else if (!curr_prog_id) {
···
         .prog_type = BPF_PROG_TYPE_XDP,
     };
     struct perf_buffer_opts pb_opts = {};
-    const char *optstr = "F";
+    const char *optstr = "FS";
     int prog_fd, map_fd, opt;
     struct bpf_object *obj;
     struct bpf_map *map;
···
         case 'F':
             xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
             break;
+        case 'S':
+            xdp_flags |= XDP_FLAGS_SKB_MODE;
+            break;
         default:
             usage(basename(argv[0]));
             return 1;
         }
     }
+
+    if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+        xdp_flags |= XDP_FLAGS_DRV_MODE;

     if (optind == argc) {
         usage(basename(argv[0]));
samples/bpf/xdp_tx_iptunnel_user.c (+4 -1)
···
             xdp_flags |= XDP_FLAGS_SKB_MODE;
             break;
         case 'N':
-            xdp_flags |= XDP_FLAGS_DRV_MODE;
+            /* default, set below */
             break;
         case 'F':
             xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
···
         }
         opt_flags[opt] = 0;
     }
+
+    if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+        xdp_flags |= XDP_FLAGS_DRV_MODE;

     for (i = 0; i < strlen(optstr); i++) {
         if (opt_flags[(unsigned int)optstr[i]]) {
samples/bpf/xdpsock_user.c (+392 -41)
···
 #include <linux/if_link.h>
 #include <linux/if_xdp.h>
 #include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <arpa/inet.h>
 #include <locale.h>
 #include <net/ethernet.h>
 #include <net/if.h>
···
 #endif

 #define NUM_FRAMES (4 * 1024)
-#define BATCH_SIZE 64
+#define MIN_PKT_SIZE 64

 #define DEBUG_HEXDUMP 0

 typedef __u64 u64;
 typedef __u32 u32;
+typedef __u16 u16;
+typedef __u8 u8;

 static unsigned long prev_time;
···
 static const char *opt_if = "";
 static int opt_ifindex;
 static int opt_queue;
+static unsigned long opt_duration;
+static unsigned long start_time;
+static bool benchmark_done;
+static u32 opt_batch_size = 64;
+static int opt_pkt_count;
+static u16 opt_pkt_size = MIN_PKT_SIZE;
+static u32 opt_pkt_fill_pattern = 0x12345678;
 static int opt_poll;
 static int opt_interval = 1;
 static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
···
     }
 }

+static bool is_benchmark_done(void)
+{
+    if (opt_duration > 0) {
+        unsigned long dt = (get_nsecs() - start_time);
+
+        if (dt >= opt_duration)
+            benchmark_done = true;
+    }
+    return benchmark_done;
+}
+
 static void *poller(void *arg)
 {
     (void)arg;
-    for (;;) {
+    while (!is_benchmark_done()) {
         sleep(opt_interval);
         dump_stats();
     }
···
 static void int_exit(int sig)
 {
+    benchmark_done = true;
+}
+
+static void xdpsock_cleanup(void)
+{
     struct xsk_umem *umem = xsks[0]->umem->umem;
     int i;

···
         xsk_socket__delete(xsks[i]->xsk);
     (void)xsk_umem__delete(umem);
     remove_xdp_program();
-
-    exit(EXIT_SUCCESS);
 }

 static void __exit_with_error(int error, const char *file, const char *func,
···
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
                                                  __LINE__)
-
-static const char pkt_data[] =
-    "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
-    "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
-    "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
-    "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
-
 static void swap_mac_addresses(void *data)
 {
     struct ether_header *eth = (struct ether_header *)data;
···
     printf("\n");
 }

-static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
+static void *memset32_htonl(void *dest, u32 val, u32 size)
+{
+    u32 *ptr = (u32 *)dest;
+    int i;
+
+    val = htonl(val);
+
+    for (i = 0; i < (size & (~0x3)); i += 4)
+        ptr[i >> 2] = val;
+
+    for (; i < size; i++)
+        ((char *)dest)[i] = ((char *)&val)[i & 3];
+
+    return dest;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline unsigned short from32to16(unsigned int x)
+{
+    /* add up 16-bit and 16-bit for 16+c bit */
+    x = (x & 0xffff) + (x >> 16);
+    /* add up carry.. */
+    x = (x & 0xffff) + (x >> 16);
+    return x;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static unsigned int do_csum(const unsigned char *buff, int len)
+{
+    unsigned int result = 0;
+    int odd;
+
+    if (len <= 0)
+        goto out;
+    odd = 1 & (unsigned long)buff;
+    if (odd) {
+#ifdef __LITTLE_ENDIAN
+        result += (*buff << 8);
+#else
+        result = *buff;
+#endif
+        len--;
+        buff++;
+    }
+    if (len >= 2) {
+        if (2 & (unsigned long)buff) {
+            result += *(unsigned short *)buff;
+            len -= 2;
+            buff += 2;
+        }
+        if (len >= 4) {
+            const unsigned char *end = buff +
+                                       ((unsigned int)len & ~3);
+            unsigned int carry = 0;
+
+            do {
+                unsigned int w = *(unsigned int *)buff;
+
+                buff += 4;
+                result += carry;
+                result += w;
+                carry = (w > result);
+            } while (buff < end);
+            result += carry;
+            result = (result & 0xffff) + (result >> 16);
+        }
+        if (len & 2) {
+            result += *(unsigned short *)buff;
+            buff += 2;
+        }
+    }
+    if (len & 1)
+#ifdef __LITTLE_ENDIAN
+        result += *buff;
+#else
+        result += (*buff << 8);
+#endif
+    result = from32to16(result);
+    if (odd)
+        result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+out:
+    return result;
+}
+
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+    return (__force __sum16)~do_csum(iph, ihl * 4);
+}
+
+/*
+ * Fold a partial checksum
+ * This function code has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+    u32 sum = (__force u32)csum;
+
+    sum = (sum & 0xffff) + (sum >> 16);
+    sum = (sum & 0xffff) + (sum >> 16);
+    return (__force __sum16)~sum;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline u32 from64to32(u64 x)
+{
+    /* add up 32-bit and 32-bit for 32+c bit */
+    x = (x & 0xffffffff) + (x >> 32);
+    /* add up carry.. */
+    x = (x & 0xffffffff) + (x >> 32);
+    return (u32)x;
+}
+
+__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+                          __u32 len, __u8 proto, __wsum sum);
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+                          __u32 len, __u8 proto, __wsum sum)
+{
+    unsigned long long s = (__force u32)sum;
+
+    s += (__force u32)saddr;
+    s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
+    s += proto + len;
+#else
+    s += (proto + len) << 8;
+#endif
+    return (__force __wsum)from64to32(s);
+}
+
+/*
+ * This function has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+                  __u8 proto, __wsum sum)
+{
+    return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len,
+                           u8 proto, u16 *udp_pkt)
+{
+    u32 csum = 0;
+    u32 cnt = 0;
+
+    /* udp hdr and data */
+    for (; cnt < len; cnt += 2)
+        csum += udp_pkt[cnt >> 1];
+
+    return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
+}
+
+#define ETH_FCS_SIZE 4
+
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+                      sizeof(struct udphdr))
+
+#define PKT_SIZE          (opt_pkt_size - ETH_FCS_SIZE)
+#define IP_PKT_SIZE       (PKT_SIZE - sizeof(struct ethhdr))
+#define UDP_PKT_SIZE      (IP_PKT_SIZE - sizeof(struct iphdr))
+#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
+
+static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
+
+static void gen_eth_hdr_data(void)
+{
+    struct udphdr *udp_hdr = (struct udphdr *)(pkt_data +
+                                               sizeof(struct ethhdr) +
+                                               sizeof(struct iphdr));
+    struct iphdr *ip_hdr = (struct iphdr *)(pkt_data +
+                                            sizeof(struct ethhdr));
+    struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
+
+    /* ethernet header */
+    memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN);
+    memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN);
+    eth_hdr->h_proto = htons(ETH_P_IP);
+
+    /* IP header */
+    ip_hdr->version = IPVERSION;
+    ip_hdr->ihl = 0x5; /* 20 byte header */
+    ip_hdr->tos = 0x0;
+    ip_hdr->tot_len = htons(IP_PKT_SIZE);
+    ip_hdr->id = 0;
+    ip_hdr->frag_off = 0;
+    ip_hdr->ttl = IPDEFTTL;
+    ip_hdr->protocol = IPPROTO_UDP;
+    ip_hdr->saddr = htonl(0x0a0a0a10);
+    ip_hdr->daddr = htonl(0x0a0a0a20);
+
+    /* IP header checksum */
+    ip_hdr->check = 0;
+    ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl);
+
+    /* UDP header */
+    udp_hdr->source = htons(0x1000);
+    udp_hdr->dest = htons(0x1000);
+    udp_hdr->len = htons(UDP_PKT_SIZE);
+
+    /* UDP data */
+    memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
+                   UDP_PKT_DATA_SIZE);
/* UDP header checksum */ 505 + udp_hdr->check = 0; 506 + udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, 507 + IPPROTO_UDP, (u16 *)udp_hdr); 508 + } 509 + 510 + static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr) 297 511 { 298 512 memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, 299 - sizeof(pkt_data) - 1); 300 - return sizeof(pkt_data) - 1; 513 + PKT_SIZE); 301 514 } 302 515 303 516 static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) ··· 626 375 {"unaligned", no_argument, 0, 'u'}, 627 376 {"shared-umem", no_argument, 0, 'M'}, 628 377 {"force", no_argument, 0, 'F'}, 378 + {"duration", required_argument, 0, 'd'}, 379 + {"batch-size", required_argument, 0, 'b'}, 380 + {"tx-pkt-count", required_argument, 0, 'C'}, 381 + {"tx-pkt-size", required_argument, 0, 's'}, 382 + {"tx-pkt-pattern", required_argument, 0, 'P'}, 629 383 {0, 0, 0, 0} 630 384 }; 631 385 ··· 655 399 " -u, --unaligned Enable unaligned chunk placement\n" 656 400 " -M, --shared-umem Enable XDP_SHARED_UMEM\n" 657 401 " -F, --force Force loading the XDP prog\n" 402 + " -d, --duration=n Duration in secs to run command.\n" 403 + " Default: forever.\n" 404 + " -b, --batch-size=n Batch size for sending or receiving\n" 405 + " packets. Default: %d\n" 406 + " -C, --tx-pkt-count=n Number of packets to send.\n" 407 + " Default: Continuous packets.\n" 408 + " -s, --tx-pkt-size=n Transmit packet size.\n" 409 + " (Default: %d bytes)\n" 410 + " Min size: %d, Max size %d.\n" 411 + " -P, --tx-pkt-pattern=nPacket fill pattern. 
Default: 0x%x\n" 658 412 "\n"; 659 - fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE); 413 + fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, 414 + opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, 415 + XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern); 416 + 660 417 exit(EXIT_FAILURE); 661 418 } 662 419 ··· 680 411 opterr = 0; 681 412 682 413 for (;;) { 683 - c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:muM", 414 + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:", 684 415 long_options, &option_index); 685 416 if (c == -1) 686 417 break; ··· 709 440 opt_xdp_bind_flags |= XDP_COPY; 710 441 break; 711 442 case 'N': 712 - opt_xdp_flags |= XDP_FLAGS_DRV_MODE; 443 + /* default, set below */ 713 444 break; 714 445 case 'n': 715 446 opt_interval = atoi(optarg); ··· 738 469 case 'M': 739 470 opt_num_xsks = MAX_SOCKS; 740 471 break; 472 + case 'd': 473 + opt_duration = atoi(optarg); 474 + opt_duration *= 1000000000; 475 + break; 476 + case 'b': 477 + opt_batch_size = atoi(optarg); 478 + break; 479 + case 'C': 480 + opt_pkt_count = atoi(optarg); 481 + break; 482 + case 's': 483 + opt_pkt_size = atoi(optarg); 484 + if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) || 485 + opt_pkt_size < MIN_PKT_SIZE) { 486 + fprintf(stderr, 487 + "ERROR: Invalid frame size %d\n", 488 + opt_pkt_size); 489 + usage(basename(argv[0])); 490 + } 491 + break; 492 + case 'P': 493 + opt_pkt_fill_pattern = strtol(optarg, NULL, 16); 494 + break; 741 495 default: 742 496 usage(basename(argv[0])); 743 497 } 744 498 } 499 + 500 + if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE)) 501 + opt_xdp_flags |= XDP_FLAGS_DRV_MODE; 745 502 746 503 opt_ifindex = if_nametoindex(opt_if); 747 504 if (!opt_ifindex) { ··· 808 513 if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) 809 514 kick_tx(xsk); 810 515 811 - ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE : 516 + ndescs = (xsk->outstanding_tx > opt_batch_size) ? 
opt_batch_size : 812 517 xsk->outstanding_tx; 813 518 814 519 /* re-add completed Tx buffers */ ··· 837 542 } 838 543 } 839 544 840 - static inline void complete_tx_only(struct xsk_socket_info *xsk) 545 + static inline void complete_tx_only(struct xsk_socket_info *xsk, 546 + int batch_size) 841 547 { 842 548 unsigned int rcvd; 843 549 u32 idx; ··· 849 553 if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) 850 554 kick_tx(xsk); 851 555 852 - rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx); 556 + rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); 853 557 if (rcvd > 0) { 854 558 xsk_ring_cons__release(&xsk->umem->cq, rcvd); 855 559 xsk->outstanding_tx -= rcvd; ··· 863 567 u32 idx_rx = 0, idx_fq = 0; 864 568 int ret; 865 569 866 - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); 570 + rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); 867 571 if (!rcvd) { 868 572 if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) 869 573 ret = poll(fds, num_socks, opt_timeout); ··· 915 619 916 620 for (i = 0; i < num_socks; i++) 917 621 rx_drop(xsks[i], fds); 622 + 623 + if (benchmark_done) 624 + break; 918 625 } 919 626 } 920 627 921 - static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb) 628 + static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size) 922 629 { 923 630 u32 idx; 631 + unsigned int i; 924 632 925 - if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) { 926 - unsigned int i; 927 - 928 - for (i = 0; i < BATCH_SIZE; i++) { 929 - xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr = 930 - (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; 931 - xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len = 932 - sizeof(pkt_data) - 1; 933 - } 934 - 935 - xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE); 936 - xsk->outstanding_tx += BATCH_SIZE; 937 - frame_nb += BATCH_SIZE; 938 - frame_nb %= NUM_FRAMES; 633 + while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < 634 + batch_size) { 635 + 
complete_tx_only(xsk, batch_size); 939 636 } 940 637 941 - complete_tx_only(xsk); 638 + for (i = 0; i < batch_size; i++) { 639 + struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, 640 + idx + i); 641 + tx_desc->addr = (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; 642 + tx_desc->len = PKT_SIZE; 643 + } 644 + 645 + xsk_ring_prod__submit(&xsk->tx, batch_size); 646 + xsk->outstanding_tx += batch_size; 647 + frame_nb += batch_size; 648 + frame_nb %= NUM_FRAMES; 649 + complete_tx_only(xsk, batch_size); 650 + } 651 + 652 + static inline int get_batch_size(int pkt_cnt) 653 + { 654 + if (!opt_pkt_count) 655 + return opt_batch_size; 656 + 657 + if (pkt_cnt + opt_batch_size <= opt_pkt_count) 658 + return opt_batch_size; 659 + 660 + return opt_pkt_count - pkt_cnt; 661 + } 662 + 663 + static void complete_tx_only_all(void) 664 + { 665 + bool pending; 666 + int i; 667 + 668 + do { 669 + pending = false; 670 + for (i = 0; i < num_socks; i++) { 671 + if (xsks[i]->outstanding_tx) { 672 + complete_tx_only(xsks[i], opt_batch_size); 673 + pending = !!xsks[i]->outstanding_tx; 674 + } 675 + } 676 + } while (pending); 942 677 } 943 678 944 679 static void tx_only_all(void) 945 680 { 946 681 struct pollfd fds[MAX_SOCKS] = {}; 947 682 u32 frame_nb[MAX_SOCKS] = {}; 683 + int pkt_cnt = 0; 948 684 int i, ret; 949 685 950 686 for (i = 0; i < num_socks; i++) { ··· 984 656 fds[0].events = POLLOUT; 985 657 } 986 658 987 - for (;;) { 659 + while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { 660 + int batch_size = get_batch_size(pkt_cnt); 661 + 988 662 if (opt_poll) { 989 663 ret = poll(fds, num_socks, opt_timeout); 990 664 if (ret <= 0) ··· 997 667 } 998 668 999 669 for (i = 0; i < num_socks; i++) 1000 - tx_only(xsks[i], frame_nb[i]); 670 + tx_only(xsks[i], frame_nb[i], batch_size); 671 + 672 + pkt_cnt += batch_size; 673 + 674 + if (benchmark_done) 675 + break; 1001 676 } 677 + 678 + if (opt_pkt_count) 679 + complete_tx_only_all(); 1002 680 } 1003 681 1004 682 static 
void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) ··· 1017 679 1018 680 complete_tx_l2fwd(xsk, fds); 1019 681 1020 - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); 682 + rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); 1021 683 if (!rcvd) { 1022 684 if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) 1023 685 ret = poll(fds, num_socks, opt_timeout); ··· 1074 736 1075 737 for (i = 0; i < num_socks; i++) 1076 738 l2fwd(xsks[i], fds); 739 + 740 + if (benchmark_done) 741 + break; 1077 742 } 1078 743 } 1079 744 ··· 1172 831 for (i = 0; i < opt_num_xsks; i++) 1173 832 xsks[num_socks++] = xsk_configure_socket(umem, rx, tx); 1174 833 1175 - if (opt_bench == BENCH_TXONLY) 834 + if (opt_bench == BENCH_TXONLY) { 835 + gen_eth_hdr_data(); 836 + 1176 837 for (i = 0; i < NUM_FRAMES; i++) 1177 838 gen_eth_frame(umem, i * opt_xsk_frame_size); 839 + } 1178 840 1179 841 if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY) 1180 842 enter_xsks_into_map(obj); ··· 1193 849 exit_with_error(ret); 1194 850 1195 851 prev_time = get_nsecs(); 852 + start_time = prev_time; 1196 853 1197 854 if (opt_bench == BENCH_RXDROP) 1198 855 rx_drop_all(); ··· 1201 856 tx_only_all(); 1202 857 else 1203 858 l2fwd_all(); 859 + 860 + benchmark_done = true; 861 + 862 + pthread_join(pt, NULL); 863 + 864 + xdpsock_cleanup(); 1204 865 1205 866 return 0; 1206 867 }
tools/bpf/bpftool/Documentation/bpftool-gen.rst (+305)
··· 1 + ================ 2 + bpftool-gen 3 + ================ 4 + ------------------------------------------------------------------------------- 5 + tool for BPF code-generation 6 + ------------------------------------------------------------------------------- 7 + 8 + :Manual section: 8 9 + 10 + SYNOPSIS 11 + ======== 12 + 13 + **bpftool** [*OPTIONS*] **gen** *COMMAND* 14 + 15 + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } 16 + 17 + *COMMAND* := { **skeleton | **help** } 18 + 19 + GEN COMMANDS 20 + ============= 21 + 22 + | **bpftool** **gen skeleton** *FILE* 23 + | **bpftool** **gen help** 24 + 25 + DESCRIPTION 26 + =========== 27 + **bpftool gen skeleton** *FILE* 28 + Generate BPF skeleton C header file for a given *FILE*. 29 + 30 + BPF skeleton is an alternative interface to existing libbpf 31 + APIs for working with BPF objects. Skeleton code is intended 32 + to significantly shorten and simplify code to load and work 33 + with BPF programs from userspace side. Generated code is 34 + tailored to specific input BPF object *FILE*, reflecting its 35 + structure by listing out available maps, program, variables, 36 + etc. Skeleton eliminates the need to lookup mentioned 37 + components by name. Instead, if skeleton instantiation 38 + succeeds, they are populated in skeleton structure as valid 39 + libbpf types (e.g., struct bpf_map pointer) and can be 40 + passed to existing generic libbpf APIs. 41 + 42 + In addition to simple and reliable access to maps and 43 + programs, skeleton provides a storage for BPF links (struct 44 + bpf_link) for each BPF program within BPF object. When 45 + requested, supported BPF programs will be automatically 46 + attached and resulting BPF links stored for further use by 47 + user in pre-allocated fields in skeleton struct. For BPF 48 + programs that can't be automatically attached by libbpf, 49 + user can attach them manually, but store resulting BPF link 50 + in per-program link field. 
All such set up links will be 51 + automatically destroyed on BPF skeleton destruction. This 52 + eliminates the need for users to manage links manually and 53 + rely on libbpf support to detach programs and free up 54 + resources. 55 + 56 + Another facility provided by BPF skeleton is an interface to 57 + global variables of all supported kinds: mutable, read-only, 58 + as well as extern ones. This interface allows to pre-setup 59 + initial values of variables before BPF object is loaded and 60 + verified by kernel. For non-read-only variables, the same 61 + interface can be used to fetch values of global variables on 62 + userspace side, even if they are modified by BPF code. 63 + 64 + During skeleton generation, contents of source BPF object 65 + *FILE* is embedded within generated code and is thus not 66 + necessary to keep around. This ensures skeleton and BPF 67 + object file are matching 1-to-1 and always stay in sync. 68 + Generated code is dual-licensed under LGPL-2.1 and 69 + BSD-2-Clause licenses. 70 + 71 + It is a design goal and guarantee that skeleton interfaces 72 + are interoperable with generic libbpf APIs. User should 73 + always be able to use skeleton API to create and load BPF 74 + object, and later use libbpf APIs to keep working with 75 + specific maps, programs, etc. 76 + 77 + As part of skeleton, few custom functions are generated. 78 + Each of them is prefixed with object name, derived from 79 + object file name. I.e., if BPF object file name is 80 + **example.o**, BPF object name will be **example**. The 81 + following custom functions are provided in such case: 82 + 83 + - **example__open** and **example__open_opts**. 84 + These functions are used to instantiate skeleton. It 85 + corresponds to libbpf's **bpf_object__open()** API. 86 + **_opts** variants accepts extra **bpf_object_open_opts** 87 + options. 88 + 89 + - **example__load**. 90 + This function creates maps, loads and verifies BPF 91 + programs, initializes global data maps. 
It corresponds to 92 + libppf's **bpf_object__load** API. 93 + 94 + - **example__open_and_load** combines **example__open** and 95 + **example__load** invocations in one commonly used 96 + operation. 97 + 98 + - **example__attach** and **example__detach** 99 + This pair of functions allow to attach and detach, 100 + correspondingly, already loaded BPF object. Only BPF 101 + programs of types supported by libbpf for auto-attachment 102 + will be auto-attached and their corresponding BPF links 103 + instantiated. For other BPF programs, user can manually 104 + create a BPF link and assign it to corresponding fields in 105 + skeleton struct. **example__detach** will detach both 106 + links created automatically, as well as those populated by 107 + user manually. 108 + 109 + - **example__destroy** 110 + Detach and unload BPF programs, free up all the resources 111 + used by skeleton and BPF object. 112 + 113 + If BPF object has global variables, corresponding structs 114 + with memory layout corresponding to global data data section 115 + layout will be created. Currently supported ones are: *.data*, 116 + *.bss*, *.rodata*, and *.kconfig* structs/data sections. 117 + These data sections/structs can be used to set up initial 118 + values of variables, if set before **example__load**. 119 + Afterwards, if target kernel supports memory-mapped BPF 120 + arrays, same structs can be used to fetch and update 121 + (non-read-only) data from userspace, with same simplicity 122 + as for BPF side. 123 + 124 + **bpftool gen help** 125 + Print short help message. 126 + 127 + OPTIONS 128 + ======= 129 + -h, --help 130 + Print short generic help message (similar to **bpftool help**). 131 + 132 + -V, --version 133 + Print version number (similar to **bpftool version**). 134 + 135 + -j, --json 136 + Generate JSON output. For commands that cannot produce JSON, 137 + this option has no effect. 138 + 139 + -p, --pretty 140 + Generate human-readable JSON output. Implies **-j**. 
141 + 142 + -d, --debug 143 + Print all logs available from libbpf, including debug-level 144 + information. 145 + 146 + EXAMPLES 147 + ======== 148 + **$ cat example.c** 149 + :: 150 + 151 + #include <stdbool.h> 152 + #include <linux/ptrace.h> 153 + #include <linux/bpf.h> 154 + #include "bpf_helpers.h" 155 + 156 + const volatile int param1 = 42; 157 + bool global_flag = true; 158 + struct { int x; } data = {}; 159 + 160 + struct { 161 + __uint(type, BPF_MAP_TYPE_HASH); 162 + __uint(max_entries, 128); 163 + __type(key, int); 164 + __type(value, long); 165 + } my_map SEC(".maps"); 166 + 167 + SEC("raw_tp/sys_enter") 168 + int handle_sys_enter(struct pt_regs *ctx) 169 + { 170 + static long my_static_var; 171 + if (global_flag) 172 + my_static_var++; 173 + else 174 + data.x += param1; 175 + return 0; 176 + } 177 + 178 + SEC("raw_tp/sys_exit") 179 + int handle_sys_exit(struct pt_regs *ctx) 180 + { 181 + int zero = 0; 182 + bpf_map_lookup_elem(&my_map, &zero); 183 + return 0; 184 + } 185 + 186 + This is example BPF application with two BPF programs and a mix of BPF maps 187 + and global variables. 188 + 189 + **$ bpftool gen skeleton example.o** 190 + :: 191 + 192 + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 193 + 194 + /* THIS FILE IS AUTOGENERATED! 
*/ 195 + #ifndef __EXAMPLE_SKEL_H__ 196 + #define __EXAMPLE_SKEL_H__ 197 + 198 + #include <stdlib.h> 199 + #include <libbpf.h> 200 + 201 + struct example { 202 + struct bpf_object_skeleton *skeleton; 203 + struct bpf_object *obj; 204 + struct { 205 + struct bpf_map *rodata; 206 + struct bpf_map *data; 207 + struct bpf_map *bss; 208 + struct bpf_map *my_map; 209 + } maps; 210 + struct { 211 + struct bpf_program *handle_sys_enter; 212 + struct bpf_program *handle_sys_exit; 213 + } progs; 214 + struct { 215 + struct bpf_link *handle_sys_enter; 216 + struct bpf_link *handle_sys_exit; 217 + } links; 218 + struct example__bss { 219 + struct { 220 + int x; 221 + } data; 222 + } *bss; 223 + struct example__data { 224 + _Bool global_flag; 225 + long int handle_sys_enter_my_static_var; 226 + } *data; 227 + struct example__rodata { 228 + int param1; 229 + } *rodata; 230 + }; 231 + 232 + static void example__destroy(struct example *obj); 233 + static inline struct example *example__open_opts( 234 + const struct bpf_object_open_opts *opts); 235 + static inline struct example *example__open(); 236 + static inline int example__load(struct example *obj); 237 + static inline struct example *example__open_and_load(); 238 + static inline int example__attach(struct example *obj); 239 + static inline void example__detach(struct example *obj); 240 + 241 + #endif /* __EXAMPLE_SKEL_H__ */ 242 + 243 + **$ cat example_user.c** 244 + :: 245 + 246 + #include "example.skel.h" 247 + 248 + int main() 249 + { 250 + struct example *skel; 251 + int err = 0; 252 + 253 + skel = example__open(); 254 + if (!skel) 255 + goto cleanup; 256 + 257 + skel->rodata->param1 = 128; 258 + 259 + err = example__load(skel); 260 + if (err) 261 + goto cleanup; 262 + 263 + err = example__attach(skel); 264 + if (err) 265 + goto cleanup; 266 + 267 + /* all libbpf APIs are usable */ 268 + printf("my_map name: %s\n", bpf_map__name(skel->maps.my_map)); 269 + printf("sys_enter prog FD: %d\n", 270 + 
bpf_program__fd(skel->progs.handle_sys_enter)); 271 + 272 + /* detach and re-attach sys_exit program */ 273 + bpf_link__destroy(skel->links.handle_sys_exit); 274 + skel->links.handle_sys_exit = 275 + bpf_program__attach(skel->progs.handle_sys_exit); 276 + 277 + printf("my_static_var: %ld\n", 278 + skel->bss->handle_sys_enter_my_static_var); 279 + 280 + cleanup: 281 + example__destroy(skel); 282 + return err; 283 + } 284 + 285 + **# ./example_user** 286 + :: 287 + 288 + my_map name: my_map 289 + sys_enter prog FD: 8 290 + my_static_var: 7 291 + 292 + This is a stripped-out version of skeleton generated for above example code. 293 + 294 + SEE ALSO 295 + ======== 296 + **bpf**\ (2), 297 + **bpf-helpers**\ (7), 298 + **bpftool**\ (8), 299 + **bpftool-map**\ (8), 300 + **bpftool-prog**\ (8), 301 + **bpftool-cgroup**\ (8), 302 + **bpftool-feature**\ (8), 303 + **bpftool-net**\ (8), 304 + **bpftool-perf**\ (8), 305 + **bpftool-btf**\ (8)
tools/bpf/bpftool/Documentation/bpftool-map.rst (+7 -5)
··· 39 39 | **bpftool** **map freeze** *MAP* 40 40 | **bpftool** **map help** 41 41 | 42 - | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } 42 + | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* | **name** *MAP_NAME* } 43 43 | *DATA* := { [**hex**] *BYTES* } 44 - | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } 44 + | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } 45 45 | *VALUE* := { *DATA* | *MAP* | *PROG* } 46 46 | *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } 47 47 | *TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash** ··· 55 55 =========== 56 56 **bpftool map { show | list }** [*MAP*] 57 57 Show information about loaded maps. If *MAP* is specified 58 - show information only about given map, otherwise list all 59 - maps currently loaded on the system. 58 + show information only about given maps, otherwise list all 59 + maps currently loaded on the system. In case of **name**, 60 + *MAP* may match several maps which will all be shown. 60 61 61 62 Output will start with map ID followed by map type and 62 63 zero or more named attributes (depending on kernel version). ··· 67 66 as *FILE*. 68 67 69 68 **bpftool map dump** *MAP* 70 - Dump all entries in a given *MAP*. 69 + Dump all entries in a given *MAP*. In case of **name**, 70 + *MAP* may match several maps which will all be dumped. 71 71 72 72 **bpftool map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] 73 73 Update map entry for a given *KEY*.
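The new **name** keyword deliberately allows multiple matches, since BPF map names are not unique. The effect on **show**/**dump** can be pictured with a canned listing (the IDs and names below are invented, and the real filtering happens inside bpftool, not via grep):

```shell
# Hypothetical `bpftool map show` plain output; two maps share a name.
show='10: hash  name session_map  flags 0x0
11: array  name config_map  flags 0x0
12: hash  name session_map  flags 0x0'

# `bpftool map show name session_map` would report both id 10 and id 12;
# the equivalent by-hand filter:
printf '%s\n' "$show" | grep 'name session_map'
```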
tools/bpf/bpftool/Documentation/bpftool-prog.rst (+13 -5)
··· 33 33 | **bpftool** **prog help** 34 34 | 35 35 | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } 36 - | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } 36 + | *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } 37 37 | *TYPE* := { 38 38 | **socket** | **kprobe** | **kretprobe** | **classifier** | **action** | 39 39 | **tracepoint** | **raw_tracepoint** | **xdp** | **perf_event** | **cgroup/skb** | ··· 53 53 =========== 54 54 **bpftool prog { show | list }** [*PROG*] 55 55 Show information about loaded programs. If *PROG* is 56 - specified show information only about given program, otherwise 57 - list all programs currently loaded on the system. 56 + specified show information only about given programs, 57 + otherwise list all programs currently loaded on the system. 58 + In case of **tag** or **name**, *PROG* may match several 59 + programs which will all be shown. 58 60 59 61 Output will start with program ID followed by program type and 60 62 zero or more named attributes (depending on kernel version). ··· 70 68 performed via the **kernel.bpf_stats_enabled** sysctl knob. 71 69 72 70 **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }] 73 - Dump eBPF instructions of the program from the kernel. By 71 + Dump eBPF instructions of the programs from the kernel. By 74 72 default, eBPF will be disassembled and printed to standard 75 73 output in human-readable format. In this case, **opcodes** 76 74 controls if raw opcodes should be printed as well. 75 + 76 + In case of **tag** or **name**, *PROG* may match several 77 + programs which will all be dumped. However, if **file** or 78 + **visual** is specified, *PROG* must match a single program. 77 79 78 80 If **file** is specified, the binary image will instead be 79 81 written to *FILE*. ··· 86 80 built instead, and eBPF instructions will be presented with 87 81 CFG in DOT format, on standard output. 
88 82 89 - If the prog has line_info available, the source line will 83 + If the programs have line_info available, the source line will 90 84 be displayed by default. If **linum** is specified, 91 85 the filename, line number and line column will also be 92 86 displayed on top of the source line. 93 87 94 88 **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }] 95 89 Dump jited image (host machine code) of the program. 90 + 96 91 If *FILE* is specified image will be written to a file, 97 92 otherwise it will be disassembled and printed to stdout. 93 + *PROG* must match a single program when **file** is specified. 98 94 99 95 **opcodes** controls if raw opcodes will be printed. 100 96
tools/bpf/bpftool/Documentation/bpftool.rst (+2 -1)
··· 81 81 **bpftool-feature**\ (8), 82 82 **bpftool-net**\ (8), 83 83 **bpftool-perf**\ (8), 84 - **bpftool-btf**\ (8) 84 + **bpftool-btf**\ (8), 85 + **bpftool-gen**\ (8),
tools/bpf/bpftool/bash-completion/bpftool (+145 -11)
··· 59 59 command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) 60 60 } 61 61 62 + _bpftool_get_map_names() 63 + { 64 + COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \ 65 + command sed -n 's/.*"name": \(.*\),$/\1/p' )" -- "$cur" ) ) 66 + } 67 + 68 + # Takes map type and adds matching map names to the list of suggestions. 69 + _bpftool_get_map_names_for_type() 70 + { 71 + local type="$1" 72 + COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \ 73 + command grep -C2 "$type" | \ 74 + command sed -n 's/.*"name": \(.*\),$/\1/p' )" -- "$cur" ) ) 75 + } 76 + 62 77 _bpftool_get_prog_ids() 63 78 { 64 79 COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ ··· 84 69 { 85 70 COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ 86 71 command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) ) 72 + } 73 + 74 + _bpftool_get_prog_names() 75 + { 76 + COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ 77 + command sed -n 's/.*"name": "\(.*\)",$/\1/p' )" -- "$cur" ) ) 87 78 } 88 79 89 80 _bpftool_get_btf_ids() ··· 201 180 esac 202 181 } 203 182 183 + _bpftool_map_update_get_name() 184 + { 185 + local command="$1" 186 + 187 + # Is it the map to update, or a map to insert into the map to update? 188 + # Search for "value" keyword. 189 + local idx value 190 + for (( idx=7; idx < ${#words[@]}-1; idx++ )); do 191 + if [[ ${words[idx]} == "value" ]]; then 192 + value=1 193 + break 194 + fi 195 + done 196 + if [[ $value -eq 0 ]]; then 197 + case "$command" in 198 + push) 199 + _bpftool_get_map_names_for_type stack 200 + ;; 201 + enqueue) 202 + _bpftool_get_map_names_for_type queue 203 + ;; 204 + *) 205 + _bpftool_get_map_names 206 + ;; 207 + esac 208 + return 0 209 + fi 210 + 211 + # Name to complete is for a value. It can be either prog name or map name. This 212 + # depends on the type of the map to update. 
213 + local type=$(_bpftool_map_guess_map_type) 214 + case $type in 215 + array_of_maps|hash_of_maps) 216 + _bpftool_get_map_names 217 + return 0 218 + ;; 219 + prog_array) 220 + _bpftool_get_prog_names 221 + return 0 222 + ;; 223 + *) 224 + return 0 225 + ;; 226 + esac 227 + } 228 + 204 229 _bpftool() 205 230 { 206 231 local cur prev words objword ··· 318 251 # Completion depends on object and command in use 319 252 case $object in 320 253 prog) 321 - # Complete id, only for subcommands that use prog (but no map) ids 254 + # Complete id and name, only for subcommands that use prog (but no 255 + # map) ids/names. 322 256 case $command in 323 257 show|list|dump|pin) 324 258 case $prev in ··· 327 259 _bpftool_get_prog_ids 328 260 return 0 329 261 ;; 262 + name) 263 + _bpftool_get_prog_names 264 + return 0 265 + ;; 330 266 esac 331 267 ;; 332 268 esac 333 269 334 - local PROG_TYPE='id pinned tag' 335 - local MAP_TYPE='id pinned' 270 + local PROG_TYPE='id pinned tag name' 271 + local MAP_TYPE='id pinned name' 336 272 case $command in 337 273 show|list) 338 274 [[ $prev != "$command" ]] && return 0 ··· 387 315 id) 388 316 _bpftool_get_prog_ids 389 317 ;; 318 + name) 319 + _bpftool_get_map_names 320 + ;; 390 321 pinned) 391 322 _filedir 392 323 ;; ··· 409 334 case $prev in 410 335 id) 411 336 _bpftool_get_map_ids 337 + ;; 338 + name) 339 + _bpftool_get_map_names 412 340 ;; 413 341 pinned) 414 342 _filedir ··· 477 399 _bpftool_get_map_ids 478 400 return 0 479 401 ;; 402 + name) 403 + _bpftool_get_map_names 404 + return 0 405 + ;; 480 406 pinned|pinmaps) 481 407 _filedir 482 408 return 0 ··· 529 447 esac 530 448 ;; 531 449 map) 532 - local MAP_TYPE='id pinned' 450 + local MAP_TYPE='id pinned name' 533 451 case $command in 534 452 show|list|dump|peek|pop|dequeue|freeze) 535 453 case $prev in ··· 551 469 ;; 552 470 *) 553 471 _bpftool_get_map_ids 472 + ;; 473 + esac 474 + return 0 475 + ;; 476 + name) 477 + case "$command" in 478 + peek) 479 + _bpftool_get_map_names_for_type 
stack 480 + _bpftool_get_map_names_for_type queue 481 + ;; 482 + pop) 483 + _bpftool_get_map_names_for_type stack 484 + ;; 485 + dequeue) 486 + _bpftool_get_map_names_for_type queue 487 + ;; 488 + *) 489 + _bpftool_get_map_names 554 490 ;; 555 491 esac 556 492 return 0 ··· 620 520 _bpftool_get_map_ids 621 521 return 0 622 522 ;; 523 + name) 524 + _bpftool_get_map_names 525 + return 0 526 + ;; 623 527 key) 624 528 COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) ) 625 529 ;; ··· 649 545 _bpftool_map_update_get_id $command 650 546 return 0 651 547 ;; 548 + name) 549 + _bpftool_map_update_get_name $command 550 + return 0 551 + ;; 652 552 key) 653 553 COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) ) 654 554 ;; ··· 661 553 # map, depending on the type of the map to update. 662 554 case "$(_bpftool_map_guess_map_type)" in 663 555 array_of_maps|hash_of_maps) 664 - local MAP_TYPE='id pinned' 556 + local MAP_TYPE='id pinned name' 665 557 COMPREPLY+=( $( compgen -W "$MAP_TYPE" \ 666 558 -- "$cur" ) ) 667 559 return 0 668 560 ;; 669 561 prog_array) 670 - local PROG_TYPE='id pinned tag' 562 + local PROG_TYPE='id pinned tag name' 671 563 COMPREPLY+=( $( compgen -W "$PROG_TYPE" \ 672 564 -- "$cur" ) ) 673 565 return 0 ··· 729 621 _bpftool_get_map_ids_for_type perf_event_array 730 622 return 0 731 623 ;; 624 + name) 625 + _bpftool_get_map_names_for_type perf_event_array 626 + return 0 627 + ;; 732 628 cpu) 733 629 return 0 734 630 ;; ··· 756 644 esac 757 645 ;; 758 646 btf) 759 - local PROG_TYPE='id pinned tag' 760 - local MAP_TYPE='id pinned' 647 + local PROG_TYPE='id pinned tag name' 648 + local MAP_TYPE='id pinned name' 761 649 case $command in 762 650 dump) 763 651 case $prev in ··· 784 672 ;; 785 673 $command) 786 674 _bpftool_get_btf_ids 675 + ;; 676 + esac 677 + return 0 678 + ;; 679 + name) 680 + case $pprev in 681 + prog) 682 + _bpftool_get_prog_names 683 + ;; 684 + map) 685 + _bpftool_get_map_names 787 686 ;; 788 687 esac 789 688 return 0 ··· 839 716 ;; 840 717 esac 841 718 
;; 719 + gen) 720 + case $command in 721 + skeleton) 722 + _filedir 723 + ;; 724 + *) 725 + [[ $prev == $object ]] && \ 726 + COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) ) 727 + ;; 728 + esac 729 + ;; 842 730 cgroup) 843 731 case $command in 844 732 show|list|tree) ··· 869 735 connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl \ 870 736 getsockopt setsockopt' 871 737 local ATTACH_FLAGS='multi override' 872 - local PROG_TYPE='id pinned tag' 738 + local PROG_TYPE='id pinned tag name' 873 739 case $prev in 874 740 $command) 875 741 _filedir ··· 894 760 elif [[ "$command" == "attach" ]]; then 895 761 # We have an attach type on the command line, 896 762 # but it is not the previous word, or 897 - # "id|pinned|tag" (we already checked for 763 + # "id|pinned|tag|name" (we already checked for 898 764 # that). This should only leave the case when 899 765 # we need attach flags for "attach" commamnd. 900 766 _bpftool_one_of_list "$ATTACH_FLAGS" ··· 920 786 esac 921 787 ;; 922 788 net) 923 - local PROG_TYPE='id pinned tag' 789 + local PROG_TYPE='id pinned tag name' 924 790 local ATTACH_TYPES='xdp xdpgeneric xdpdrv xdpoffload' 925 791 case $command in 926 792 show|list)
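For illustration, the completion additions above all funnel through bash's `compgen -W` keyword filtering; the sketch below reproduces just that mechanism with keyword lists copied from the patch (no bpftool is needed for this sketch, and it is run through `bash -c` since compgen is a bash builtin):

```shell
# The completion script filters keyword candidates with compgen -W; the
# patch above simply adds "name" to lists such as PROG_TYPE and MAP_TYPE.
bash -c 'PROG_TYPE="id pinned tag name"; compgen -W "$PROG_TYPE" -- na'   # prints: name
bash -c 'MAP_TYPE="id pinned name"; compgen -W "$MAP_TYPE" -- pin'        # prints: pinned
```

When the user has typed `bpftool prog show na<TAB>`, the same filtering yields `name` as the only candidate.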
+39 -17
tools/bpf/bpftool/cgroup.c
···
 	return prog_cnt;
 }
 
+static int cgroup_has_attached_progs(int cgroup_fd)
+{
+	enum bpf_attach_type type;
+	bool no_prog = true;
+
+	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+		int count = count_attached_bpf_progs(cgroup_fd, type);
+
+		if (count < 0 && errno != EINVAL)
+			return -1;
+
+		if (count > 0) {
+			no_prog = false;
+			break;
+		}
+	}
+
+	return no_prog ? 0 : 1;
+}
 static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
 				   int level)
 {
···
 static int do_show(int argc, char **argv)
 {
 	enum bpf_attach_type type;
+	int has_attached_progs;
 	const char *path;
 	int cgroup_fd;
 	int ret = -1;
···
 		goto exit;
 	}
 
+	has_attached_progs = cgroup_has_attached_progs(cgroup_fd);
+	if (has_attached_progs < 0) {
+		p_err("can't query bpf programs attached to %s: %s",
+		      path, strerror(errno));
+		goto exit_cgroup;
+	} else if (!has_attached_progs) {
+		ret = 0;
+		goto exit_cgroup;
+	}
+
 	if (json_output)
 		jsonw_start_array(json_wtr);
 	else
···
 	if (json_output)
 		jsonw_end_array(json_wtr);
 
+exit_cgroup:
 	close(cgroup_fd);
 exit:
 	return ret;
···
 		    int typeflag, struct FTW *ftw)
 {
 	enum bpf_attach_type type;
-	bool skip = true;
+	int has_attached_progs;
 	int cgroup_fd;
 
 	if (typeflag != FTW_D)
···
 		return SHOW_TREE_FN_ERR;
 	}
 
-	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
-		int count = count_attached_bpf_progs(cgroup_fd, type);
-
-		if (count < 0 && errno != EINVAL) {
-			p_err("can't query bpf programs attached to %s: %s",
-			      fpath, strerror(errno));
-			close(cgroup_fd);
-			return SHOW_TREE_FN_ERR;
-		}
-		if (count > 0) {
-			skip = false;
-			break;
-		}
-	}
-
-	if (skip) {
+	has_attached_progs = cgroup_has_attached_progs(cgroup_fd);
+	if (has_attached_progs < 0) {
+		p_err("can't query bpf programs attached to %s: %s",
+		      fpath, strerror(errno));
+		close(cgroup_fd);
+		return SHOW_TREE_FN_ERR;
+	} else if (!has_attached_progs) {
 		close(cgroup_fd);
 		return 0;
 	}
+609
tools/bpf/bpftool/gen.c
··· 1 + // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 2 + /* Copyright (C) 2019 Facebook */ 3 + 4 + #ifndef _GNU_SOURCE 5 + #define _GNU_SOURCE 6 + #endif 7 + #include <ctype.h> 8 + #include <errno.h> 9 + #include <fcntl.h> 10 + #include <linux/err.h> 11 + #include <stdbool.h> 12 + #include <stdio.h> 13 + #include <string.h> 14 + #include <unistd.h> 15 + #include <bpf.h> 16 + #include <libbpf.h> 17 + #include <sys/types.h> 18 + #include <sys/stat.h> 19 + #include <sys/mman.h> 20 + #include <unistd.h> 21 + 22 + #include "btf.h" 23 + #include "libbpf_internal.h" 24 + #include "json_writer.h" 25 + #include "main.h" 26 + 27 + 28 + #define MAX_OBJ_NAME_LEN 64 29 + 30 + static void sanitize_identifier(char *name) 31 + { 32 + int i; 33 + 34 + for (i = 0; name[i]; i++) 35 + if (!isalnum(name[i]) && name[i] != '_') 36 + name[i] = '_'; 37 + } 38 + 39 + static bool str_has_suffix(const char *str, const char *suffix) 40 + { 41 + size_t i, n1 = strlen(str), n2 = strlen(suffix); 42 + 43 + if (n1 < n2) 44 + return false; 45 + 46 + for (i = 0; i < n2; i++) { 47 + if (str[n1 - i - 1] != suffix[n2 - i - 1]) 48 + return false; 49 + } 50 + 51 + return true; 52 + } 53 + 54 + static void get_obj_name(char *name, const char *file) 55 + { 56 + /* Using basename() GNU version which doesn't modify arg. 
*/ 57 + strncpy(name, basename(file), MAX_OBJ_NAME_LEN - 1); 58 + name[MAX_OBJ_NAME_LEN - 1] = '\0'; 59 + if (str_has_suffix(name, ".o")) 60 + name[strlen(name) - 2] = '\0'; 61 + sanitize_identifier(name); 62 + } 63 + 64 + static void get_header_guard(char *guard, const char *obj_name) 65 + { 66 + int i; 67 + 68 + sprintf(guard, "__%s_SKEL_H__", obj_name); 69 + for (i = 0; guard[i]; i++) 70 + guard[i] = toupper(guard[i]); 71 + } 72 + 73 + static const char *get_map_ident(const struct bpf_map *map) 74 + { 75 + const char *name = bpf_map__name(map); 76 + 77 + if (!bpf_map__is_internal(map)) 78 + return name; 79 + 80 + if (str_has_suffix(name, ".data")) 81 + return "data"; 82 + else if (str_has_suffix(name, ".rodata")) 83 + return "rodata"; 84 + else if (str_has_suffix(name, ".bss")) 85 + return "bss"; 86 + else if (str_has_suffix(name, ".kconfig")) 87 + return "kconfig"; 88 + else 89 + return NULL; 90 + } 91 + 92 + static void codegen_btf_dump_printf(void *ct, const char *fmt, va_list args) 93 + { 94 + vprintf(fmt, args); 95 + } 96 + 97 + static int codegen_datasec_def(struct bpf_object *obj, 98 + struct btf *btf, 99 + struct btf_dump *d, 100 + const struct btf_type *sec, 101 + const char *obj_name) 102 + { 103 + const char *sec_name = btf__name_by_offset(btf, sec->name_off); 104 + const struct btf_var_secinfo *sec_var = btf_var_secinfos(sec); 105 + int i, err, off = 0, pad_cnt = 0, vlen = btf_vlen(sec); 106 + const char *sec_ident; 107 + char var_ident[256]; 108 + 109 + if (strcmp(sec_name, ".data") == 0) 110 + sec_ident = "data"; 111 + else if (strcmp(sec_name, ".bss") == 0) 112 + sec_ident = "bss"; 113 + else if (strcmp(sec_name, ".rodata") == 0) 114 + sec_ident = "rodata"; 115 + else if (strcmp(sec_name, ".kconfig") == 0) 116 + sec_ident = "kconfig"; 117 + else 118 + return 0; 119 + 120 + printf(" struct %s__%s {\n", obj_name, sec_ident); 121 + for (i = 0; i < vlen; i++, sec_var++) { 122 + const struct btf_type *var = btf__type_by_id(btf, sec_var->type); 123 + 
const char *var_name = btf__name_by_offset(btf, var->name_off); 124 + DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts, 125 + .field_name = var_ident, 126 + .indent_level = 2, 127 + ); 128 + int need_off = sec_var->offset, align_off, align; 129 + __u32 var_type_id = var->type; 130 + const struct btf_type *t; 131 + 132 + t = btf__type_by_id(btf, var_type_id); 133 + while (btf_is_mod(t)) { 134 + var_type_id = t->type; 135 + t = btf__type_by_id(btf, var_type_id); 136 + } 137 + 138 + if (off > need_off) { 139 + p_err("Something is wrong for %s's variable #%d: need offset %d, already at %d.\n", 140 + sec_name, i, need_off, off); 141 + return -EINVAL; 142 + } 143 + 144 + align = btf__align_of(btf, var->type); 145 + if (align <= 0) { 146 + p_err("Failed to determine alignment of variable '%s': %d", 147 + var_name, align); 148 + return -EINVAL; 149 + } 150 + 151 + align_off = (off + align - 1) / align * align; 152 + if (align_off != need_off) { 153 + printf("\t\tchar __pad%d[%d];\n", 154 + pad_cnt, need_off - off); 155 + pad_cnt++; 156 + } 157 + 158 + /* sanitize variable name, e.g., for static vars inside 159 + * a function, it's name is '<function name>.<variable name>', 160 + * which we'll turn into a '<function name>_<variable name>' 161 + */ 162 + var_ident[0] = '\0'; 163 + strncat(var_ident, var_name, sizeof(var_ident) - 1); 164 + sanitize_identifier(var_ident); 165 + 166 + printf("\t\t"); 167 + err = btf_dump__emit_type_decl(d, var_type_id, &opts); 168 + if (err) 169 + return err; 170 + printf(";\n"); 171 + 172 + off = sec_var->offset + sec_var->size; 173 + } 174 + printf(" } *%s;\n", sec_ident); 175 + return 0; 176 + } 177 + 178 + static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) 179 + { 180 + struct btf *btf = bpf_object__btf(obj); 181 + int n = btf__get_nr_types(btf); 182 + struct btf_dump *d; 183 + int i, err = 0; 184 + 185 + d = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf); 186 + if (IS_ERR(d)) 187 + return PTR_ERR(d); 
188 + 189 + for (i = 1; i <= n; i++) { 190 + const struct btf_type *t = btf__type_by_id(btf, i); 191 + 192 + if (!btf_is_datasec(t)) 193 + continue; 194 + 195 + err = codegen_datasec_def(obj, btf, d, t, obj_name); 196 + if (err) 197 + goto out; 198 + } 199 + out: 200 + btf_dump__free(d); 201 + return err; 202 + } 203 + 204 + static int codegen(const char *template, ...) 205 + { 206 + const char *src, *end; 207 + int skip_tabs = 0, n; 208 + char *s, *dst; 209 + va_list args; 210 + char c; 211 + 212 + n = strlen(template); 213 + s = malloc(n + 1); 214 + if (!s) 215 + return -ENOMEM; 216 + src = template; 217 + dst = s; 218 + 219 + /* find out "baseline" indentation to skip */ 220 + while ((c = *src++)) { 221 + if (c == '\t') { 222 + skip_tabs++; 223 + } else if (c == '\n') { 224 + break; 225 + } else { 226 + p_err("unrecognized character at pos %td in template '%s'", 227 + src - template - 1, template); 228 + return -EINVAL; 229 + } 230 + } 231 + 232 + while (*src) { 233 + /* skip baseline indentation tabs */ 234 + for (n = skip_tabs; n > 0; n--, src++) { 235 + if (*src != '\t') { 236 + p_err("not enough tabs at pos %td in template '%s'", 237 + src - template - 1, template); 238 + return -EINVAL; 239 + } 240 + } 241 + /* trim trailing whitespace */ 242 + end = strchrnul(src, '\n'); 243 + for (n = end - src; n > 0 && isspace(src[n - 1]); n--) 244 + ; 245 + memcpy(dst, src, n); 246 + dst += n; 247 + if (*end) 248 + *dst++ = '\n'; 249 + src = *end ? 
end + 1 : end; 250 + } 251 + *dst++ = '\0'; 252 + 253 + /* print out using adjusted template */ 254 + va_start(args, template); 255 + n = vprintf(s, args); 256 + va_end(args); 257 + 258 + free(s); 259 + return n; 260 + } 261 + 262 + static int do_skeleton(int argc, char **argv) 263 + { 264 + char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")]; 265 + size_t i, map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz; 266 + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); 267 + char obj_name[MAX_OBJ_NAME_LEN], *obj_data; 268 + struct bpf_object *obj = NULL; 269 + const char *file, *ident; 270 + struct bpf_program *prog; 271 + int fd, len, err = -1; 272 + struct bpf_map *map; 273 + struct btf *btf; 274 + struct stat st; 275 + 276 + if (!REQ_ARGS(1)) { 277 + usage(); 278 + return -1; 279 + } 280 + file = GET_ARG(); 281 + 282 + if (argc) { 283 + p_err("extra unknown arguments"); 284 + return -1; 285 + } 286 + 287 + if (stat(file, &st)) { 288 + p_err("failed to stat() %s: %s", file, strerror(errno)); 289 + return -1; 290 + } 291 + file_sz = st.st_size; 292 + mmap_sz = roundup(file_sz, sysconf(_SC_PAGE_SIZE)); 293 + fd = open(file, O_RDONLY); 294 + if (fd < 0) { 295 + p_err("failed to open() %s: %s", file, strerror(errno)); 296 + return -1; 297 + } 298 + obj_data = mmap(NULL, mmap_sz, PROT_READ, MAP_PRIVATE, fd, 0); 299 + if (obj_data == MAP_FAILED) { 300 + obj_data = NULL; 301 + p_err("failed to mmap() %s: %s", file, strerror(errno)); 302 + goto out; 303 + } 304 + get_obj_name(obj_name, file); 305 + opts.object_name = obj_name; 306 + obj = bpf_object__open_mem(obj_data, file_sz, &opts); 307 + if (IS_ERR(obj)) { 308 + obj = NULL; 309 + p_err("failed to open BPF object file: %ld", PTR_ERR(obj)); 310 + goto out; 311 + } 312 + 313 + bpf_object__for_each_map(map, obj) { 314 + ident = get_map_ident(map); 315 + if (!ident) { 316 + p_err("ignoring unrecognized internal map '%s'...", 317 + bpf_map__name(map)); 318 + continue; 319 + } 320 + map_cnt++; 321 + } 322 + 
bpf_object__for_each_program(prog, obj) { 323 + prog_cnt++; 324 + } 325 + 326 + get_header_guard(header_guard, obj_name); 327 + codegen("\ 328 + \n\ 329 + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\ 330 + \n\ 331 + /* THIS FILE IS AUTOGENERATED! */ \n\ 332 + #ifndef %2$s \n\ 333 + #define %2$s \n\ 334 + \n\ 335 + #include <stdlib.h> \n\ 336 + #include <libbpf.h> \n\ 337 + \n\ 338 + struct %1$s { \n\ 339 + struct bpf_object_skeleton *skeleton; \n\ 340 + struct bpf_object *obj; \n\ 341 + ", 342 + obj_name, header_guard 343 + ); 344 + 345 + if (map_cnt) { 346 + printf("\tstruct {\n"); 347 + bpf_object__for_each_map(map, obj) { 348 + ident = get_map_ident(map); 349 + if (!ident) 350 + continue; 351 + printf("\t\tstruct bpf_map *%s;\n", ident); 352 + } 353 + printf("\t} maps;\n"); 354 + } 355 + 356 + if (prog_cnt) { 357 + printf("\tstruct {\n"); 358 + bpf_object__for_each_program(prog, obj) { 359 + printf("\t\tstruct bpf_program *%s;\n", 360 + bpf_program__name(prog)); 361 + } 362 + printf("\t} progs;\n"); 363 + printf("\tstruct {\n"); 364 + bpf_object__for_each_program(prog, obj) { 365 + printf("\t\tstruct bpf_link *%s;\n", 366 + bpf_program__name(prog)); 367 + } 368 + printf("\t} links;\n"); 369 + } 370 + 371 + btf = bpf_object__btf(obj); 372 + if (btf) { 373 + err = codegen_datasecs(obj, obj_name); 374 + if (err) 375 + goto out; 376 + } 377 + 378 + codegen("\ 379 + \n\ 380 + }; \n\ 381 + \n\ 382 + static void \n\ 383 + %1$s__destroy(struct %1$s *obj) \n\ 384 + { \n\ 385 + if (!obj) \n\ 386 + return; \n\ 387 + if (obj->skeleton) \n\ 388 + bpf_object__destroy_skeleton(obj->skeleton);\n\ 389 + free(obj); \n\ 390 + } \n\ 391 + \n\ 392 + static inline int \n\ 393 + %1$s__create_skeleton(struct %1$s *obj); \n\ 394 + \n\ 395 + static inline struct %1$s * \n\ 396 + %1$s__open_opts(const struct bpf_object_open_opts *opts) \n\ 397 + { \n\ 398 + struct %1$s *obj; \n\ 399 + \n\ 400 + obj = (typeof(obj))calloc(1, sizeof(*obj)); \n\ 401 + if (!obj) \n\ 402 + 
return NULL; \n\ 403 + if (%1$s__create_skeleton(obj)) \n\ 404 + goto err; \n\ 405 + if (bpf_object__open_skeleton(obj->skeleton, opts)) \n\ 406 + goto err; \n\ 407 + \n\ 408 + return obj; \n\ 409 + err: \n\ 410 + %1$s__destroy(obj); \n\ 411 + return NULL; \n\ 412 + } \n\ 413 + \n\ 414 + static inline struct %1$s * \n\ 415 + %1$s__open(void) \n\ 416 + { \n\ 417 + return %1$s__open_opts(NULL); \n\ 418 + } \n\ 419 + \n\ 420 + static inline int \n\ 421 + %1$s__load(struct %1$s *obj) \n\ 422 + { \n\ 423 + return bpf_object__load_skeleton(obj->skeleton); \n\ 424 + } \n\ 425 + \n\ 426 + static inline struct %1$s * \n\ 427 + %1$s__open_and_load(void) \n\ 428 + { \n\ 429 + struct %1$s *obj; \n\ 430 + \n\ 431 + obj = %1$s__open(); \n\ 432 + if (!obj) \n\ 433 + return NULL; \n\ 434 + if (%1$s__load(obj)) { \n\ 435 + %1$s__destroy(obj); \n\ 436 + return NULL; \n\ 437 + } \n\ 438 + return obj; \n\ 439 + } \n\ 440 + \n\ 441 + static inline int \n\ 442 + %1$s__attach(struct %1$s *obj) \n\ 443 + { \n\ 444 + return bpf_object__attach_skeleton(obj->skeleton); \n\ 445 + } \n\ 446 + \n\ 447 + static inline void \n\ 448 + %1$s__detach(struct %1$s *obj) \n\ 449 + { \n\ 450 + return bpf_object__detach_skeleton(obj->skeleton); \n\ 451 + } \n\ 452 + ", 453 + obj_name 454 + ); 455 + 456 + codegen("\ 457 + \n\ 458 + \n\ 459 + static inline int \n\ 460 + %1$s__create_skeleton(struct %1$s *obj) \n\ 461 + { \n\ 462 + struct bpf_object_skeleton *s; \n\ 463 + \n\ 464 + s = (typeof(s))calloc(1, sizeof(*s)); \n\ 465 + if (!s) \n\ 466 + return -1; \n\ 467 + obj->skeleton = s; \n\ 468 + \n\ 469 + s->sz = sizeof(*s); \n\ 470 + s->name = \"%1$s\"; \n\ 471 + s->obj = &obj->obj; \n\ 472 + ", 473 + obj_name 474 + ); 475 + if (map_cnt) { 476 + codegen("\ 477 + \n\ 478 + \n\ 479 + /* maps */ \n\ 480 + s->map_cnt = %zu; \n\ 481 + s->map_skel_sz = sizeof(*s->maps); \n\ 482 + s->maps = (typeof(s->maps))calloc(s->map_cnt, s->map_skel_sz);\n\ 483 + if (!s->maps) \n\ 484 + goto err; \n\ 485 + ", 486 + map_cnt 
487 + ); 488 + i = 0; 489 + bpf_object__for_each_map(map, obj) { 490 + ident = get_map_ident(map); 491 + 492 + if (!ident) 493 + continue; 494 + 495 + codegen("\ 496 + \n\ 497 + \n\ 498 + s->maps[%zu].name = \"%s\"; \n\ 499 + s->maps[%zu].map = &obj->maps.%s; \n\ 500 + ", 501 + i, bpf_map__name(map), i, ident); 502 + /* memory-mapped internal maps */ 503 + if (bpf_map__is_internal(map) && 504 + (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) { 505 + printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", 506 + i, ident); 507 + } 508 + i++; 509 + } 510 + } 511 + if (prog_cnt) { 512 + codegen("\ 513 + \n\ 514 + \n\ 515 + /* programs */ \n\ 516 + s->prog_cnt = %zu; \n\ 517 + s->prog_skel_sz = sizeof(*s->progs); \n\ 518 + s->progs = (typeof(s->progs))calloc(s->prog_cnt, s->prog_skel_sz);\n\ 519 + if (!s->progs) \n\ 520 + goto err; \n\ 521 + ", 522 + prog_cnt 523 + ); 524 + i = 0; 525 + bpf_object__for_each_program(prog, obj) { 526 + codegen("\ 527 + \n\ 528 + \n\ 529 + s->progs[%1$zu].name = \"%2$s\"; \n\ 530 + s->progs[%1$zu].prog = &obj->progs.%2$s;\n\ 531 + s->progs[%1$zu].link = &obj->links.%2$s;\n\ 532 + ", 533 + i, bpf_program__name(prog)); 534 + i++; 535 + } 536 + } 537 + codegen("\ 538 + \n\ 539 + \n\ 540 + s->data_sz = %d; \n\ 541 + s->data = (void *)\"\\ \n\ 542 + ", 543 + file_sz); 544 + 545 + /* embed contents of BPF object file */ 546 + for (i = 0, len = 0; i < file_sz; i++) { 547 + int w = obj_data[i] ? 
4 : 2; 548 + 549 + len += w; 550 + if (len > 78) { 551 + printf("\\\n"); 552 + len = w; 553 + } 554 + if (!obj_data[i]) 555 + printf("\\0"); 556 + else 557 + printf("\\x%02x", (unsigned char)obj_data[i]); 558 + } 559 + 560 + codegen("\ 561 + \n\ 562 + \"; \n\ 563 + \n\ 564 + return 0; \n\ 565 + err: \n\ 566 + bpf_object__destroy_skeleton(s); \n\ 567 + return -1; \n\ 568 + } \n\ 569 + \n\ 570 + #endif /* %s */ \n\ 571 + ", 572 + header_guard); 573 + err = 0; 574 + out: 575 + bpf_object__close(obj); 576 + if (obj_data) 577 + munmap(obj_data, mmap_sz); 578 + close(fd); 579 + return err; 580 + } 581 + 582 + static int do_help(int argc, char **argv) 583 + { 584 + if (json_output) { 585 + jsonw_null(json_wtr); 586 + return 0; 587 + } 588 + 589 + fprintf(stderr, 590 + "Usage: %1$s gen skeleton FILE\n" 591 + " %1$s gen help\n" 592 + "\n" 593 + " " HELP_SPEC_OPTIONS "\n" 594 + "", 595 + bin_name); 596 + 597 + return 0; 598 + } 599 + 600 + static const struct cmd cmds[] = { 601 + { "skeleton", do_skeleton }, 602 + { "help", do_help }, 603 + { 0 } 604 + }; 605 + 606 + int do_gen(int argc, char **argv) 607 + { 608 + return cmd_select(cmds, argc, argv, do_help); 609 + }
+2 -1
tools/bpf/bpftool/main.c
···
 		" %s batch file FILE\n"
 		" %s version\n"
 		"\n"
-		" OBJECT := { prog | map | cgroup | perf | net | feature | btf }\n"
+		" OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen }\n"
 		" " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, bin_name, bin_name);
···
 	{ "net",	do_net },
 	{ "feature",	do_feature },
 	{ "btf",	do_btf },
+	{ "gen",	do_gen },
 	{ "version",	do_version },
 	{ 0 }
 };
+3 -2
tools/bpf/bpftool/main.h
···
 #define BPF_TAG_FMT	"%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
 
 #define HELP_SPEC_PROGRAM						\
-	"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }"
+	"PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }"
 #define HELP_SPEC_OPTIONS						\
 	"OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n"	\
 	"\t            {-m|--mapcompat} | {-n|--nomount} }"
 #define HELP_SPEC_MAP							\
-	"MAP := { id MAP_ID | pinned FILE }"
+	"MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
 
 static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_UNSPEC]		= "unspec",
···
 int do_tracelog(int argc, char **arg);
 int do_feature(int argc, char **argv);
 int do_btf(int argc, char **argv);
+int do_gen(int argc, char **argv);
 
 int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
 int prog_parse_fd(int *argc, char ***argv);
+310 -74
tools/bpf/bpftool/map.c
··· 91 91 return malloc(info->value_size); 92 92 } 93 93 94 - int map_parse_fd(int *argc, char ***argv) 94 + static int map_fd_by_name(char *name, int **fds) 95 95 { 96 - int fd; 96 + unsigned int id = 0; 97 + int fd, nb_fds = 0; 98 + void *tmp; 99 + int err; 97 100 101 + while (true) { 102 + struct bpf_map_info info = {}; 103 + __u32 len = sizeof(info); 104 + 105 + err = bpf_map_get_next_id(id, &id); 106 + if (err) { 107 + if (errno != ENOENT) { 108 + p_err("%s", strerror(errno)); 109 + goto err_close_fds; 110 + } 111 + return nb_fds; 112 + } 113 + 114 + fd = bpf_map_get_fd_by_id(id); 115 + if (fd < 0) { 116 + p_err("can't get map by id (%u): %s", 117 + id, strerror(errno)); 118 + goto err_close_fds; 119 + } 120 + 121 + err = bpf_obj_get_info_by_fd(fd, &info, &len); 122 + if (err) { 123 + p_err("can't get map info (%u): %s", 124 + id, strerror(errno)); 125 + goto err_close_fd; 126 + } 127 + 128 + if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) { 129 + close(fd); 130 + continue; 131 + } 132 + 133 + if (nb_fds > 0) { 134 + tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); 135 + if (!tmp) { 136 + p_err("failed to realloc"); 137 + goto err_close_fd; 138 + } 139 + *fds = tmp; 140 + } 141 + (*fds)[nb_fds++] = fd; 142 + } 143 + 144 + err_close_fd: 145 + close(fd); 146 + err_close_fds: 147 + while (--nb_fds >= 0) 148 + close((*fds)[nb_fds]); 149 + return -1; 150 + } 151 + 152 + static int map_parse_fds(int *argc, char ***argv, int **fds) 153 + { 98 154 if (is_prefix(**argv, "id")) { 99 155 unsigned int id; 100 156 char *endptr; ··· 164 108 } 165 109 NEXT_ARGP(); 166 110 167 - fd = bpf_map_get_fd_by_id(id); 168 - if (fd < 0) 111 + (*fds)[0] = bpf_map_get_fd_by_id(id); 112 + if ((*fds)[0] < 0) { 169 113 p_err("get map by id (%u): %s", id, strerror(errno)); 170 - return fd; 114 + return -1; 115 + } 116 + return 1; 117 + } else if (is_prefix(**argv, "name")) { 118 + char *name; 119 + 120 + NEXT_ARGP(); 121 + 122 + name = **argv; 123 + if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { 
124 + p_err("can't parse name"); 125 + return -1; 126 + } 127 + NEXT_ARGP(); 128 + 129 + return map_fd_by_name(name, fds); 171 130 } else if (is_prefix(**argv, "pinned")) { 172 131 char *path; 173 132 ··· 191 120 path = **argv; 192 121 NEXT_ARGP(); 193 122 194 - return open_obj_pinned_any(path, BPF_OBJ_MAP); 123 + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP); 124 + if ((*fds)[0] < 0) 125 + return -1; 126 + return 1; 195 127 } 196 128 197 - p_err("expected 'id' or 'pinned', got: '%s'?", **argv); 129 + p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv); 198 130 return -1; 131 + } 132 + 133 + int map_parse_fd(int *argc, char ***argv) 134 + { 135 + int *fds = NULL; 136 + int nb_fds, fd; 137 + 138 + fds = malloc(sizeof(int)); 139 + if (!fds) { 140 + p_err("mem alloc failed"); 141 + return -1; 142 + } 143 + nb_fds = map_parse_fds(argc, argv, &fds); 144 + if (nb_fds != 1) { 145 + if (nb_fds > 1) { 146 + p_err("several maps match this handle"); 147 + while (nb_fds--) 148 + close(fds[nb_fds]); 149 + } 150 + fd = -1; 151 + goto exit_free; 152 + } 153 + 154 + fd = fds[0]; 155 + exit_free: 156 + free(fds); 157 + return fd; 199 158 } 200 159 201 160 int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) ··· 580 479 return -1; 581 480 } 582 481 482 + static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr) 483 + { 484 + jsonw_uint_field(wtr, "id", info->id); 485 + if (info->type < ARRAY_SIZE(map_type_name)) 486 + jsonw_string_field(wtr, "type", map_type_name[info->type]); 487 + else 488 + jsonw_uint_field(wtr, "type", info->type); 489 + 490 + if (*info->name) 491 + jsonw_string_field(wtr, "name", info->name); 492 + 493 + jsonw_name(wtr, "flags"); 494 + jsonw_printf(wtr, "%d", info->map_flags); 495 + } 496 + 583 497 static int show_map_close_json(int fd, struct bpf_map_info *info) 584 498 { 585 499 char *memlock, *frozen_str; ··· 605 489 606 490 jsonw_start_object(json_wtr); 607 491 608 - jsonw_uint_field(json_wtr, 
"id", info->id); 609 - if (info->type < ARRAY_SIZE(map_type_name)) 610 - jsonw_string_field(json_wtr, "type", 611 - map_type_name[info->type]); 612 - else 613 - jsonw_uint_field(json_wtr, "type", info->type); 614 - 615 - if (*info->name) 616 - jsonw_string_field(json_wtr, "name", info->name); 617 - 618 - jsonw_name(json_wtr, "flags"); 619 - jsonw_printf(json_wtr, "%d", info->map_flags); 492 + show_map_header_json(info, json_wtr); 620 493 621 494 print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); 622 495 ··· 666 561 return 0; 667 562 } 668 563 669 - static int show_map_close_plain(int fd, struct bpf_map_info *info) 564 + static void show_map_header_plain(struct bpf_map_info *info) 670 565 { 671 - char *memlock, *frozen_str; 672 - int frozen = 0; 673 - 674 - memlock = get_fdinfo(fd, "memlock"); 675 - frozen_str = get_fdinfo(fd, "frozen"); 676 - 677 566 printf("%u: ", info->id); 678 567 if (info->type < ARRAY_SIZE(map_type_name)) 679 568 printf("%s ", map_type_name[info->type]); ··· 680 581 printf("flags 0x%x", info->map_flags); 681 582 print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); 682 583 printf("\n"); 584 + } 585 + 586 + static int show_map_close_plain(int fd, struct bpf_map_info *info) 587 + { 588 + char *memlock, *frozen_str; 589 + int frozen = 0; 590 + 591 + memlock = get_fdinfo(fd, "memlock"); 592 + frozen_str = get_fdinfo(fd, "frozen"); 593 + 594 + show_map_header_plain(info); 683 595 printf("\tkey %uB value %uB max_entries %u", 684 596 info->key_size, info->value_size, info->max_entries); 685 597 ··· 752 642 return 0; 753 643 } 754 644 645 + static int do_show_subset(int argc, char **argv) 646 + { 647 + struct bpf_map_info info = {}; 648 + __u32 len = sizeof(info); 649 + int *fds = NULL; 650 + int nb_fds, i; 651 + int err = -1; 652 + 653 + fds = malloc(sizeof(int)); 654 + if (!fds) { 655 + p_err("mem alloc failed"); 656 + return -1; 657 + } 658 + nb_fds = map_parse_fds(&argc, &argv, &fds); 659 + if (nb_fds < 1) 660 + goto 
exit_free; 661 + 662 + if (json_output && nb_fds > 1) 663 + jsonw_start_array(json_wtr); /* root array */ 664 + for (i = 0; i < nb_fds; i++) { 665 + err = bpf_obj_get_info_by_fd(fds[i], &info, &len); 666 + if (err) { 667 + p_err("can't get map info: %s", 668 + strerror(errno)); 669 + for (; i < nb_fds; i++) 670 + close(fds[i]); 671 + break; 672 + } 673 + 674 + if (json_output) 675 + show_map_close_json(fds[i], &info); 676 + else 677 + show_map_close_plain(fds[i], &info); 678 + 679 + close(fds[i]); 680 + } 681 + if (json_output && nb_fds > 1) 682 + jsonw_end_array(json_wtr); /* root array */ 683 + 684 + exit_free: 685 + free(fds); 686 + return err; 687 + } 688 + 755 689 static int do_show(int argc, char **argv) 756 690 { 757 691 struct bpf_map_info info = {}; ··· 807 653 if (show_pinned) 808 654 build_pinned_obj_table(&map_table, BPF_OBJ_MAP); 809 655 810 - if (argc == 2) { 811 - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); 812 - if (fd < 0) 813 - return -1; 814 - 815 - if (json_output) 816 - return show_map_close_json(fd, &info); 817 - else 818 - return show_map_close_plain(fd, &info); 819 - } 656 + if (argc == 2) 657 + return do_show_subset(argc, argv); 820 658 821 659 if (argc) 822 660 return BAD_ARG(); ··· 911 765 return 0; 912 766 } 913 767 914 - static int do_dump(int argc, char **argv) 768 + static int maps_have_btf(int *fds, int nb_fds) 915 769 { 916 770 struct bpf_map_info info = {}; 771 + __u32 len = sizeof(info); 772 + struct btf *btf = NULL; 773 + int err, i; 774 + 775 + for (i = 0; i < nb_fds; i++) { 776 + err = bpf_obj_get_info_by_fd(fds[i], &info, &len); 777 + if (err) { 778 + p_err("can't get map info: %s", strerror(errno)); 779 + goto err_close; 780 + } 781 + 782 + err = btf__get_from_id(info.btf_id, &btf); 783 + if (err) { 784 + p_err("failed to get btf"); 785 + goto err_close; 786 + } 787 + 788 + if (!btf) 789 + return 0; 790 + } 791 + 792 + return 1; 793 + 794 + err_close: 795 + for (; i < nb_fds; i++) 796 + close(fds[i]); 797 + return 
-1; 798 + } 799 + 800 + static int 801 + map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, 802 + bool enable_btf, bool show_header) 803 + { 917 804 void *key, *value, *prev_key; 918 805 unsigned int num_elems = 0; 919 - __u32 len = sizeof(info); 920 - json_writer_t *btf_wtr; 921 806 struct btf *btf = NULL; 922 807 int err; 923 - int fd; 924 808 925 - if (argc != 2) 926 - usage(); 927 - 928 - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); 929 - if (fd < 0) 930 - return -1; 931 - 932 - key = malloc(info.key_size); 933 - value = alloc_value(&info); 809 + key = malloc(info->key_size); 810 + value = alloc_value(info); 934 811 if (!key || !value) { 935 812 p_err("mem alloc failed"); 936 813 err = -1; ··· 962 793 963 794 prev_key = NULL; 964 795 965 - err = btf__get_from_id(info.btf_id, &btf); 966 - if (err) { 967 - p_err("failed to get btf"); 968 - goto exit_free; 796 + if (enable_btf) { 797 + err = btf__get_from_id(info->btf_id, &btf); 798 + if (err || !btf) { 799 + /* enable_btf is true only if we've already checked 800 + * that all maps have BTF information. 801 + */ 802 + p_err("failed to get btf"); 803 + goto exit_free; 804 + } 969 805 } 970 806 971 - if (json_output) 972 - jsonw_start_array(json_wtr); 973 - else 974 - if (btf) { 975 - btf_wtr = get_btf_writer(); 976 - if (!btf_wtr) { 977 - p_info("failed to create json writer for btf. 
falling back to plain output"); 978 - btf__free(btf); 979 - btf = NULL; 980 - } else { 981 - jsonw_start_array(btf_wtr); 982 - } 807 + if (wtr) { 808 + if (show_header) { 809 + jsonw_start_object(wtr); /* map object */ 810 + show_map_header_json(info, wtr); 811 + jsonw_name(wtr, "elements"); 983 812 } 813 + jsonw_start_array(wtr); /* elements */ 814 + } else if (show_header) { 815 + show_map_header_plain(info); 816 + } 984 817 985 - if (info.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && 986 - info.value_size != 8) 818 + if (info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && 819 + info->value_size != 8) 987 820 p_info("Warning: cannot read values from %s map with value_size != 8", 988 - map_type_name[info.type]); 821 + map_type_name[info->type]); 989 822 while (true) { 990 823 err = bpf_map_get_next_key(fd, prev_key, key); 991 824 if (err) { ··· 995 824 err = 0; 996 825 break; 997 826 } 998 - num_elems += dump_map_elem(fd, key, value, &info, btf, btf_wtr); 827 + num_elems += dump_map_elem(fd, key, value, info, btf, wtr); 999 828 prev_key = key; 1000 829 } 1001 830 1002 - if (json_output) 1003 - jsonw_end_array(json_wtr); 1004 - else if (btf) { 1005 - jsonw_end_array(btf_wtr); 1006 - jsonw_destroy(&btf_wtr); 831 + if (wtr) { 832 + jsonw_end_array(wtr); /* elements */ 833 + if (show_header) 834 + jsonw_end_object(wtr); /* map object */ 1007 835 } else { 1008 836 printf("Found %u element%s\n", num_elems, 1009 837 num_elems != 1 ? 
"s" : ""); ··· 1014 844 close(fd); 1015 845 btf__free(btf); 1016 846 847 + return err; 848 + } 849 + 850 + static int do_dump(int argc, char **argv) 851 + { 852 + json_writer_t *wtr = NULL, *btf_wtr = NULL; 853 + struct bpf_map_info info = {}; 854 + int nb_fds, i = 0, btf = 0; 855 + __u32 len = sizeof(info); 856 + int *fds = NULL; 857 + int err = -1; 858 + 859 + if (argc != 2) 860 + usage(); 861 + 862 + fds = malloc(sizeof(int)); 863 + if (!fds) { 864 + p_err("mem alloc failed"); 865 + return -1; 866 + } 867 + nb_fds = map_parse_fds(&argc, &argv, &fds); 868 + if (nb_fds < 1) 869 + goto exit_free; 870 + 871 + if (json_output) { 872 + wtr = json_wtr; 873 + } else { 874 + btf = maps_have_btf(fds, nb_fds); 875 + if (btf < 0) 876 + goto exit_close; 877 + if (btf) { 878 + btf_wtr = get_btf_writer(); 879 + if (btf_wtr) { 880 + wtr = btf_wtr; 881 + } else { 882 + p_info("failed to create json writer for btf. falling back to plain output"); 883 + btf = 0; 884 + } 885 + } 886 + } 887 + 888 + if (wtr && nb_fds > 1) 889 + jsonw_start_array(wtr); /* root array */ 890 + for (i = 0; i < nb_fds; i++) { 891 + if (bpf_obj_get_info_by_fd(fds[i], &info, &len)) { 892 + p_err("can't get map info: %s", strerror(errno)); 893 + break; 894 + } 895 + err = map_dump(fds[i], &info, wtr, btf, nb_fds > 1); 896 + if (!wtr && i != nb_fds - 1) 897 + printf("\n"); 898 + 899 + if (err) 900 + break; 901 + close(fds[i]); 902 + } 903 + if (wtr && nb_fds > 1) 904 + jsonw_end_array(wtr); /* root array */ 905 + 906 + if (btf) 907 + jsonw_destroy(&btf_wtr); 908 + exit_close: 909 + for (; i < nb_fds; i++) 910 + close(fds[i]); 911 + exit_free: 912 + free(fds); 1017 913 return err; 1018 914 } 1019 915
+1
tools/bpf/bpftool/net.c
···
 18  18
 19  19   #include <bpf.h>
 20  20   #include <nlattr.h>
 21   +   #include "libbpf_internal.h"
 21  22   #include "main.h"
 22  23   #include "netlink_dumper.h"
 23  24
+271 -117
tools/bpf/bpftool/prog.c
··· 25 25 #include "main.h" 26 26 #include "xlated_dumper.h" 27 27 28 + enum dump_mode { 29 + DUMP_JITED, 30 + DUMP_XLATED, 31 + }; 32 + 28 33 static const char * const attach_type_strings[] = { 29 34 [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", 30 35 [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", ··· 82 77 strftime(buf, size, "%FT%T%z", &load_tm); 83 78 } 84 79 85 - static int prog_fd_by_tag(unsigned char *tag) 80 + static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) 86 81 { 87 82 unsigned int id = 0; 83 + int fd, nb_fds = 0; 84 + void *tmp; 88 85 int err; 89 - int fd; 90 86 91 87 while (true) { 92 88 struct bpf_prog_info info = {}; ··· 95 89 96 90 err = bpf_prog_get_next_id(id, &id); 97 91 if (err) { 98 - p_err("%s", strerror(errno)); 99 - return -1; 92 + if (errno != ENOENT) { 93 + p_err("%s", strerror(errno)); 94 + goto err_close_fds; 95 + } 96 + return nb_fds; 100 97 } 101 98 102 99 fd = bpf_prog_get_fd_by_id(id); 103 100 if (fd < 0) { 104 101 p_err("can't get prog by id (%u): %s", 105 102 id, strerror(errno)); 106 - return -1; 103 + goto err_close_fds; 107 104 } 108 105 109 106 err = bpf_obj_get_info_by_fd(fd, &info, &len); 110 107 if (err) { 111 108 p_err("can't get prog info (%u): %s", 112 109 id, strerror(errno)); 113 - close(fd); 114 - return -1; 110 + goto err_close_fd; 115 111 } 116 112 117 - if (!memcmp(tag, info.tag, BPF_TAG_SIZE)) 118 - return fd; 113 + if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) || 114 + (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) { 115 + close(fd); 116 + continue; 117 + } 119 118 120 - close(fd); 119 + if (nb_fds > 0) { 120 + tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); 121 + if (!tmp) { 122 + p_err("failed to realloc"); 123 + goto err_close_fd; 124 + } 125 + *fds = tmp; 126 + } 127 + (*fds)[nb_fds++] = fd; 121 128 } 129 + 130 + err_close_fd: 131 + close(fd); 132 + err_close_fds: 133 + while (--nb_fds >= 0) 134 + close((*fds)[nb_fds]); 135 + return -1; 122 136 } 123 137 124 - int 
prog_parse_fd(int *argc, char ***argv) 138 + static int prog_parse_fds(int *argc, char ***argv, int **fds) 125 139 { 126 - int fd; 127 - 128 140 if (is_prefix(**argv, "id")) { 129 141 unsigned int id; 130 142 char *endptr; ··· 156 132 } 157 133 NEXT_ARGP(); 158 134 159 - fd = bpf_prog_get_fd_by_id(id); 160 - if (fd < 0) 135 + (*fds)[0] = bpf_prog_get_fd_by_id(id); 136 + if ((*fds)[0] < 0) { 161 137 p_err("get by id (%u): %s", id, strerror(errno)); 162 - return fd; 138 + return -1; 139 + } 140 + return 1; 163 141 } else if (is_prefix(**argv, "tag")) { 164 142 unsigned char tag[BPF_TAG_SIZE]; 165 143 ··· 175 149 } 176 150 NEXT_ARGP(); 177 151 178 - return prog_fd_by_tag(tag); 152 + return prog_fd_by_nametag(tag, fds, true); 153 + } else if (is_prefix(**argv, "name")) { 154 + char *name; 155 + 156 + NEXT_ARGP(); 157 + 158 + name = **argv; 159 + if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { 160 + p_err("can't parse name"); 161 + return -1; 162 + } 163 + NEXT_ARGP(); 164 + 165 + return prog_fd_by_nametag(name, fds, false); 179 166 } else if (is_prefix(**argv, "pinned")) { 180 167 char *path; 181 168 ··· 197 158 path = **argv; 198 159 NEXT_ARGP(); 199 160 200 - return open_obj_pinned_any(path, BPF_OBJ_PROG); 161 + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG); 162 + if ((*fds)[0] < 0) 163 + return -1; 164 + return 1; 201 165 } 202 166 203 - p_err("expected 'id', 'tag' or 'pinned', got: '%s'?", **argv); 167 + p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv); 204 168 return -1; 169 + } 170 + 171 + int prog_parse_fd(int *argc, char ***argv) 172 + { 173 + int *fds = NULL; 174 + int nb_fds, fd; 175 + 176 + fds = malloc(sizeof(int)); 177 + if (!fds) { 178 + p_err("mem alloc failed"); 179 + return -1; 180 + } 181 + nb_fds = prog_parse_fds(argc, argv, &fds); 182 + if (nb_fds != 1) { 183 + if (nb_fds > 1) { 184 + p_err("several programs match this handle"); 185 + while (nb_fds--) 186 + close(fds[nb_fds]); 187 + } 188 + fd = -1; 189 + goto exit_free; 190 + 
} 191 + 192 + fd = fds[0]; 193 + exit_free: 194 + free(fds); 195 + return fd; 205 196 } 206 197 207 198 static void show_prog_maps(int fd, u32 num_maps) ··· 263 194 } 264 195 } 265 196 266 - static void print_prog_json(struct bpf_prog_info *info, int fd) 197 + static void print_prog_header_json(struct bpf_prog_info *info) 267 198 { 268 - char *memlock; 269 - 270 - jsonw_start_object(json_wtr); 271 199 jsonw_uint_field(json_wtr, "id", info->id); 272 200 if (info->type < ARRAY_SIZE(prog_type_name)) 273 201 jsonw_string_field(json_wtr, "type", ··· 285 219 jsonw_uint_field(json_wtr, "run_time_ns", info->run_time_ns); 286 220 jsonw_uint_field(json_wtr, "run_cnt", info->run_cnt); 287 221 } 222 + } 288 223 224 + static void print_prog_json(struct bpf_prog_info *info, int fd) 225 + { 226 + char *memlock; 227 + 228 + jsonw_start_object(json_wtr); 229 + print_prog_header_json(info); 289 230 print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); 290 231 291 232 if (info->load_time) { ··· 341 268 jsonw_end_object(json_wtr); 342 269 } 343 270 344 - static void print_prog_plain(struct bpf_prog_info *info, int fd) 271 + static void print_prog_header_plain(struct bpf_prog_info *info) 345 272 { 346 - char *memlock; 347 - 348 273 printf("%u: ", info->id); 349 274 if (info->type < ARRAY_SIZE(prog_type_name)) 350 275 printf("%s ", prog_type_name[info->type]); ··· 360 289 printf(" run_time_ns %lld run_cnt %lld", 361 290 info->run_time_ns, info->run_cnt); 362 291 printf("\n"); 292 + } 293 + 294 + static void print_prog_plain(struct bpf_prog_info *info, int fd) 295 + { 296 + char *memlock; 297 + 298 + print_prog_header_plain(info); 363 299 364 300 if (info->load_time) { 365 301 char buf[32]; ··· 427 349 return 0; 428 350 } 429 351 352 + static int do_show_subset(int argc, char **argv) 353 + { 354 + int *fds = NULL; 355 + int nb_fds, i; 356 + int err = -1; 357 + 358 + fds = malloc(sizeof(int)); 359 + if (!fds) { 360 + p_err("mem alloc failed"); 361 + return -1; 362 + } 363 + 
nb_fds = prog_parse_fds(&argc, &argv, &fds); 364 + if (nb_fds < 1) 365 + goto exit_free; 366 + 367 + if (json_output && nb_fds > 1) 368 + jsonw_start_array(json_wtr); /* root array */ 369 + for (i = 0; i < nb_fds; i++) { 370 + err = show_prog(fds[i]); 371 + if (err) { 372 + for (; i < nb_fds; i++) 373 + close(fds[i]); 374 + break; 375 + } 376 + close(fds[i]); 377 + } 378 + if (json_output && nb_fds > 1) 379 + jsonw_end_array(json_wtr); /* root array */ 380 + 381 + exit_free: 382 + free(fds); 383 + return err; 384 + } 385 + 430 386 static int do_show(int argc, char **argv) 431 387 { 432 388 __u32 id = 0; ··· 470 358 if (show_pinned) 471 359 build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); 472 360 473 - if (argc == 2) { 474 - fd = prog_parse_fd(&argc, &argv); 475 - if (fd < 0) 476 - return -1; 477 - 478 - err = show_prog(fd); 479 - close(fd); 480 - return err; 481 - } 361 + if (argc == 2) 362 + return do_show_subset(argc, argv); 482 363 483 364 if (argc) 484 365 return BAD_ARG(); ··· 513 408 return err; 514 409 } 515 410 516 - static int do_dump(int argc, char **argv) 411 + static int 412 + prog_dump(struct bpf_prog_info *info, enum dump_mode mode, 413 + char *filepath, bool opcodes, bool visual, bool linum) 517 414 { 518 - struct bpf_prog_info_linear *info_linear; 519 415 struct bpf_prog_linfo *prog_linfo = NULL; 520 - enum {DUMP_JITED, DUMP_XLATED} mode; 521 416 const char *disasm_opt = NULL; 522 - struct bpf_prog_info *info; 523 417 struct dump_data dd = {}; 524 418 void *func_info = NULL; 525 419 struct btf *btf = NULL; 526 - char *filepath = NULL; 527 - bool opcodes = false; 528 - bool visual = false; 529 420 char func_sig[1024]; 530 421 unsigned char *buf; 531 - bool linum = false; 532 422 __u32 member_len; 533 - __u64 arrays; 534 423 ssize_t n; 535 424 int fd; 536 425 537 - if (is_prefix(*argv, "jited")) { 538 - if (disasm_init()) 539 - return -1; 540 - mode = DUMP_JITED; 541 - } else if (is_prefix(*argv, "xlated")) { 542 - mode = DUMP_XLATED; 543 - } else { 
544 - p_err("expected 'xlated' or 'jited', got: %s", *argv); 545 - return -1; 546 - } 547 - NEXT_ARG(); 548 - 549 - if (argc < 2) 550 - usage(); 551 - 552 - fd = prog_parse_fd(&argc, &argv); 553 - if (fd < 0) 554 - return -1; 555 - 556 - if (is_prefix(*argv, "file")) { 557 - NEXT_ARG(); 558 - if (!argc) { 559 - p_err("expected file path"); 560 - return -1; 561 - } 562 - 563 - filepath = *argv; 564 - NEXT_ARG(); 565 - } else if (is_prefix(*argv, "opcodes")) { 566 - opcodes = true; 567 - NEXT_ARG(); 568 - } else if (is_prefix(*argv, "visual")) { 569 - visual = true; 570 - NEXT_ARG(); 571 - } else if (is_prefix(*argv, "linum")) { 572 - linum = true; 573 - NEXT_ARG(); 574 - } 575 - 576 - if (argc) { 577 - usage(); 578 - return -1; 579 - } 580 - 581 - if (mode == DUMP_JITED) 582 - arrays = 1UL << BPF_PROG_INFO_JITED_INSNS; 583 - else 584 - arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS; 585 - 586 - arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS; 587 - arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; 588 - arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; 589 - arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; 590 - arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; 591 - 592 - info_linear = bpf_program__get_prog_info_linear(fd, arrays); 593 - close(fd); 594 - if (IS_ERR_OR_NULL(info_linear)) { 595 - p_err("can't get prog info: %s", strerror(errno)); 596 - return -1; 597 - } 598 - 599 - info = &info_linear->info; 600 426 if (mode == DUMP_JITED) { 601 427 if (info->jited_prog_len == 0 || !info->jited_prog_insns) { 602 428 p_info("no instructions returned"); 603 - goto err_free; 429 + return -1; 604 430 } 605 431 buf = (unsigned char *)(info->jited_prog_insns); 606 432 member_len = info->jited_prog_len; 607 433 } else { /* DUMP_XLATED */ 608 434 if (info->xlated_prog_len == 0) { 609 435 p_err("error retrieving insn dump: kernel.kptr_restrict set?"); 610 - goto err_free; 436 + return -1; 611 437 } 612 438 buf = (unsigned char *)info->xlated_prog_insns; 613 439 member_len = info->xlated_prog_len; ··· 
546 510 547 511 if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) { 548 512 p_err("failed to get btf"); 549 - goto err_free; 513 + return -1; 550 514 } 551 515 552 516 func_info = (void *)info->func_info; ··· 562 526 if (fd < 0) { 563 527 p_err("can't open file %s: %s", filepath, 564 528 strerror(errno)); 565 - goto err_free; 529 + return -1; 566 530 } 567 531 568 532 n = write(fd, buf, member_len); ··· 570 534 if (n != member_len) { 571 535 p_err("error writing output file: %s", 572 536 n < 0 ? strerror(errno) : "short write"); 573 - goto err_free; 537 + return -1; 574 538 } 575 539 576 540 if (json_output) ··· 584 548 info->netns_ino, 585 549 &disasm_opt); 586 550 if (!name) 587 - goto err_free; 551 + return -1; 588 552 } 589 553 590 554 if (info->nr_jited_func_lens && info->jited_func_lens) { ··· 679 643 kernel_syms_destroy(&dd); 680 644 } 681 645 682 - free(info_linear); 683 646 return 0; 647 + } 684 648 685 - err_free: 686 - free(info_linear); 687 - return -1; 649 + static int do_dump(int argc, char **argv) 650 + { 651 + struct bpf_prog_info_linear *info_linear; 652 + char *filepath = NULL; 653 + bool opcodes = false; 654 + bool visual = false; 655 + enum dump_mode mode; 656 + bool linum = false; 657 + int *fds = NULL; 658 + int nb_fds, i = 0; 659 + int err = -1; 660 + __u64 arrays; 661 + 662 + if (is_prefix(*argv, "jited")) { 663 + if (disasm_init()) 664 + return -1; 665 + mode = DUMP_JITED; 666 + } else if (is_prefix(*argv, "xlated")) { 667 + mode = DUMP_XLATED; 668 + } else { 669 + p_err("expected 'xlated' or 'jited', got: %s", *argv); 670 + return -1; 671 + } 672 + NEXT_ARG(); 673 + 674 + if (argc < 2) 675 + usage(); 676 + 677 + fds = malloc(sizeof(int)); 678 + if (!fds) { 679 + p_err("mem alloc failed"); 680 + return -1; 681 + } 682 + nb_fds = prog_parse_fds(&argc, &argv, &fds); 683 + if (nb_fds < 1) 684 + goto exit_free; 685 + 686 + if (is_prefix(*argv, "file")) { 687 + NEXT_ARG(); 688 + if (!argc) { 689 + p_err("expected file path"); 690 + goto 
exit_close; 691 + } 692 + if (nb_fds > 1) { 693 + p_err("several programs matched"); 694 + goto exit_close; 695 + } 696 + 697 + filepath = *argv; 698 + NEXT_ARG(); 699 + } else if (is_prefix(*argv, "opcodes")) { 700 + opcodes = true; 701 + NEXT_ARG(); 702 + } else if (is_prefix(*argv, "visual")) { 703 + if (nb_fds > 1) { 704 + p_err("several programs matched"); 705 + goto exit_close; 706 + } 707 + 708 + visual = true; 709 + NEXT_ARG(); 710 + } else if (is_prefix(*argv, "linum")) { 711 + linum = true; 712 + NEXT_ARG(); 713 + } 714 + 715 + if (argc) { 716 + usage(); 717 + goto exit_close; 718 + } 719 + 720 + if (mode == DUMP_JITED) 721 + arrays = 1UL << BPF_PROG_INFO_JITED_INSNS; 722 + else 723 + arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS; 724 + 725 + arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS; 726 + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; 727 + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; 728 + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; 729 + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; 730 + 731 + if (json_output && nb_fds > 1) 732 + jsonw_start_array(json_wtr); /* root array */ 733 + for (i = 0; i < nb_fds; i++) { 734 + info_linear = bpf_program__get_prog_info_linear(fds[i], arrays); 735 + if (IS_ERR_OR_NULL(info_linear)) { 736 + p_err("can't get prog info: %s", strerror(errno)); 737 + break; 738 + } 739 + 740 + if (json_output && nb_fds > 1) { 741 + jsonw_start_object(json_wtr); /* prog object */ 742 + print_prog_header_json(&info_linear->info); 743 + jsonw_name(json_wtr, "insns"); 744 + } else if (nb_fds > 1) { 745 + print_prog_header_plain(&info_linear->info); 746 + } 747 + 748 + err = prog_dump(&info_linear->info, mode, filepath, opcodes, 749 + visual, linum); 750 + 751 + if (json_output && nb_fds > 1) 752 + jsonw_end_object(json_wtr); /* prog object */ 753 + else if (i != nb_fds - 1 && nb_fds > 1) 754 + printf("\n"); 755 + 756 + free(info_linear); 757 + if (err) 758 + break; 759 + close(fds[i]); 760 + } 761 + if (json_output && nb_fds > 1) 762 + 
jsonw_end_array(json_wtr); /* root array */ 763 + 764 + exit_close: 765 + for (; i < nb_fds; i++) 766 + close(fds[i]); 767 + exit_free: 768 + free(fds); 769 + return err; 688 770 } 689 771 690 772 static int do_pin(int argc, char **argv)
+2
tools/include/uapi/asm/bpf_perf_event.h
···
  2   2   #include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h"
  3   3   #elif defined(__s390__)
  4   4   #include "../../arch/s390/include/uapi/asm/bpf_perf_event.h"
  5   +   #elif defined(__riscv)
  6   +   #include "../../arch/riscv/include/uapi/asm/bpf_perf_event.h"
  5   7   #else
  6   8   #include <uapi/asm-generic/bpf_perf_event.h>
  7   9   #endif
+10
tools/include/uapi/linux/bpf.h
···
 231 231    * When children program makes decision (like picking TCP CA or sock bind)
 232 232    * parent program has a chance to override it.
 233 233    *
 234   +    * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
 235   +    * programs for a cgroup. Though it's possible to replace an old program at
 236   +    * any position by also specifying BPF_F_REPLACE flag and position itself in
 237   +    * replace_bpf_fd attribute. Old program at this position will be released.
 238   +    *
 234 239    * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
 235 240    * A cgroup with NONE doesn't allow any programs in sub-cgroups.
 236 241    * Ex1:
···
 254 249    */
 255 250   #define BPF_F_ALLOW_OVERRIDE (1U << 0)
 256 251   #define BPF_F_ALLOW_MULTI    (1U << 1)
 252   +   #define BPF_F_REPLACE        (1U << 2)
 257 253
 258 254   /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
 259 255    * verifier will perform strict alignment checking as if the kernel
···
 448 442   	__u32 attach_bpf_fd;  /* eBPF program to attach */
 449 443   	__u32 attach_type;
 450 444   	__u32 attach_flags;
 445   +   	__u32 replace_bpf_fd; /* previously attached eBPF
 446   +   			       * program to replace if
 447   +   			       * BPF_F_REPLACE is used
 448   +   			       */
 451 449   };
 452 450
 453 451   struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+4 -3
tools/include/uapi/linux/btf.h
···
 22  22   };
 23  23
 24  24   /* Max # of type identifier */
 25   -   #define BTF_MAX_TYPE 0x0000ffff
 25   +   #define BTF_MAX_TYPE 0x000fffff
 26  26   /* Max offset into the string section */
 27   -   #define BTF_MAX_NAME_OFFSET 0x0000ffff
 27   +   #define BTF_MAX_NAME_OFFSET 0x00ffffff
 28  28   /* Max # of struct/union/enum members or func args */
 29  29   #define BTF_MAX_VLEN 0xffff
 30  30
···
 142 142
 143 143   enum {
 144 144   	BTF_VAR_STATIC = 0,
 145   -   	BTF_VAR_GLOBAL_ALLOCATED,
 145   +   	BTF_VAR_GLOBAL_ALLOCATED = 1,
 146   +   	BTF_VAR_GLOBAL_EXTERN = 2,
 146 147   };
 147 148
 148 149   /* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
+12 -6
tools/lib/bpf/Makefile
··· 56 56 endif 57 57 58 58 FEATURE_USER = .libbpf 59 - FEATURE_TESTS = libelf libelf-mmap bpf reallocarray 60 - FEATURE_DISPLAY = libelf bpf 59 + FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray 60 + FEATURE_DISPLAY = libelf zlib bpf 61 61 62 62 INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi 63 63 FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) ··· 147 147 148 148 GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ 149 149 cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ 150 + sed 's/\[.*\]//' | \ 150 151 awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ 151 152 sort -u | wc -l) 152 153 VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ ··· 160 159 161 160 all_cmd: $(CMD_TARGETS) check 162 161 163 - $(BPF_IN_SHARED): force elfdep bpfdep bpf_helper_defs.h 162 + $(BPF_IN_SHARED): force elfdep zdep bpfdep bpf_helper_defs.h 164 163 @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ 165 164 (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ 166 165 echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true ··· 178 177 echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true 179 178 $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" 180 179 181 - $(BPF_IN_STATIC): force elfdep bpfdep bpf_helper_defs.h 180 + $(BPF_IN_STATIC): force elfdep zdep bpfdep bpf_helper_defs.h 182 181 $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) 183 182 184 183 bpf_helper_defs.h: $(srctree)/tools/include/uapi/linux/bpf.h ··· 190 189 $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) 191 190 $(QUIET_LINK)$(CC) $(LDFLAGS) \ 192 191 --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ 193 - 
-Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ 192 + -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -lz -o $@ 194 193 @ln -sf $(@F) $(OUTPUT)libbpf.so 195 194 @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION) 196 195 ··· 214 213 "versioned in $(VERSION_SCRIPT)." >&2; \ 215 214 readelf -s --wide $(BPF_IN_SHARED) | \ 216 215 cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ 216 + sed 's/\[.*\]//' | \ 217 217 awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ 218 218 sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ 219 219 readelf -s --wide $(OUTPUT)libbpf.so | \ ··· 251 249 $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ 252 250 $(call do_install,btf.h,$(prefix)/include/bpf,644); \ 253 251 $(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \ 252 + $(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \ 254 253 $(call do_install,xsk.h,$(prefix)/include/bpf,644); \ 255 254 $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \ 256 255 $(call do_install,bpf_helper_defs.h,$(prefix)/include/bpf,644); \ ··· 280 277 281 278 282 279 283 - PHONY += force elfdep bpfdep cscope tags 280 + PHONY += force elfdep zdep bpfdep cscope tags 284 281 force: 285 282 286 283 elfdep: 287 284 @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi 285 + 286 + zdep: 287 + @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi 288 288 289 289 bpfdep: 290 290 @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi
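The `sed 's/\[.*\]//'` added to the Makefile's symbol-count pipeline strips bracketed annotations that some readelf builds append after a symbol name (for example the `[<localentry>: 8]` marker on ppc64), which would otherwise end up in `$NF` and skew the versioned-vs-global symbol comparison. A minimal sketch of the pipeline on hand-written sample lines (the symbol names and addresses below are illustrative, not real libbpf output):

```shell
# Illustrative `readelf -s --wide` lines: one versioned global, one global
# with a bracketed annotation, one undefined import, one local symbol.
sample='    23: 00000000000176e0   344 FUNC    GLOBAL DEFAULT   12 bpf_prog_attach@@LIBBPF_0.0.1
    24: 0000000000017840   168 FUNC    GLOBAL DEFAULT   12 btf__free [<localentry>: 8]
    25: 0000000000000000     0 FUNC    GLOBAL DEFAULT  UND free@GLIBC_2.2.5
    26: 0000000000017900    88 FUNC    LOCAL  DEFAULT   12 sys_bpf'

# Same filter chain as GLOBAL_SYM_COUNT in the Makefile: drop the version
# suffix, drop bracketed annotations, keep defined GLOBAL DEFAULT symbols.
count=$(printf '%s\n' "$sample" | \
	cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
	sed 's/\[.*\]//' | \
	awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $NF}' | \
	sort -u | wc -l)
echo "$count"
```

Without the bracket-stripping sed, the `btf__free` line would contribute `8]` as its last field instead of the symbol name.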
+16 -1
tools/lib/bpf/bpf.c
···
 467 467   int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
 468 468   		    unsigned int flags)
 469 469   {
 470   +   	DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts,
 471   +   		.flags = flags,
 472   +   	);
 473   +
 474   +   	return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
 475   +   }
 476   +
 477   +   int bpf_prog_attach_xattr(int prog_fd, int target_fd,
 478   +   			  enum bpf_attach_type type,
 479   +   			  const struct bpf_prog_attach_opts *opts)
 480   +   {
 470 481   	union bpf_attr attr;
 482   +
 483   +   	if (!OPTS_VALID(opts, bpf_prog_attach_opts))
 484   +   		return -EINVAL;
 471 485
 472 486   	memset(&attr, 0, sizeof(attr));
 473 487   	attr.target_fd = target_fd;
 474 488   	attr.attach_bpf_fd = prog_fd;
 475 489   	attr.attach_type = type;
 476   -   	attr.attach_flags = flags;
 490   +   	attr.attach_flags = OPTS_GET(opts, flags, 0);
 491   +   	attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
 477 492
 478 493   	return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
 479 494   }
+13 -4
tools/lib/bpf/bpf.h
···
 28  28   #include <stddef.h>
 29  29   #include <stdint.h>
 30  30
 31   +   #include "libbpf_common.h"
 32   +
 31  33   #ifdef __cplusplus
 32  34   extern "C" {
 33   -   #endif
 34   -
 35   -   #ifndef LIBBPF_API
 36   -   #define LIBBPF_API __attribute__((visibility("default")))
 37  35   #endif
 38  36
 39  37   struct bpf_create_map_attr {
···
 126 128   LIBBPF_API int bpf_map_freeze(int fd);
 127 129   LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
 128 130   LIBBPF_API int bpf_obj_get(const char *pathname);
 131   +
 132   +   struct bpf_prog_attach_opts {
 133   +   	size_t sz; /* size of this struct for forward/backward compatibility */
 134   +   	unsigned int flags;
 135   +   	int replace_prog_fd;
 136   +   };
 137   +   #define bpf_prog_attach_opts__last_field replace_prog_fd
 138   +
 129 139   LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
 130 140   			       enum bpf_attach_type type, unsigned int flags);
 141   +   LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
 142   +   				     enum bpf_attach_type type,
 143   +   				     const struct bpf_prog_attach_opts *opts);
 131 144   LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
 132 145   LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
 133 146   			       enum bpf_attach_type type);
+11
tools/lib/bpf/bpf_helpers.h
···
 25  25   #ifndef __always_inline
 26  26   #define __always_inline __attribute__((always_inline))
 27  27   #endif
 28   +   #ifndef __weak
 29   +   #define __weak __attribute__((weak))
 30   +   #endif
 28  31
 29  32   /*
 30  33    * Helper structure used by eBPF C program
···
 46  43   	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
 47  44   	LIBBPF_PIN_BY_NAME,
 48  45   };
 46   +
 47   +   enum libbpf_tristate {
 48   +   	TRI_NO = 0,
 49   +   	TRI_YES = 1,
 50   +   	TRI_MODULE = 2,
 51   +   };
 52   +
 53   +   #define __kconfig __attribute__((section(".kconfig")))
 49  54
 50  55   #endif
+47 -1
tools/lib/bpf/btf.c
··· 278 278 return nelems * size; 279 279 } 280 280 281 + int btf__align_of(const struct btf *btf, __u32 id) 282 + { 283 + const struct btf_type *t = btf__type_by_id(btf, id); 284 + __u16 kind = btf_kind(t); 285 + 286 + switch (kind) { 287 + case BTF_KIND_INT: 288 + case BTF_KIND_ENUM: 289 + return min(sizeof(void *), t->size); 290 + case BTF_KIND_PTR: 291 + return sizeof(void *); 292 + case BTF_KIND_TYPEDEF: 293 + case BTF_KIND_VOLATILE: 294 + case BTF_KIND_CONST: 295 + case BTF_KIND_RESTRICT: 296 + return btf__align_of(btf, t->type); 297 + case BTF_KIND_ARRAY: 298 + return btf__align_of(btf, btf_array(t)->type); 299 + case BTF_KIND_STRUCT: 300 + case BTF_KIND_UNION: { 301 + const struct btf_member *m = btf_members(t); 302 + __u16 vlen = btf_vlen(t); 303 + int i, max_align = 1, align; 304 + 305 + for (i = 0; i < vlen; i++, m++) { 306 + align = btf__align_of(btf, m->type); 307 + if (align <= 0) 308 + return align; 309 + max_align = max(max_align, align); 310 + } 311 + 312 + return max_align; 313 + } 314 + default: 315 + pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); 316 + return 0; 317 + } 318 + } 319 + 281 320 int btf__resolve_type(const struct btf *btf, __u32 type_id) 282 321 { 283 322 const struct btf_type *t; ··· 578 539 return -ENOENT; 579 540 } 580 541 542 + /* .extern datasec size and var offsets were set correctly during 543 + * extern collection step, so just skip straight to sorting variables 544 + */ 545 + if (t->size) 546 + goto sort_vars; 547 + 581 548 ret = bpf_object__section_size(obj, name, &size); 582 549 if (ret || !size || (t->size && t->size != size)) { 583 550 pr_debug("Invalid size for section %s: %u bytes\n", name, size); ··· 620 575 vsi->offset = off; 621 576 } 622 577 623 - qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off); 578 + sort_vars: 579 + qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); 624 580 return 0; 625 581 } 626 582
+25 -4
tools/lib/bpf/btf.h
··· 8 8 #include <linux/btf.h> 9 9 #include <linux/types.h> 10 10 11 + #include "libbpf_common.h" 12 + 11 13 #ifdef __cplusplus 12 14 extern "C" { 13 - #endif 14 - 15 - #ifndef LIBBPF_API 16 - #define LIBBPF_API __attribute__((visibility("default"))) 17 15 #endif 18 16 19 17 #define BTF_ELF_SEC ".BTF" ··· 77 79 __u32 id); 78 80 LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); 79 81 LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); 82 + LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); 80 83 LIBBPF_API int btf__fd(const struct btf *btf); 81 84 LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); 82 85 LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); ··· 125 126 LIBBPF_API void btf_dump__free(struct btf_dump *d); 126 127 127 128 LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); 129 + 130 + struct btf_dump_emit_type_decl_opts { 131 + /* size of this struct, for forward/backward compatiblity */ 132 + size_t sz; 133 + /* optional field name for type declaration, e.g.: 134 + * - struct my_struct <FNAME> 135 + * - void (*<FNAME>)(int) 136 + * - char (*<FNAME>)[123] 137 + */ 138 + const char *field_name; 139 + /* extra indentation level (in number of tabs) to emit for multi-line 140 + * type declarations (e.g., anonymous struct); applies for lines 141 + * starting from the second one (first line is assumed to have 142 + * necessary indentation already 143 + */ 144 + int indent_level; 145 + }; 146 + #define btf_dump_emit_type_decl_opts__last_field indent_level 147 + 148 + LIBBPF_API int 149 + btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, 150 + const struct btf_dump_emit_type_decl_opts *opts); 128 151 129 152 /* 130 153 * A set of helpers for easier BTF types handling
+48 -67
tools/lib/bpf/btf_dump.c
··· 116 116 va_end(args); 117 117 } 118 118 119 + static int btf_dump_mark_referenced(struct btf_dump *d); 120 + 119 121 struct btf_dump *btf_dump__new(const struct btf *btf, 120 122 const struct btf_ext *btf_ext, 121 123 const struct btf_dump_opts *opts, ··· 139 137 if (IS_ERR(d->type_names)) { 140 138 err = PTR_ERR(d->type_names); 141 139 d->type_names = NULL; 142 - btf_dump__free(d); 143 - return ERR_PTR(err); 144 140 } 145 141 d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); 146 142 if (IS_ERR(d->ident_names)) { 147 143 err = PTR_ERR(d->ident_names); 148 144 d->ident_names = NULL; 149 - btf_dump__free(d); 150 - return ERR_PTR(err); 145 + goto err; 146 + } 147 + d->type_states = calloc(1 + btf__get_nr_types(d->btf), 148 + sizeof(d->type_states[0])); 149 + if (!d->type_states) { 150 + err = -ENOMEM; 151 + goto err; 152 + } 153 + d->cached_names = calloc(1 + btf__get_nr_types(d->btf), 154 + sizeof(d->cached_names[0])); 155 + if (!d->cached_names) { 156 + err = -ENOMEM; 157 + goto err; 151 158 } 152 159 160 + /* VOID is special */ 161 + d->type_states[0].order_state = ORDERED; 162 + d->type_states[0].emit_state = EMITTED; 163 + 164 + /* eagerly determine referenced types for anon enums */ 165 + err = btf_dump_mark_referenced(d); 166 + if (err) 167 + goto err; 168 + 153 169 return d; 170 + err: 171 + btf_dump__free(d); 172 + return ERR_PTR(err); 154 173 } 155 174 156 175 void btf_dump__free(struct btf_dump *d) ··· 198 175 free(d); 199 176 } 200 177 201 - static int btf_dump_mark_referenced(struct btf_dump *d); 202 178 static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr); 203 179 static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id); 204 180 ··· 223 201 224 202 if (id > btf__get_nr_types(d->btf)) 225 203 return -EINVAL; 226 - 227 - /* type states are lazily allocated, as they might not be needed */ 228 - if (!d->type_states) { 229 - d->type_states = calloc(1 + btf__get_nr_types(d->btf), 230 - 
sizeof(d->type_states[0])); 231 - if (!d->type_states) 232 - return -ENOMEM; 233 - d->cached_names = calloc(1 + btf__get_nr_types(d->btf), 234 - sizeof(d->cached_names[0])); 235 - if (!d->cached_names) 236 - return -ENOMEM; 237 - 238 - /* VOID is special */ 239 - d->type_states[0].order_state = ORDERED; 240 - d->type_states[0].emit_state = EMITTED; 241 - 242 - /* eagerly determine referenced types for anon enums */ 243 - err = btf_dump_mark_referenced(d); 244 - if (err) 245 - return err; 246 - } 247 204 248 205 d->emit_queue_cnt = 0; 249 206 err = btf_dump_order_type(d, id, false); ··· 753 752 } 754 753 } 755 754 756 - static int btf_align_of(const struct btf *btf, __u32 id) 757 - { 758 - const struct btf_type *t = btf__type_by_id(btf, id); 759 - __u16 kind = btf_kind(t); 760 - 761 - switch (kind) { 762 - case BTF_KIND_INT: 763 - case BTF_KIND_ENUM: 764 - return min(sizeof(void *), t->size); 765 - case BTF_KIND_PTR: 766 - return sizeof(void *); 767 - case BTF_KIND_TYPEDEF: 768 - case BTF_KIND_VOLATILE: 769 - case BTF_KIND_CONST: 770 - case BTF_KIND_RESTRICT: 771 - return btf_align_of(btf, t->type); 772 - case BTF_KIND_ARRAY: 773 - return btf_align_of(btf, btf_array(t)->type); 774 - case BTF_KIND_STRUCT: 775 - case BTF_KIND_UNION: { 776 - const struct btf_member *m = btf_members(t); 777 - __u16 vlen = btf_vlen(t); 778 - int i, align = 1; 779 - 780 - for (i = 0; i < vlen; i++, m++) 781 - align = max(align, btf_align_of(btf, m->type)); 782 - 783 - return align; 784 - } 785 - default: 786 - pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); 787 - return 1; 788 - } 789 - } 790 - 791 755 static bool btf_is_struct_packed(const struct btf *btf, __u32 id, 792 756 const struct btf_type *t) 793 757 { ··· 760 794 int align, i, bit_sz; 761 795 __u16 vlen; 762 796 763 - align = btf_align_of(btf, id); 797 + align = btf__align_of(btf, id); 764 798 /* size of a non-packed struct has to be a multiple of its alignment*/ 765 - if (t->size % align) 799 + if (align && t->size % align) 
766 800 return true; 767 801 768 802 m = btf_members(t); 769 803 vlen = btf_vlen(t); 770 804 /* all non-bitfield fields have to be naturally aligned */ 771 805 for (i = 0; i < vlen; i++, m++) { 772 - align = btf_align_of(btf, m->type); 806 + align = btf__align_of(btf, m->type); 773 807 bit_sz = btf_member_bitfield_size(t, i); 774 - if (bit_sz == 0 && m->offset % (8 * align) != 0) 808 + if (align && bit_sz == 0 && m->offset % (8 * align) != 0) 775 809 return true; 776 810 } 777 811 ··· 855 889 fname = btf_name_of(d, m->name_off); 856 890 m_sz = btf_member_bitfield_size(t, i); 857 891 m_off = btf_member_bit_offset(t, i); 858 - align = packed ? 1 : btf_align_of(d->btf, m->type); 892 + align = packed ? 1 : btf__align_of(d->btf, m->type); 859 893 860 894 btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); 861 895 btf_dump_printf(d, "\n%s", pfx(lvl + 1)); ··· 873 907 874 908 /* pad at the end, if necessary */ 875 909 if (is_struct) { 876 - align = packed ? 1 : btf_align_of(d->btf, id); 910 + align = packed ? 1 : btf__align_of(d->btf, id); 877 911 btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, 878 912 lvl + 1); 879 913 } ··· 1017 1051 * of a stack frame. Some care is required to "pop" stack frames after 1018 1052 * processing type declaration chain. 1019 1053 */ 1054 + int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, 1055 + const struct btf_dump_emit_type_decl_opts *opts) 1056 + { 1057 + const char *fname; 1058 + int lvl; 1059 + 1060 + if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts)) 1061 + return -EINVAL; 1062 + 1063 + fname = OPTS_GET(opts, field_name, NULL); 1064 + lvl = OPTS_GET(opts, indent_level, 0); 1065 + btf_dump_emit_type_decl(d, id, fname, lvl); 1066 + return 0; 1067 + } 1068 + 1020 1069 static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, 1021 1070 const char *fname, int lvl) 1022 1071 {
+1458 -291
tools/lib/bpf/libbpf.c
···
 #include <stdarg.h>
 #include <libgen.h>
 #include <inttypes.h>
+#include <limits.h>
 #include <string.h>
 #include <unistd.h>
 #include <endian.h>
···
 #include <sys/types.h>
 #include <sys/vfs.h>
 #include <sys/utsname.h>
+#include <sys/resource.h>
 #include <tools/libc_compat.h>
 #include <libelf.h>
 #include <gelf.h>
+#include <zlib.h>

 #include "libbpf.h"
 #include "bpf.h"
···
 	va_end(args);
 }

+static void pr_perm_msg(int err)
+{
+	struct rlimit limit;
+	char buf[100];
+
+	if (err != -EPERM || geteuid() != 0)
+		return;
+
+	err = getrlimit(RLIMIT_MEMLOCK, &limit);
+	if (err)
+		return;
+
+	if (limit.rlim_cur == RLIM_INFINITY)
+		return;
+
+	if (limit.rlim_cur < 1024)
+		snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
+	else if (limit.rlim_cur < 1024*1024)
+		snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
+	else
+		snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
+
+	pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
+		buf);
+}
+
 #define STRERR_BUFSIZE 128
-
-#define CHECK_ERR(action, err, out) do {	\
-	err = action;				\
-	if (err)				\
-		goto out;			\
-} while (0)
-

 /* Copied from tools/perf/util/util.h */
 #ifndef zfree
···
 	__u32 array_mmap:1;
 };

+enum reloc_type {
+	RELO_LD64,
+	RELO_CALL,
+	RELO_DATA,
+	RELO_EXTERN,
+};
+
+struct reloc_desc {
+	enum reloc_type type;
+	int insn_idx;
+	int map_idx;
+	int sym_off;
+};
+
 /*
  * bpf_prog should be a better name but it has been used in
  * linux/filter.h.
···
 	size_t insns_cnt, main_prog_cnt;
 	enum bpf_prog_type type;

-	struct reloc_desc {
-		enum {
-			RELO_LD64,
-			RELO_CALL,
-			RELO_DATA,
-		} type;
-		int insn_idx;
-		int map_idx;
-		int sym_off;
-	} *reloc_desc;
+	struct reloc_desc *reloc_desc;
 	int nr_reloc;
 	int log_level;
···
 	__u32 prog_flags;
 };

+#define DATA_SEC ".data"
+#define BSS_SEC ".bss"
+#define RODATA_SEC ".rodata"
+#define KCONFIG_SEC ".kconfig"
+
 enum libbpf_map_type {
 	LIBBPF_MAP_UNSPEC,
 	LIBBPF_MAP_DATA,
 	LIBBPF_MAP_BSS,
 	LIBBPF_MAP_RODATA,
+	LIBBPF_MAP_KCONFIG,
 };

 static const char * const libbpf_type_to_btf_name[] = {
-	[LIBBPF_MAP_DATA]	= ".data",
-	[LIBBPF_MAP_BSS]	= ".bss",
-	[LIBBPF_MAP_RODATA]	= ".rodata",
+	[LIBBPF_MAP_DATA]	= DATA_SEC,
+	[LIBBPF_MAP_BSS]	= BSS_SEC,
+	[LIBBPF_MAP_RODATA]	= RODATA_SEC,
+	[LIBBPF_MAP_KCONFIG]	= KCONFIG_SEC,
 };

 struct bpf_map {
-	int fd;
 	char *name;
+	int fd;
 	int sec_idx;
 	size_t sec_offset;
 	int map_ifindex;
···
 	void *priv;
 	bpf_map_clear_priv_t clear_priv;
 	enum libbpf_map_type libbpf_type;
+	void *mmaped;
 	char *pin_path;
 	bool pinned;
 	bool reused;
 };

-struct bpf_secdata {
-	void *rodata;
-	void *data;
+enum extern_type {
+	EXT_UNKNOWN,
+	EXT_CHAR,
+	EXT_BOOL,
+	EXT_INT,
+	EXT_TRISTATE,
+	EXT_CHAR_ARR,
+};
+
+struct extern_desc {
+	const char *name;
+	int sym_idx;
+	int btf_id;
+	enum extern_type type;
+	int sz;
+	int align;
+	int data_off;
+	bool is_signed;
+	bool is_weak;
+	bool is_set;
 };

 static LIST_HEAD(bpf_objects_list);
···
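The new pr_perm_msg() above prints the current RLIMIT_MEMLOCK in human-readable units when loading fails with -EPERM even as root. Its bytes/KiB/MiB formatting can be sketched standalone; fmt_memlock is a hypothetical name, not a libbpf function:

```c
#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Pick bytes, KiB, or MiB depending on magnitude, mirroring the
 * formatting in pr_perm_msg() above. */
static void fmt_memlock(unsigned long long v, char *buf, size_t sz)
{
	if (v < 1024)
		snprintf(buf, sz, "%llu bytes", v);
	else if (v < 1024 * 1024)
		snprintf(buf, sz, "%.1f KiB", (double)v / 1024);
	else
		snprintf(buf, sz, "%.1f MiB", (double)v / (1024 * 1024));
}
```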
 	struct bpf_map *maps;
 	size_t nr_maps;
 	size_t maps_cap;
-	struct bpf_secdata sections;
+
+	char *kconfig;
+	struct extern_desc *externs;
+	int nr_extern;
+	int kconfig_map_idx;

 	bool loaded;
 	bool has_pseudo_calls;
···
 	int maps_shndx;
 	int btf_maps_shndx;
 	int text_shndx;
+	int symbols_shndx;
 	int data_shndx;
 	int rodata_shndx;
 	int bss_shndx;
···
 	obj->efile.data_shndx = -1;
 	obj->efile.rodata_shndx = -1;
 	obj->efile.bss_shndx = -1;
+	obj->kconfig_map_idx = -1;

 	obj->kern_version = get_kernel_version();
 	obj->loaded = false;
···
 	*size = 0;
 	if (!name) {
 		return -EINVAL;
-	} else if (!strcmp(name, ".data")) {
+	} else if (!strcmp(name, DATA_SEC)) {
 		if (obj->efile.data)
 			*size = obj->efile.data->d_size;
-	} else if (!strcmp(name, ".bss")) {
+	} else if (!strcmp(name, BSS_SEC)) {
 		if (obj->efile.bss)
 			*size = obj->efile.bss->d_size;
-	} else if (!strcmp(name, ".rodata")) {
+	} else if (!strcmp(name, RODATA_SEC)) {
 		if (obj->efile.rodata)
 			*size = obj->efile.rodata->d_size;
 	} else {
···
 	return &obj->maps[obj->nr_maps++];
 }

-static int
-bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
-			      int sec_idx, Elf_Data *data, void **data_buff)
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+{
+	long page_sz = sysconf(_SC_PAGE_SIZE);
+	size_t map_sz;
+
+	map_sz = roundup(map->def.value_size, 8) * map->def.max_entries;
+	map_sz = roundup(map_sz, page_sz);
+	return map_sz;
+}
+
+static char *internal_map_name(struct bpf_object *obj,
+			       enum libbpf_map_type type)
 {
 	char map_name[BPF_OBJ_NAME_LEN];
+	const char *sfx = libbpf_type_to_btf_name[type];
+	int sfx_len = max((size_t)7, strlen(sfx));
+	int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
+			  strlen(obj->name));
+
+	snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
+		 sfx_len, libbpf_type_to_btf_name[type]);
+
+	return strdup(map_name);
+}
+
+static int
+bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
+			      int sec_idx, void *data, size_t data_sz)
+{
 	struct bpf_map_def *def;
 	struct bpf_map *map;
+	int err;

 	map = bpf_object__add_map(obj);
 	if (IS_ERR(map))
···
 	map->libbpf_type = type;
 	map->sec_idx = sec_idx;
 	map->sec_offset = 0;
-	snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name,
-		 libbpf_type_to_btf_name[type]);
-	map->name = strdup(map_name);
+	map->name = internal_map_name(obj, type);
 	if (!map->name) {
 		pr_warn("failed to alloc map name\n");
 		return -ENOMEM;
···
 	def = &map->def;
 	def->type = BPF_MAP_TYPE_ARRAY;
 	def->key_size = sizeof(int);
-	def->value_size = data->d_size;
+	def->value_size = data_sz;
 	def->max_entries = 1;
-	def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0;
-	if (obj->caps.array_mmap)
-		def->map_flags |= BPF_F_MMAPABLE;
+	def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
+			 ? BPF_F_RDONLY_PROG : 0;
+	def->map_flags |= BPF_F_MMAPABLE;

 	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
-		 map_name, map->sec_idx, map->sec_offset, def->map_flags);
+		 map->name, map->sec_idx, map->sec_offset, def->map_flags);

-	if (data_buff) {
-		*data_buff = malloc(data->d_size);
-		if (!*data_buff) {
-			zfree(&map->name);
-			pr_warn("failed to alloc map content buffer\n");
-			return -ENOMEM;
-		}
-		memcpy(*data_buff, data->d_buf, data->d_size);
+	map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
+			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (map->mmaped == MAP_FAILED) {
+		err = -errno;
+		map->mmaped = NULL;
+		pr_warn("failed to alloc map '%s' content buffer: %d\n",
+			map->name, err);
+		zfree(&map->name);
+		return err;
 	}
+
+	if (data)
+		memcpy(map->mmaped, data, data_sz);

 	pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
 	return 0;
···
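bpf_map_mmap_sz() above sizes the mmap-able region for a global-data map: value_size rounded up to 8 bytes, times max_entries, rounded up to the page size. A sketch of that arithmetic, with a fixed hypothetical page size instead of sysconf(_SC_PAGE_SIZE):

```c
#include <assert.h>

/* Hypothetical fixed page size for illustration only. */
#define EX_PAGE_SZ 4096UL

static unsigned long ex_roundup(unsigned long x, unsigned long mult)
{
	return ((x + mult - 1) / mult) * mult;
}

/* value_size rounded to 8, times max_entries, rounded to page size,
 * mirroring bpf_map_mmap_sz() above. */
static unsigned long ex_map_mmap_sz(unsigned long value_size,
				    unsigned long max_entries)
{
	unsigned long sz = ex_roundup(value_size, 8) * max_entries;

	return ex_roundup(sz, EX_PAGE_SZ);
}
```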
 {
 	int err;

-	if (!obj->caps.global_data)
-		return 0;
 	/*
 	 * Populate obj->maps with libbpf internal maps.
 	 */
 	if (obj->efile.data_shndx >= 0) {
 		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
 						    obj->efile.data_shndx,
-						    obj->efile.data,
-						    &obj->sections.data);
+						    obj->efile.data->d_buf,
+						    obj->efile.data->d_size);
 		if (err)
 			return err;
 	}
 	if (obj->efile.rodata_shndx >= 0) {
 		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
 						    obj->efile.rodata_shndx,
-						    obj->efile.rodata,
-						    &obj->sections.rodata);
+						    obj->efile.rodata->d_buf,
+						    obj->efile.rodata->d_size);
 		if (err)
 			return err;
 	}
 	if (obj->efile.bss_shndx >= 0) {
 		err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
 						    obj->efile.bss_shndx,
-						    obj->efile.bss, NULL);
+						    NULL,
+						    obj->efile.bss->d_size);
 		if (err)
 			return err;
 	}
+	return 0;
+}
+
+static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
+					       const void *name)
+{
+	int i;
+
+	for (i = 0; i < obj->nr_extern; i++) {
+		if (strcmp(obj->externs[i].name, name) == 0)
+			return &obj->externs[i];
+	}
+	return NULL;
+}
+
+static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
+			     char value)
+{
+	switch (ext->type) {
+	case EXT_BOOL:
+		if (value == 'm') {
+			pr_warn("extern %s=%c should be tristate or char\n",
+				ext->name, value);
+			return -EINVAL;
+		}
+		*(bool *)ext_val = value == 'y' ? true : false;
+		break;
+	case EXT_TRISTATE:
+		if (value == 'y')
+			*(enum libbpf_tristate *)ext_val = TRI_YES;
+		else if (value == 'm')
+			*(enum libbpf_tristate *)ext_val = TRI_MODULE;
+		else /* value == 'n' */
+			*(enum libbpf_tristate *)ext_val = TRI_NO;
+		break;
+	case EXT_CHAR:
+		*(char *)ext_val = value;
+		break;
+	case EXT_UNKNOWN:
+	case EXT_INT:
+	case EXT_CHAR_ARR:
+	default:
+		pr_warn("extern %s=%c should be bool, tristate, or char\n",
+			ext->name, value);
+		return -EINVAL;
+	}
+	ext->is_set = true;
+	return 0;
+}
+
+static int set_ext_value_str(struct extern_desc *ext, char *ext_val,
+			     const char *value)
+{
+	size_t len;
+
+	if (ext->type != EXT_CHAR_ARR) {
+		pr_warn("extern %s=%s should be char array\n", ext->name, value);
+		return -EINVAL;
+	}
+
+	len = strlen(value);
+	if (value[len - 1] != '"') {
+		pr_warn("extern '%s': invalid string config '%s'\n",
+			ext->name, value);
+		return -EINVAL;
+	}
+
+	/* strip quotes */
+	len -= 2;
+	if (len >= ext->sz) {
+		pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
+			ext->name, value, len, ext->sz - 1);
+		len = ext->sz - 1;
+	}
+	memcpy(ext_val, value + 1, len);
+	ext_val[len] = '\0';
+	ext->is_set = true;
+	return 0;
+}
+
+static int parse_u64(const char *value, __u64 *res)
+{
+	char *value_end;
+	int err;
+
+	errno = 0;
+	*res = strtoull(value, &value_end, 0);
+	if (errno) {
+		err = -errno;
+		pr_warn("failed to parse '%s' as integer: %d\n", value, err);
+		return err;
+	}
+	if (*value_end) {
+		pr_warn("failed to parse '%s' as integer completely\n", value);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static bool is_ext_value_in_range(const struct extern_desc *ext, __u64
v)
+{
+	int bit_sz = ext->sz * 8;
+
+	if (ext->sz == 8)
+		return true;
+
+	/* Validate that value stored in u64 fits in integer of `ext->sz`
+	 * bytes size without any loss of information. If the target integer
+	 * is signed, we rely on the following limits of integer type of
+	 * Y bits and subsequent transformation:
+	 *
+	 *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
+	 *            0 <= X + 2^(Y-1) <= 2^Y - 1
+	 *            0 <= X + 2^(Y-1) <  2^Y
+	 *
+	 * For unsigned target integer, check that all the (64 - Y) bits are
+	 * zero.
+	 */
+	if (ext->is_signed)
+		return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
+	else
+		return (v >> bit_sz) == 0;
+}
+
+static int set_ext_value_num(struct extern_desc *ext, void *ext_val,
+			     __u64 value)
+{
+	if (ext->type != EXT_INT && ext->type != EXT_CHAR) {
+		pr_warn("extern %s=%llu should be integer\n",
+			ext->name, (unsigned long long)value);
+		return -EINVAL;
+	}
+	if (!is_ext_value_in_range(ext, value)) {
+		pr_warn("extern %s=%llu value doesn't fit in %d bytes\n",
+			ext->name, (unsigned long long)value, ext->sz);
+		return -ERANGE;
+	}
+	switch (ext->sz) {
+	case 1: *(__u8 *)ext_val = value; break;
+	case 2: *(__u16 *)ext_val = value; break;
+	case 4: *(__u32 *)ext_val = value; break;
+	case 8: *(__u64 *)ext_val = value; break;
+	default:
+		return -EINVAL;
+	}
+	ext->is_set = true;
+	return 0;
+}
+
+static int bpf_object__process_kconfig_line(struct bpf_object *obj,
+					    char *buf, void *data)
+{
+	struct extern_desc *ext;
+	char *sep, *value;
+	int len, err = 0;
+	void *ext_val;
+	__u64 num;
+
+	if (strncmp(buf, "CONFIG_", 7))
+		return 0;
+
+	sep = strchr(buf, '=');
+	if (!sep) {
+		pr_warn("failed to parse '%s': no separator\n", buf);
+		return -EINVAL;
+	}
+
+	/* Trim ending '\n' */
+	len = strlen(buf);
+	if (buf[len - 1] == '\n')
+		buf[len - 1] = '\0';
+	/* Split on '=' and ensure that a value is present. */
+	*sep = '\0';
+	if (!sep[1]) {
+		*sep = '=';
+		pr_warn("failed to parse '%s': no value\n", buf);
+		return -EINVAL;
+	}
+
+	ext = find_extern_by_name(obj, buf);
+	if (!ext || ext->is_set)
+		return 0;
+
+	ext_val = data + ext->data_off;
+	value = sep + 1;
+
+	switch (*value) {
+	case 'y': case 'n': case 'm':
+		err = set_ext_value_tri(ext, ext_val, *value);
+		break;
+	case '"':
+		err = set_ext_value_str(ext, ext_val, value);
+		break;
+	default:
+		/* assume integer */
+		err = parse_u64(value, &num);
+		if (err) {
+			pr_warn("extern %s=%s should be integer\n",
+				ext->name, value);
+			return err;
+		}
+		err = set_ext_value_num(ext, ext_val, num);
+		break;
+	}
+	if (err)
+		return err;
+	pr_debug("extern %s=%s\n", ext->name, value);
+	return 0;
+}
+
+static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
+{
+	char buf[PATH_MAX];
+	struct utsname uts;
+	int len, err = 0;
+	gzFile file;
+
+	uname(&uts);
+	len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
+	if (len < 0)
+		return -EINVAL;
+	else if (len >= PATH_MAX)
+		return -ENAMETOOLONG;
+
+	/* gzopen also accepts uncompressed files. */
+	file = gzopen(buf, "r");
+	if (!file)
+		file = gzopen("/proc/config.gz", "r");
+
+	if (!file) {
+		pr_warn("failed to open system Kconfig\n");
+		return -ENOENT;
+	}
+
+	while (gzgets(file, buf, sizeof(buf))) {
+		err = bpf_object__process_kconfig_line(obj, buf, data);
+		if (err) {
+			pr_warn("error parsing system Kconfig line '%s': %d\n",
+				buf, err);
+			goto out;
+		}
+	}
+
+out:
+	gzclose(file);
+	return err;
+}
+
+static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
+					const char *config, void *data)
+{
+	char buf[PATH_MAX];
+	int err = 0;
+	FILE *file;
+
+	file = fmemopen((void *)config, strlen(config), "r");
+	if (!file) {
+		err = -errno;
+		pr_warn("failed to open in-memory Kconfig: %d\n", err);
+		return err;
+	}
+
+	while (fgets(buf, sizeof(buf), file)) {
+		err = bpf_object__process_kconfig_line(obj, buf, data);
+		if (err) {
+			pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
+				buf, err);
+			break;
+		}
+	}
+
+	fclose(file);
+	return err;
+}
+
+static int bpf_object__init_kconfig_map(struct bpf_object *obj)
+{
+	struct extern_desc *last_ext;
+	size_t map_sz;
+	int err;
+
+	if (obj->nr_extern == 0)
+		return 0;
+
+	last_ext = &obj->externs[obj->nr_extern - 1];
+	map_sz = last_ext->data_off + last_ext->sz;
+
+	err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
+					    obj->efile.symbols_shndx,
+					    NULL, map_sz);
+	if (err)
+		return err;
+
+	obj->kconfig_map_idx = obj->nr_maps - 1;
+
 	return 0;
 }
···
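The comment in is_ext_value_in_range() above derives the signed check: shifting the signed range [-2^(Y-1), 2^(Y-1)-1] by 2^(Y-1) maps it onto [0, 2^Y), so a single unsigned comparison suffices; for unsigned targets, the high (64 - Y) bits must be zero. A standalone sketch of that check (fits is a hypothetical name, not the libbpf helper):

```c
#include <assert.h>

/* Does a value stored in an unsigned 64-bit word fit in an integer of
 * `sz` bytes without loss? Signed case uses the range-shift trick from
 * the comment above. */
static int fits(unsigned long long v, int sz, int is_signed)
{
	int bit_sz = sz * 8;

	if (sz == 8)
		return 1;
	if (is_signed)
		return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
	return (v >> bit_sz) == 0;
}
```

For a signed byte, -128 (stored two's-complement in the u64) plus 128 wraps to 0, which is below 256, so it fits; -129 does not.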
 	}
 	sz = btf__resolve_size(obj->btf, t->type);
 	if (sz < 0) {
-		pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n",
-			map_name, t->type, sz);
+		pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
+			map_name, t->type, (ssize_t)sz);
 		return sz;
 	}
-	pr_debug("map '%s': found key [%u], sz = %lld.\n",
-		 map_name, t->type, sz);
+	pr_debug("map '%s': found key [%u], sz = %zd.\n",
+		 map_name, t->type, (ssize_t)sz);
 	if (map->def.key_size && map->def.key_size != sz) {
-		pr_warn("map '%s': conflicting key size %u != %lld.\n",
-			map_name, map->def.key_size, sz);
+		pr_warn("map '%s': conflicting key size %u != %zd.\n",
+			map_name, map->def.key_size, (ssize_t)sz);
 		return -EINVAL;
 	}
 	map->def.key_size = sz;
···
 	}
 	sz = btf__resolve_size(obj->btf, t->type);
 	if (sz < 0) {
-		pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n",
-			map_name, t->type, sz);
+		pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
+			map_name, t->type, (ssize_t)sz);
 		return sz;
 	}
-	pr_debug("map '%s': found value [%u], sz = %lld.\n",
-		 map_name, t->type, sz);
+	pr_debug("map '%s': found value [%u], sz = %zd.\n",
+		 map_name, t->type, (ssize_t)sz);
 	if (map->def.value_size && map->def.value_size != sz) {
-		pr_warn("map '%s': conflicting value size %u != %lld.\n",
-			map_name, map->def.value_size, sz);
+		pr_warn("map '%s': conflicting value size %u != %zd.\n",
+			map_name, map->def.value_size, (ssize_t)sz);
 		return -EINVAL;
 	}
 	map->def.value_size = sz;
···
 	return 0;
 }

-static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps,
-				 const char *pin_root_path)
+static int bpf_object__init_maps(struct bpf_object *obj,
+				 const struct bpf_object_open_opts *opts)
 {
-	bool strict = !relaxed_maps;
+	const char *pin_root_path;
+	bool strict;
 	int err;

+	strict = !OPTS_GET(opts, relaxed_maps, false);
+	pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
+
 	err = bpf_object__init_user_maps(obj, strict);
-	if (err)
-		return err;
-
-	err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
-	if (err)
-		return err;
-
-	err = bpf_object__init_global_data_maps(obj);
+	err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
+	err = err ?: bpf_object__init_global_data_maps(obj);
+	err = err ?: bpf_object__init_kconfig_map(obj);
 	if (err)
 		return err;
···
 static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
 {
-	return obj->efile.btf_maps_shndx >= 0;
+	return obj->efile.btf_maps_shndx >= 0 ||
+	       obj->nr_extern > 0;
 }

 static int bpf_object__init_btf(struct bpf_object *obj,
···
 	if (IS_ERR(obj->btf)) {
 		pr_warn("Error loading ELF section %s: %d.\n",
 			BTF_ELF_SEC, err);
-			goto out;
-		}
-		err = btf__finalize_data(obj, obj->btf);
-		if (err) {
-			pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
 			goto out;
 		}
 	}
···
 	return 0;
 }

+static int bpf_object__finalize_btf(struct bpf_object *obj)
+{
+	int err;
+
+	if (!obj->btf)
+		return 0;
+
+	err = btf__finalize_data(obj, obj->btf);
+	if (!err)
+		return 0;
+
+	pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
+	btf__free(obj->btf);
+	obj->btf = NULL;
+	btf_ext__free(obj->btf_ext);
+	obj->btf_ext = NULL;
+
+	if (bpf_object__is_btf_mandatory(obj)) {
+		pr_warn("BTF is required, but is missing or corrupted.\n");
+		return -ENOENT;
+	}
+	return 0;
+}
+
 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
 {
 	int err = 0;
···
 	return 0;
 }

-static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
-				   const char *pin_root_path)
+static int bpf_object__elf_collect(struct bpf_object *obj)
 {
 	Elf *elf = obj->efile.elf;
 	GElf_Ehdr *ep = &obj->efile.ehdr;
···
 			return -LIBBPF_ERRNO__FORMAT;
 		}
 		obj->efile.symbols = data;
+		obj->efile.symbols_shndx = idx;
 		obj->efile.strtabidx = sh.sh_link;
 	} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
 		if (sh.sh_flags & SHF_EXECINSTR) {
···
 				name, obj->path, cp);
 			return err;
 		}
-	} else if (strcmp(name, ".data") == 0) {
+	} else if (strcmp(name, DATA_SEC) == 0) {
 		obj->efile.data = data;
 		obj->efile.data_shndx = idx;
-	} else if (strcmp(name, ".rodata") == 0) {
+	} else if (strcmp(name, RODATA_SEC) == 0) {
 		obj->efile.rodata = data;
 		obj->efile.rodata_shndx = idx;
 	} else {
···
 		obj->efile.reloc_sects[nr_sects].shdr = sh;
 		obj->efile.reloc_sects[nr_sects].data = data;
-	} else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {
+	} else if (sh.sh_type == SHT_NOBITS &&
+		   strcmp(name, BSS_SEC) == 0) {
 		obj->efile.bss = data;
 		obj->efile.bss_shndx = idx;
 	} else {
···
 		pr_warn("Corrupted ELF file: index of strtab invalid\n");
 		return -LIBBPF_ERRNO__FORMAT;
 	}
-	err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
-	if (!err)
-		err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path);
-	if (!err)
-		err = bpf_object__sanitize_and_load_btf(obj);
-	if (!err)
-		err = bpf_object__init_prog_names(obj);
-	return err;
+	return bpf_object__init_btf(obj, btf_data, btf_ext_data);
+}
+
+static bool sym_is_extern(const GElf_Sym *sym)
+{
+	int bind = GELF_ST_BIND(sym->st_info);
+	/* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
+	return sym->st_shndx == SHN_UNDEF &&
+	       (bind == STB_GLOBAL || bind == STB_WEAK) &&
+	       GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
+}
+
+static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
+{
+	const struct btf_type *t;
+	const char *var_name;
+	int i, n;
+
+	if (!btf)
+		return -ESRCH;
+
+	n = btf__get_nr_types(btf);
+	for (i = 1; i <= n; i++) {
+		t = btf__type_by_id(btf, i);
+
+		if (!btf_is_var(t))
+			continue;
+
+		var_name = btf__name_by_offset(btf, t->name_off);
+		if (strcmp(var_name, ext_name))
+			continue;
+
+		if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+			return -EINVAL;
+
+		return i;
+	}
+
+	return -ENOENT;
+}
+
+static enum extern_type find_extern_type(const struct btf *btf, int id,
+					 bool *is_signed)
+{
+	const struct btf_type *t;
+	const char *name;
+
+	t = skip_mods_and_typedefs(btf, id, NULL);
+	name = btf__name_by_offset(btf, t->name_off);
+
+	if (is_signed)
+		*is_signed = false;
+	switch (btf_kind(t)) {
+	case BTF_KIND_INT: {
+		int enc = btf_int_encoding(t);
+
+		if (enc & BTF_INT_BOOL)
+			return t->size == 1 ?
EXT_BOOL : EXT_UNKNOWN;
+		if (is_signed)
+			*is_signed = enc & BTF_INT_SIGNED;
+		if (t->size == 1)
+			return EXT_CHAR;
+		if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
+			return EXT_UNKNOWN;
+		return EXT_INT;
+	}
+	case BTF_KIND_ENUM:
+		if (t->size != 4)
+			return EXT_UNKNOWN;
+		if (strcmp(name, "libbpf_tristate"))
+			return EXT_UNKNOWN;
+		return EXT_TRISTATE;
+	case BTF_KIND_ARRAY:
+		if (btf_array(t)->nelems == 0)
+			return EXT_UNKNOWN;
+		if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR)
+			return EXT_UNKNOWN;
+		return EXT_CHAR_ARR;
+	default:
+		return EXT_UNKNOWN;
+	}
+}
+
+static int cmp_externs(const void *_a, const void *_b)
+{
+	const struct extern_desc *a = _a;
+	const struct extern_desc *b = _b;
+
+	/* descending order by alignment requirements */
+	if (a->align != b->align)
+		return a->align > b->align ? -1 : 1;
+	/* ascending order by size, within same alignment class */
+	if (a->sz != b->sz)
+		return a->sz < b->sz ? -1 : 1;
+	/* resolve ties by name */
+	return strcmp(a->name, b->name);
+}
+
+static int bpf_object__collect_externs(struct bpf_object *obj)
+{
+	const struct btf_type *t;
+	struct extern_desc *ext;
+	int i, n, off, btf_id;
+	struct btf_type *sec;
+	const char *ext_name;
+	Elf_Scn *scn;
+	GElf_Shdr sh;
+
+	if (!obj->efile.symbols)
+		return 0;
+
+	scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
+	if (!scn)
+		return -LIBBPF_ERRNO__FORMAT;
+	if (gelf_getshdr(scn, &sh) != &sh)
+		return -LIBBPF_ERRNO__FORMAT;
+	n = sh.sh_size / sh.sh_entsize;
+
+	pr_debug("looking for externs among %d symbols...\n", n);
+	for (i = 0; i < n; i++) {
+		GElf_Sym sym;
+
+		if (!gelf_getsym(obj->efile.symbols, i, &sym))
+			return -LIBBPF_ERRNO__FORMAT;
+		if (!sym_is_extern(&sym))
+			continue;
+		ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+				      sym.st_name);
+		if (!ext_name || !ext_name[0])
+			continue;
+
+		ext = obj->externs;
+		ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
+		if (!ext)
+			return -ENOMEM;
+		obj->externs = ext;
+		ext = &ext[obj->nr_extern];
+		memset(ext, 0, sizeof(*ext));
+		obj->nr_extern++;
+
+		ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
+		if (ext->btf_id <= 0) {
+			pr_warn("failed to find BTF for extern '%s': %d\n",
+				ext_name, ext->btf_id);
+			return ext->btf_id;
+		}
+		t = btf__type_by_id(obj->btf, ext->btf_id);
+		ext->name = btf__name_by_offset(obj->btf, t->name_off);
+		ext->sym_idx = i;
+		ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
+		ext->sz = btf__resolve_size(obj->btf, t->type);
+		if (ext->sz <= 0) {
+			pr_warn("failed to resolve size of extern '%s': %d\n",
+				ext_name, ext->sz);
+			return ext->sz;
+		}
+		ext->align = btf__align_of(obj->btf, t->type);
+		if (ext->align <= 0) {
+			pr_warn("failed to determine alignment of extern '%s': %d\n",
+				ext_name, ext->align);
+			return -EINVAL;
+		}
+		ext->type = find_extern_type(obj->btf, t->type,
+					     &ext->is_signed);
+		if (ext->type == EXT_UNKNOWN) {
+			pr_warn("extern '%s' type is unsupported\n", ext_name);
+			return -ENOTSUP;
+		}
+	}
+	pr_debug("collected %d externs total\n", obj->nr_extern);
+
+	if (!obj->nr_extern)
+		return 0;
+
+	/* sort externs by (alignment, size, name) and calculate their offsets
+	 * within a map */
+	qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
+	off = 0;
+	for (i = 0; i < obj->nr_extern; i++) {
+		ext = &obj->externs[i];
+		ext->data_off = roundup(off, ext->align);
+		off = ext->data_off + ext->sz;
+		pr_debug("extern #%d: symbol %d, off %u, name %s\n",
+			 i, ext->sym_idx, ext->data_off, ext->name);
+	}
+
+	btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC);
+	if (btf_id <= 0) {
+		pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC);
+		return -ESRCH;
+	}
+
+	sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id);
+	sec->size = off;
+	n = btf_vlen(sec);
+	for (i = 0; i < n; i++) {
+		struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+
+		t = btf__type_by_id(obj->btf, vs->type);
+		ext_name = btf__name_by_offset(obj->btf, t->name_off);
+		ext = find_extern_by_name(obj, ext_name);
+		if (!ext) {
+			pr_warn("failed to find extern definition for BTF var '%s'\n",
+				ext_name);
+			return -ESRCH;
+		}
+		vs->offset = ext->data_off;
+		btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+	}
+
+	return 0;
 }
···
 static struct bpf_program *
···
 	return NULL;
 }

+struct bpf_program *
+bpf_object__find_program_by_name(const struct bpf_object *obj,
+				 const char *name)
+{
+	struct bpf_program *prog;
+
+	bpf_object__for_each_program(prog, obj) {
+		if (!strcmp(prog->name, name))
+			return prog;
+	}
+	return NULL;
+}
+
 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
 				      int shndx)
 {
···
 		return LIBBPF_MAP_BSS;
 	else if (shndx == obj->efile.rodata_shndx)
 		return LIBBPF_MAP_RODATA;
+	else if (shndx == obj->efile.symbols_shndx)
+		return LIBBPF_MAP_KCONFIG;
 	else
 		return LIBBPF_MAP_UNSPEC;
 }
···
 		return -LIBBPF_ERRNO__RELOC;
 	}
 	if (sym->st_value % 8) {
-		pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value);
+		pr_warn("bad call relo offset: %zu\n",
+			(size_t)sym->st_value);
 		return -LIBBPF_ERRNO__RELOC;
 	}
 	reloc_desc->type = RELO_CALL;
···
 			insn_idx, insn->code);
 		return -LIBBPF_ERRNO__RELOC;
 	}

+	if (sym_is_extern(sym)) {
+		int sym_idx = GELF_R_SYM(rel->r_info);
+		int i, n = obj->nr_extern;
+		struct extern_desc *ext;
+
+		for (i = 0; i < n; i++) {
+			ext = &obj->externs[i];
+			if (ext->sym_idx == sym_idx)
+				break;
+		}
+		if (i >= n) {
+			pr_warn("extern relo failed to find extern for sym %d\n",
+				sym_idx);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+		pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n",
+			 i, ext->name, ext->sym_idx, ext->data_off, insn_idx);
+		reloc_desc->type = RELO_EXTERN;
+		reloc_desc->insn_idx = insn_idx;
+		reloc_desc->sym_off = ext->data_off;
+		return 0;
+	}
+
 	if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
 		pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n",
 			name, shdr_idx);
···
 			break;
 		}
 		if (map_idx >= nr_maps) {
-			pr_warn("map relo failed to find map for sec %u, off %llu\n",
-				shdr_idx, (__u64)sym->st_value);
+			pr_warn("map relo failed to find map for sec %u, off %zu\n",
+				shdr_idx, (size_t)sym->st_value);
 			return -LIBBPF_ERRNO__RELOC;
 		}
 		reloc_desc->type = RELO_LD64;
···
 	/* global data map relocation */
 	if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
 		pr_warn("bad data relo against section %u\n", shdr_idx);
-		return -LIBBPF_ERRNO__RELOC;
-	}
-	if (!obj->caps.global_data) {
-		pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n",
-			name, insn_idx);
 		return -LIBBPF_ERRNO__RELOC;
 	}
 	for (map_idx = 0; map_idx < nr_maps; map_idx++) {
···
 	name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
 			  sym.st_name) ? : "<?>";

-	pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
-		 (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info),
-		 (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info),
+	pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
+		 (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info),
+		 (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info),
 		 GELF_ST_BIND(sym.st_info), sym.st_name, name,
 		 insn_idx);
···
 static int
 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 {
+	enum libbpf_map_type map_type = map->libbpf_type;
 	char *cp, errmsg[STRERR_BUFSIZE];
 	int err, zero = 0;
-	__u8 *data;

-	/* Nothing to do here since kernel already zero-initializes .bss map. */
-	if (map->libbpf_type == LIBBPF_MAP_BSS)
+	/* kernel already zero-initializes .bss map. */
+	if (map_type == LIBBPF_MAP_BSS)
 		return 0;

-	data = map->libbpf_type == LIBBPF_MAP_DATA ?
-	       obj->sections.data : obj->sections.rodata;
+	err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
+	if (err) {
+		err = -errno;
+		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+		pr_warn("Error setting initial map(%s) contents: %s\n",
+			map->name, cp);
+		return err;
+	}

-	err = bpf_map_update_elem(map->fd, &zero, data, 0);
-	/* Freeze .rodata map as read-only from syscall side. */
-	if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) {
+	/* Freeze .rodata and .kconfig map as read-only from syscall side. */
+	if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
 		err = bpf_map_freeze(map->fd);
 		if (err) {
-			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+			err = -errno;
+			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 			pr_warn("Error freezing map(%s) as read-only: %s\n",
 				map->name, cp);
-			err = 0;
+			return err;
 		}
 	}
-	return err;
+	return 0;
 }

 static int
···
 	cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
 	pr_warn("failed to create map (name: '%s'): %s(%d)\n",
 		map->name, cp, err);
+	pr_perm_msg(err);
 	for (j = 0; j < i; j++)
 		zclose(obj->maps[j].fd);
 	return err;
···
 	return !s || !s[0];
 }

+static bool is_flex_arr(const struct btf *btf,
+			const struct bpf_core_accessor *acc,
+			const struct btf_array *arr)
+{
+	const struct btf_type *t;
+
+	/* not a flexible array, if not inside a struct or has non-zero size */
+	if (!acc->name || arr->nelems > 0)
+		return false;
+
+	/* has to be the last member of enclosing struct */
+	t = btf__type_by_id(btf,
acc->type_id); 2551 + return acc->idx == btf_vlen(t) - 1; 2552 + } 2553 + 3184 2554 /* 3185 2555 * Turn bpf_field_reloc into a low- and high-level spec representation, 3186 2556 * validating correctness along the way, as well as calculating resulting ··· 3233 2573 struct bpf_core_spec *spec) 3234 2574 { 3235 2575 int access_idx, parsed_len, i; 2576 + struct bpf_core_accessor *acc; 3236 2577 const struct btf_type *t; 3237 2578 const char *name; 3238 2579 __u32 id; ··· 3281 2620 return -EINVAL; 3282 2621 3283 2622 access_idx = spec->raw_spec[i]; 2623 + acc = &spec->spec[spec->len]; 3284 2624 3285 2625 if (btf_is_composite(t)) { 3286 2626 const struct btf_member *m; ··· 3299 2637 if (str_is_empty(name)) 3300 2638 return -EINVAL; 3301 2639 3302 - spec->spec[spec->len].type_id = id; 3303 - spec->spec[spec->len].idx = access_idx; 3304 - spec->spec[spec->len].name = name; 2640 + acc->type_id = id; 2641 + acc->idx = access_idx; 2642 + acc->name = name; 3305 2643 spec->len++; 3306 2644 } 3307 2645 3308 2646 id = m->type; 3309 2647 } else if (btf_is_array(t)) { 3310 2648 const struct btf_array *a = btf_array(t); 2649 + bool flex; 3311 2650 3312 2651 t = skip_mods_and_typedefs(btf, a->type, &id); 3313 - if (!t || access_idx >= a->nelems) 2652 + if (!t) 2653 + return -EINVAL; 2654 + 2655 + flex = is_flex_arr(btf, acc - 1, a); 2656 + if (!flex && access_idx >= a->nelems) 3314 2657 return -EINVAL; 3315 2658 3316 2659 spec->spec[spec->len].type_id = id; ··· 3620 2953 */ 3621 2954 if (i > 0) { 3622 2955 const struct btf_array *a; 2956 + bool flex; 3623 2957 3624 2958 if (!btf_is_array(targ_type)) 3625 2959 return 0; 3626 2960 3627 2961 a = btf_array(targ_type); 3628 - if (local_acc->idx >= a->nelems) 2962 + flex = is_flex_arr(targ_btf, targ_acc - 1, a); 2963 + if (!flex && local_acc->idx >= a->nelems) 3629 2964 return 0; 3630 2965 if (!skip_mods_and_typedefs(targ_btf, a->type, 3631 2966 &targ_id)) ··· 3811 3142 insn = &prog->insns[insn_idx]; 3812 3143 class = 
BPF_CLASS(insn->code); 3813 3144 3814 - if (class == BPF_ALU || class == BPF_ALU64) { 3145 + switch (class) { 3146 + case BPF_ALU: 3147 + case BPF_ALU64: 3815 3148 if (BPF_SRC(insn->code) != BPF_K) 3816 3149 return -EINVAL; 3817 3150 if (!failed && validate && insn->imm != orig_val) { 3818 - pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n", 3151 + pr_warn("prog '%s': unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", 3819 3152 bpf_program__title(prog, false), insn_idx, 3820 3153 insn->imm, orig_val, new_val); 3821 3154 return -EINVAL; ··· 3827 3156 pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n", 3828 3157 bpf_program__title(prog, false), insn_idx, 3829 3158 failed ? " w/ failed reloc" : "", orig_val, new_val); 3830 - } else { 3159 + break; 3160 + case BPF_LDX: 3161 + case BPF_ST: 3162 + case BPF_STX: 3163 + if (!failed && validate && insn->off != orig_val) { 3164 + pr_warn("prog '%s': unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", 3165 + bpf_program__title(prog, false), insn_idx, 3166 + insn->off, orig_val, new_val); 3167 + return -EINVAL; 3168 + } 3169 + if (new_val > SHRT_MAX) { 3170 + pr_warn("prog '%s': insn #%d (LD/LDX/ST/STX) value too big: %u\n", 3171 + bpf_program__title(prog, false), insn_idx, 3172 + new_val); 3173 + return -ERANGE; 3174 + } 3175 + orig_val = insn->off; 3176 + insn->off = new_val; 3177 + pr_debug("prog '%s': patched insn #%d (LD/LDX/ST/STX)%s off %u -> %u\n", 3178 + bpf_program__title(prog, false), insn_idx, 3179 + failed ? 
" w/ failed reloc" : "", orig_val, new_val); 3180 + break; 3181 + default: 3831 3182 pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", 3832 3183 bpf_program__title(prog, false), 3833 3184 insn_idx, insn->code, insn->src_reg, insn->dst_reg, ··· 4252 3559 size_t new_cnt; 4253 3560 int err; 4254 3561 4255 - if (relo->type != RELO_CALL) 4256 - return -LIBBPF_ERRNO__RELOC; 4257 - 4258 3562 if (prog->idx == obj->efile.text_shndx) { 4259 3563 pr_warn("relo in .text insn %d into off %d (insn #%d)\n", 4260 3564 relo->insn_idx, relo->sym_off, relo->sym_off / 8); ··· 4313 3623 4314 3624 for (i = 0; i < prog->nr_reloc; i++) { 4315 3625 struct reloc_desc *relo = &prog->reloc_desc[i]; 3626 + struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 4316 3627 4317 - if (relo->type == RELO_LD64 || relo->type == RELO_DATA) { 4318 - struct bpf_insn *insn = &prog->insns[relo->insn_idx]; 3628 + if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { 3629 + pr_warn("relocation out of range: '%s'\n", 3630 + prog->section_name); 3631 + return -LIBBPF_ERRNO__RELOC; 3632 + } 4319 3633 4320 - if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { 4321 - pr_warn("relocation out of range: '%s'\n", 4322 - prog->section_name); 4323 - return -LIBBPF_ERRNO__RELOC; 4324 - } 4325 - 4326 - if (relo->type != RELO_DATA) { 4327 - insn[0].src_reg = BPF_PSEUDO_MAP_FD; 4328 - } else { 4329 - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 4330 - insn[1].imm = insn[0].imm + relo->sym_off; 4331 - } 3634 + switch (relo->type) { 3635 + case RELO_LD64: 3636 + insn[0].src_reg = BPF_PSEUDO_MAP_FD; 4332 3637 insn[0].imm = obj->maps[relo->map_idx].fd; 4333 - } else if (relo->type == RELO_CALL) { 3638 + break; 3639 + case RELO_DATA: 3640 + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 3641 + insn[1].imm = insn[0].imm + relo->sym_off; 3642 + insn[0].imm = obj->maps[relo->map_idx].fd; 3643 + break; 3644 + case RELO_EXTERN: 3645 + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; 3646 + insn[0].imm = 
obj->maps[obj->kconfig_map_idx].fd; 3647 + insn[1].imm = relo->sym_off; 3648 + break; 3649 + case RELO_CALL: 4334 3650 err = bpf_program__reloc_text(prog, obj, relo); 4335 3651 if (err) 4336 3652 return err; 3653 + break; 3654 + default: 3655 + pr_warn("relo #%d: bad relo type %d\n", i, relo->type); 3656 + return -EINVAL; 4337 3657 } 4338 3658 } 4339 3659 ··· 4478 3778 ret = -errno; 4479 3779 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); 4480 3780 pr_warn("load bpf program failed: %s\n", cp); 3781 + pr_perm_msg(ret); 4481 3782 4482 3783 if (log_buf && log_buf[0] != '\0') { 4483 3784 ret = -LIBBPF_ERRNO__VERIFY; ··· 4508 3807 return ret; 4509 3808 } 4510 3809 4511 - int 4512 - bpf_program__load(struct bpf_program *prog, 4513 - char *license, __u32 kern_version) 3810 + static int libbpf_find_attach_btf_id(const char *name, 3811 + enum bpf_attach_type attach_type, 3812 + __u32 attach_prog_fd); 3813 + 3814 + int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) 4514 3815 { 4515 - int err = 0, fd, i; 3816 + int err = 0, fd, i, btf_id; 3817 + 3818 + if (prog->type == BPF_PROG_TYPE_TRACING) { 3819 + btf_id = libbpf_find_attach_btf_id(prog->section_name, 3820 + prog->expected_attach_type, 3821 + prog->attach_prog_fd); 3822 + if (btf_id <= 0) 3823 + return btf_id; 3824 + prog->attach_btf_id = btf_id; 3825 + } 4516 3826 4517 3827 if (prog->instances.nr < 0 || !prog->instances.fds) { 4518 3828 if (prog->preprocessor) { ··· 4547 3835 prog->section_name, prog->instances.nr); 4548 3836 } 4549 3837 err = load_program(prog, prog->insns, prog->insns_cnt, 4550 - license, kern_version, &fd); 3838 + license, kern_ver, &fd); 4551 3839 if (!err) 4552 3840 prog->instances.fds[0] = fd; 4553 3841 goto out; ··· 4576 3864 } 4577 3865 4578 3866 err = load_program(prog, result.new_insn_ptr, 4579 - result.new_insn_cnt, 4580 - license, kern_version, &fd); 4581 - 3867 + result.new_insn_cnt, license, kern_ver, &fd); 4582 3868 if (err) { 4583 3869 pr_warn("Loading 
the %dth instance of program '%s' failed\n", 4584 3870 i, prog->section_name); ··· 4620 3910 return 0; 4621 3911 } 4622 3912 4623 - static int libbpf_find_attach_btf_id(const char *name, 4624 - enum bpf_attach_type attach_type, 4625 - __u32 attach_prog_fd); 4626 3913 static struct bpf_object * 4627 3914 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, 4628 - struct bpf_object_open_opts *opts) 3915 + const struct bpf_object_open_opts *opts) 4629 3916 { 4630 - const char *pin_root_path; 3917 + const char *obj_name, *kconfig; 4631 3918 struct bpf_program *prog; 4632 3919 struct bpf_object *obj; 4633 - const char *obj_name; 4634 3920 char tmp_name[64]; 4635 - bool relaxed_maps; 4636 - __u32 attach_prog_fd; 4637 3921 int err; 4638 3922 4639 3923 if (elf_version(EV_CURRENT) == EV_NONE) { ··· 4656 3952 return obj; 4657 3953 4658 3954 obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); 4659 - relaxed_maps = OPTS_GET(opts, relaxed_maps, false); 4660 - pin_root_path = OPTS_GET(opts, pin_root_path, NULL); 4661 - attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); 3955 + kconfig = OPTS_GET(opts, kconfig, NULL); 3956 + if (kconfig) { 3957 + obj->kconfig = strdup(kconfig); 3958 + if (!obj->kconfig) 3959 + return ERR_PTR(-ENOMEM); 3960 + } 4662 3961 4663 - CHECK_ERR(bpf_object__elf_init(obj), err, out); 4664 - CHECK_ERR(bpf_object__check_endianness(obj), err, out); 4665 - CHECK_ERR(bpf_object__probe_caps(obj), err, out); 4666 - CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path), 4667 - err, out); 4668 - CHECK_ERR(bpf_object__collect_reloc(obj), err, out); 3962 + err = bpf_object__elf_init(obj); 3963 + err = err ? : bpf_object__check_endianness(obj); 3964 + err = err ? : bpf_object__elf_collect(obj); 3965 + err = err ? : bpf_object__collect_externs(obj); 3966 + err = err ? : bpf_object__finalize_btf(obj); 3967 + err = err ? : bpf_object__init_maps(obj, opts); 3968 + err = err ? 
: bpf_object__init_prog_names(obj); 3969 + err = err ? : bpf_object__collect_reloc(obj); 3970 + if (err) 3971 + goto out; 4669 3972 bpf_object__elf_finish(obj); 4670 3973 4671 3974 bpf_object__for_each_program(prog, obj) { ··· 4689 3978 4690 3979 bpf_program__set_type(prog, prog_type); 4691 3980 bpf_program__set_expected_attach_type(prog, attach_type); 4692 - if (prog_type == BPF_PROG_TYPE_TRACING) { 4693 - err = libbpf_find_attach_btf_id(prog->section_name, 4694 - attach_type, 4695 - attach_prog_fd); 4696 - if (err <= 0) 4697 - goto out; 4698 - prog->attach_btf_id = err; 4699 - prog->attach_prog_fd = attach_prog_fd; 4700 - } 3981 + if (prog_type == BPF_PROG_TYPE_TRACING) 3982 + prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); 4701 3983 } 4702 3984 4703 3985 return obj; ··· 4730 4026 } 4731 4027 4732 4028 struct bpf_object * 4733 - bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) 4029 + bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) 4734 4030 { 4735 4031 if (!path) 4736 4032 return ERR_PTR(-EINVAL); ··· 4742 4038 4743 4039 struct bpf_object * 4744 4040 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, 4745 - struct bpf_object_open_opts *opts) 4041 + const struct bpf_object_open_opts *opts) 4746 4042 { 4747 4043 if (!obj_buf || obj_buf_sz == 0) 4748 4044 return ERR_PTR(-EINVAL); ··· 4783 4079 return 0; 4784 4080 } 4785 4081 4082 + static int bpf_object__sanitize_maps(struct bpf_object *obj) 4083 + { 4084 + struct bpf_map *m; 4085 + 4086 + bpf_object__for_each_map(m, obj) { 4087 + if (!bpf_map__is_internal(m)) 4088 + continue; 4089 + if (!obj->caps.global_data) { 4090 + pr_warn("kernel doesn't support global data\n"); 4091 + return -ENOTSUP; 4092 + } 4093 + if (!obj->caps.array_mmap) 4094 + m->def.map_flags ^= BPF_F_MMAPABLE; 4095 + } 4096 + 4097 + return 0; 4098 + } 4099 + 4100 + static int bpf_object__resolve_externs(struct bpf_object *obj, 4101 + const char *extra_kconfig) 4102 + { 
4103 + bool need_config = false; 4104 + struct extern_desc *ext; 4105 + int err, i; 4106 + void *data; 4107 + 4108 + if (obj->nr_extern == 0) 4109 + return 0; 4110 + 4111 + data = obj->maps[obj->kconfig_map_idx].mmaped; 4112 + 4113 + for (i = 0; i < obj->nr_extern; i++) { 4114 + ext = &obj->externs[i]; 4115 + 4116 + if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { 4117 + void *ext_val = data + ext->data_off; 4118 + __u32 kver = get_kernel_version(); 4119 + 4120 + if (!kver) { 4121 + pr_warn("failed to get kernel version\n"); 4122 + return -EINVAL; 4123 + } 4124 + err = set_ext_value_num(ext, ext_val, kver); 4125 + if (err) 4126 + return err; 4127 + pr_debug("extern %s=0x%x\n", ext->name, kver); 4128 + } else if (strncmp(ext->name, "CONFIG_", 7) == 0) { 4129 + need_config = true; 4130 + } else { 4131 + pr_warn("unrecognized extern '%s'\n", ext->name); 4132 + return -EINVAL; 4133 + } 4134 + } 4135 + if (need_config && extra_kconfig) { 4136 + err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data); 4137 + if (err) 4138 + return -EINVAL; 4139 + need_config = false; 4140 + for (i = 0; i < obj->nr_extern; i++) { 4141 + ext = &obj->externs[i]; 4142 + if (!ext->is_set) { 4143 + need_config = true; 4144 + break; 4145 + } 4146 + } 4147 + } 4148 + if (need_config) { 4149 + err = bpf_object__read_kconfig_file(obj, data); 4150 + if (err) 4151 + return -EINVAL; 4152 + } 4153 + for (i = 0; i < obj->nr_extern; i++) { 4154 + ext = &obj->externs[i]; 4155 + 4156 + if (!ext->is_set && !ext->is_weak) { 4157 + pr_warn("extern %s (strong) not resolved\n", ext->name); 4158 + return -ESRCH; 4159 + } else if (!ext->is_set) { 4160 + pr_debug("extern %s (weak) not resolved, defaulting to zero\n", 4161 + ext->name); 4162 + } 4163 + } 4164 + 4165 + return 0; 4166 + } 4167 + 4786 4168 int bpf_object__load_xattr(struct bpf_object_load_attr *attr) 4787 4169 { 4788 4170 struct bpf_object *obj; ··· 4887 4097 4888 4098 obj->loaded = true; 4889 4099 4890 - 
CHECK_ERR(bpf_object__create_maps(obj), err, out); 4891 - CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out); 4892 - CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out); 4100 + err = bpf_object__probe_caps(obj); 4101 + err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); 4102 + err = err ? : bpf_object__sanitize_and_load_btf(obj); 4103 + err = err ? : bpf_object__sanitize_maps(obj); 4104 + err = err ? : bpf_object__create_maps(obj); 4105 + err = err ? : bpf_object__relocate(obj, attr->target_btf_path); 4106 + err = err ? : bpf_object__load_progs(obj, attr->log_level); 4107 + if (err) 4108 + goto out; 4893 4109 4894 4110 return 0; 4895 4111 out: ··· 5466 4670 btf_ext__free(obj->btf_ext); 5467 4671 5468 4672 for (i = 0; i < obj->nr_maps; i++) { 5469 - zfree(&obj->maps[i].name); 5470 - zfree(&obj->maps[i].pin_path); 5471 - if (obj->maps[i].clear_priv) 5472 - obj->maps[i].clear_priv(&obj->maps[i], 5473 - obj->maps[i].priv); 5474 - obj->maps[i].priv = NULL; 5475 - obj->maps[i].clear_priv = NULL; 4673 + struct bpf_map *map = &obj->maps[i]; 4674 + 4675 + if (map->clear_priv) 4676 + map->clear_priv(map, map->priv); 4677 + map->priv = NULL; 4678 + map->clear_priv = NULL; 4679 + 4680 + if (map->mmaped) { 4681 + munmap(map->mmaped, bpf_map_mmap_sz(map)); 4682 + map->mmaped = NULL; 4683 + } 4684 + 4685 + zfree(&map->name); 4686 + zfree(&map->pin_path); 5476 4687 } 5477 4688 5478 - zfree(&obj->sections.rodata); 5479 - zfree(&obj->sections.data); 4689 + zfree(&obj->kconfig); 4690 + zfree(&obj->externs); 4691 + obj->nr_extern = 0; 4692 + 5480 4693 zfree(&obj->maps); 5481 4694 obj->nr_maps = 0; 5482 4695 ··· 5623 4818 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) 5624 4819 { 5625 4820 prog->prog_ifindex = ifindex; 4821 + } 4822 + 4823 + const char *bpf_program__name(const struct bpf_program *prog) 4824 + { 4825 + return prog->name; 5626 4826 } 5627 4827 5628 4828 const char *bpf_program__title(const struct 
bpf_program *prog, bool needs_copy) ··· 5782 4972 */ 5783 4973 #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype) 5784 4974 5785 - static const struct { 4975 + #define SEC_DEF(sec_pfx, ptype, ...) { \ 4976 + .sec = sec_pfx, \ 4977 + .len = sizeof(sec_pfx) - 1, \ 4978 + .prog_type = BPF_PROG_TYPE_##ptype, \ 4979 + __VA_ARGS__ \ 4980 + } 4981 + 4982 + struct bpf_sec_def; 4983 + 4984 + typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec, 4985 + struct bpf_program *prog); 4986 + 4987 + static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, 4988 + struct bpf_program *prog); 4989 + static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, 4990 + struct bpf_program *prog); 4991 + static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, 4992 + struct bpf_program *prog); 4993 + static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, 4994 + struct bpf_program *prog); 4995 + 4996 + struct bpf_sec_def { 5786 4997 const char *sec; 5787 4998 size_t len; 5788 4999 enum bpf_prog_type prog_type; ··· 5811 4980 bool is_attachable; 5812 4981 bool is_attach_btf; 5813 4982 enum bpf_attach_type attach_type; 5814 - } section_names[] = { 4983 + attach_fn_t attach_fn; 4984 + }; 4985 + 4986 + static const struct bpf_sec_def section_defs[] = { 5815 4987 BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), 5816 - BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), 4988 + BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT), 4989 + SEC_DEF("kprobe/", KPROBE, 4990 + .attach_fn = attach_kprobe), 5817 4991 BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), 5818 - BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), 4992 + SEC_DEF("kretprobe/", KPROBE, 4993 + .attach_fn = attach_kprobe), 5819 4994 BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), 5820 4995 BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), 5821 4996 BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), 5822 - BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), 
5823 - BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT), 5824 - BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), 5825 - BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT), 5826 - BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING, 5827 - BPF_TRACE_RAW_TP), 5828 - BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING, 5829 - BPF_TRACE_FENTRY), 5830 - BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING, 5831 - BPF_TRACE_FEXIT), 4997 + SEC_DEF("tracepoint/", TRACEPOINT, 4998 + .attach_fn = attach_tp), 4999 + SEC_DEF("tp/", TRACEPOINT, 5000 + .attach_fn = attach_tp), 5001 + SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, 5002 + .attach_fn = attach_raw_tp), 5003 + SEC_DEF("raw_tp/", RAW_TRACEPOINT, 5004 + .attach_fn = attach_raw_tp), 5005 + SEC_DEF("tp_btf/", TRACING, 5006 + .expected_attach_type = BPF_TRACE_RAW_TP, 5007 + .is_attach_btf = true, 5008 + .attach_fn = attach_trace), 5009 + SEC_DEF("fentry/", TRACING, 5010 + .expected_attach_type = BPF_TRACE_FENTRY, 5011 + .is_attach_btf = true, 5012 + .attach_fn = attach_trace), 5013 + SEC_DEF("fexit/", TRACING, 5014 + .expected_attach_type = BPF_TRACE_FEXIT, 5015 + .is_attach_btf = true, 5016 + .attach_fn = attach_trace), 5832 5017 BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), 5833 5018 BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), 5834 5019 BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), ··· 5906 5059 #undef BPF_APROG_SEC 5907 5060 #undef BPF_EAPROG_SEC 5908 5061 #undef BPF_APROG_COMPAT 5062 + #undef SEC_DEF 5909 5063 5910 5064 #define MAX_TYPE_NAME_SIZE 32 5911 5065 5066 + static const struct bpf_sec_def *find_sec_def(const char *sec_name) 5067 + { 5068 + int i, n = ARRAY_SIZE(section_defs); 5069 + 5070 + for (i = 0; i < n; i++) { 5071 + if (strncmp(sec_name, 5072 + section_defs[i].sec, section_defs[i].len)) 5073 + continue; 5074 + return &section_defs[i]; 5075 + } 5076 + return NULL; 5077 + } 5078 + 5912 5079 static char *libbpf_get_type_names(bool attach_type) 5913 5080 { 5914 - int i, len = ARRAY_SIZE(section_names) * 
MAX_TYPE_NAME_SIZE; 5081 + int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; 5915 5082 char *buf; 5916 5083 5917 5084 buf = malloc(len); ··· 5934 5073 5935 5074 buf[0] = '\0'; 5936 5075 /* Forge string buf with all available names */ 5937 - for (i = 0; i < ARRAY_SIZE(section_names); i++) { 5938 - if (attach_type && !section_names[i].is_attachable) 5076 + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { 5077 + if (attach_type && !section_defs[i].is_attachable) 5939 5078 continue; 5940 5079 5941 - if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) { 5080 + if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { 5942 5081 free(buf); 5943 5082 return NULL; 5944 5083 } 5945 5084 strcat(buf, " "); 5946 - strcat(buf, section_names[i].sec); 5085 + strcat(buf, section_defs[i].sec); 5947 5086 } 5948 5087 5949 5088 return buf; ··· 5952 5091 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, 5953 5092 enum bpf_attach_type *expected_attach_type) 5954 5093 { 5094 + const struct bpf_sec_def *sec_def; 5955 5095 char *type_names; 5956 - int i; 5957 5096 5958 5097 if (!name) 5959 5098 return -EINVAL; 5960 5099 5961 - for (i = 0; i < ARRAY_SIZE(section_names); i++) { 5962 - if (strncmp(name, section_names[i].sec, section_names[i].len)) 5963 - continue; 5964 - *prog_type = section_names[i].prog_type; 5965 - *expected_attach_type = section_names[i].expected_attach_type; 5100 + sec_def = find_sec_def(name); 5101 + if (sec_def) { 5102 + *prog_type = sec_def->prog_type; 5103 + *expected_attach_type = sec_def->expected_attach_type; 5966 5104 return 0; 5967 5105 } 5968 - pr_warn("failed to guess program type from ELF section '%s'\n", name); 5106 + 5107 + pr_debug("failed to guess program type from ELF section '%s'\n", name); 5969 5108 type_names = libbpf_get_type_names(false); 5970 5109 if (type_names != NULL) { 5971 - pr_info("supported section(type) names are:%s\n", type_names); 5110 + pr_debug("supported section(type) names are:%s\n", 
type_names); 5972 5111 free(type_names); 5973 5112 } 5974 5113 ··· 6047 5186 if (!name) 6048 5187 return -EINVAL; 6049 5188 6050 - for (i = 0; i < ARRAY_SIZE(section_names); i++) { 6051 - if (!section_names[i].is_attach_btf) 5189 + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { 5190 + if (!section_defs[i].is_attach_btf) 6052 5191 continue; 6053 - if (strncmp(name, section_names[i].sec, section_names[i].len)) 5192 + if (strncmp(name, section_defs[i].sec, section_defs[i].len)) 6054 5193 continue; 6055 5194 if (attach_prog_fd) 6056 - err = libbpf_find_prog_btf_id(name + section_names[i].len, 5195 + err = libbpf_find_prog_btf_id(name + section_defs[i].len, 6057 5196 attach_prog_fd); 6058 5197 else 6059 - err = libbpf_find_vmlinux_btf_id(name + section_names[i].len, 5198 + err = libbpf_find_vmlinux_btf_id(name + section_defs[i].len, 6060 5199 attach_type); 6061 5200 if (err <= 0) 6062 5201 pr_warn("%s is not found in vmlinux BTF\n", name); ··· 6075 5214 if (!name) 6076 5215 return -EINVAL; 6077 5216 6078 - for (i = 0; i < ARRAY_SIZE(section_names); i++) { 6079 - if (strncmp(name, section_names[i].sec, section_names[i].len)) 5217 + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { 5218 + if (strncmp(name, section_defs[i].sec, section_defs[i].len)) 6080 5219 continue; 6081 - if (!section_names[i].is_attachable) 5220 + if (!section_defs[i].is_attachable) 6082 5221 return -EINVAL; 6083 - *attach_type = section_names[i].attach_type; 5222 + *attach_type = section_defs[i].attach_type; 6084 5223 return 0; 6085 5224 } 6086 - pr_warn("failed to guess attach type based on ELF section name '%s'\n", name); 5225 + pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); 6087 5226 type_names = libbpf_get_type_names(true); 6088 5227 if (type_names != NULL) { 6089 - pr_info("attachable section(type) names are:%s\n", type_names); 5228 + pr_debug("attachable section(type) names are:%s\n", type_names); 6090 5229 free(type_names); 6091 5230 } 6092 5231 ··· 6327 5466 
} 6328 5467 6329 5468 struct bpf_link { 5469 + int (*detach)(struct bpf_link *link); 6330 5470 int (*destroy)(struct bpf_link *link); 5471 + bool disconnected; 6331 5472 }; 5473 + 5474 + /* Release "ownership" of underlying BPF resource (typically, BPF program 5475 + * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected 5476 + * link, when destructed through bpf_link__destroy() call won't attempt to 5477 + * detach/unregisted that BPF resource. This is useful in situations where, 5478 + * say, attached BPF program has to outlive userspace program that attached it 5479 + * in the system. Depending on type of BPF program, though, there might be 5480 + * additional steps (like pinning BPF program in BPF FS) necessary to ensure 5481 + * exit of userspace program doesn't trigger automatic detachment and clean up 5482 + * inside the kernel. 5483 + */ 5484 + void bpf_link__disconnect(struct bpf_link *link) 5485 + { 5486 + link->disconnected = true; 5487 + } 6332 5488 6333 5489 int bpf_link__destroy(struct bpf_link *link) 6334 5490 { 6335 - int err; 5491 + int err = 0; 6336 5492 6337 5493 if (!link) 6338 5494 return 0; 6339 5495 6340 - err = link->destroy(link); 5496 + if (!link->disconnected && link->detach) 5497 + err = link->detach(link); 5498 + if (link->destroy) 5499 + link->destroy(link); 6341 5500 free(link); 6342 5501 6343 5502 return err; ··· 6368 5487 int fd; /* hook FD */ 6369 5488 }; 6370 5489 6371 - static int bpf_link__destroy_perf_event(struct bpf_link *link) 5490 + static int bpf_link__detach_perf_event(struct bpf_link *link) 6372 5491 { 6373 5492 struct bpf_link_fd *l = (void *)link; 6374 5493 int err; ··· 6400 5519 return ERR_PTR(-EINVAL); 6401 5520 } 6402 5521 6403 - link = malloc(sizeof(*link)); 5522 + link = calloc(1, sizeof(*link)); 6404 5523 if (!link) 6405 5524 return ERR_PTR(-ENOMEM); 6406 - link->link.destroy = &bpf_link__destroy_perf_event; 5525 + link->link.detach = &bpf_link__detach_perf_event; 6407 5526 link->fd = pfd; 6408 
5527 6409 5528 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { ··· 6560 5679 return link; 6561 5680 } 6562 5681 5682 + static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, 5683 + struct bpf_program *prog) 5684 + { 5685 + const char *func_name; 5686 + bool retprobe; 5687 + 5688 + func_name = bpf_program__title(prog, false) + sec->len; 5689 + retprobe = strcmp(sec->sec, "kretprobe/") == 0; 5690 + 5691 + return bpf_program__attach_kprobe(prog, retprobe, func_name); 5692 + } 5693 + 6563 5694 struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, 6564 5695 bool retprobe, pid_t pid, 6565 5696 const char *binary_path, ··· 6684 5791 return link; 6685 5792 } 6686 5793 6687 - static int bpf_link__destroy_fd(struct bpf_link *link) 5794 + static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, 5795 + struct bpf_program *prog) 5796 + { 5797 + char *sec_name, *tp_cat, *tp_name; 5798 + struct bpf_link *link; 5799 + 5800 + sec_name = strdup(bpf_program__title(prog, false)); 5801 + if (!sec_name) 5802 + return ERR_PTR(-ENOMEM); 5803 + 5804 + /* extract "tp/<category>/<name>" */ 5805 + tp_cat = sec_name + sec->len; 5806 + tp_name = strchr(tp_cat, '/'); 5807 + if (!tp_name) { 5808 + link = ERR_PTR(-EINVAL); 5809 + goto out; 5810 + } 5811 + *tp_name = '\0'; 5812 + tp_name++; 5813 + 5814 + link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); 5815 + out: 5816 + free(sec_name); 5817 + return link; 5818 + } 5819 + 5820 + static int bpf_link__detach_fd(struct bpf_link *link) 6688 5821 { 6689 5822 struct bpf_link_fd *l = (void *)link; 6690 5823 ··· 6731 5812 return ERR_PTR(-EINVAL); 6732 5813 } 6733 5814 6734 - link = malloc(sizeof(*link)); 5815 + link = calloc(1, sizeof(*link)); 6735 5816 if (!link) 6736 5817 return ERR_PTR(-ENOMEM); 6737 - link->link.destroy = &bpf_link__destroy_fd; 5818 + link->link.detach = &bpf_link__detach_fd; 6738 5819 6739 5820 pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); 6740 5821 if (pfd < 0) { ··· 6747 
5828 } 6748 5829 link->fd = pfd; 6749 5830 return (struct bpf_link *)link; 5831 + } 5832 + 5833 + static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, 5834 + struct bpf_program *prog) 5835 + { 5836 + const char *tp_name = bpf_program__title(prog, false) + sec->len; 5837 + 5838 + return bpf_program__attach_raw_tracepoint(prog, tp_name); 6750 5839 } 6751 5840 6752 5841 struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) ··· 6770 5843 return ERR_PTR(-EINVAL); 6771 5844 } 6772 5845 6773 - link = malloc(sizeof(*link)); 5846 + link = calloc(1, sizeof(*link)); 6774 5847 if (!link) 6775 5848 return ERR_PTR(-ENOMEM); 6776 - link->link.destroy = &bpf_link__destroy_fd; 5849 + link->link.detach = &bpf_link__detach_fd; 6777 5850 6778 5851 pfd = bpf_raw_tracepoint_open(NULL, prog_fd); 6779 5852 if (pfd < 0) { ··· 6786 5859 } 6787 5860 link->fd = pfd; 6788 5861 return (struct bpf_link *)link; 5862 + } 5863 + 5864 + static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, 5865 + struct bpf_program *prog) 5866 + { 5867 + return bpf_program__attach_trace(prog); 5868 + } 5869 + 5870 + struct bpf_link *bpf_program__attach(struct bpf_program *prog) 5871 + { 5872 + const struct bpf_sec_def *sec_def; 5873 + 5874 + sec_def = find_sec_def(bpf_program__title(prog, false)); 5875 + if (!sec_def || !sec_def->attach_fn) 5876 + return ERR_PTR(-ESRCH); 5877 + 5878 + return sec_def->attach_fn(sec_def, prog); 6789 5879 } 6790 5880 6791 5881 enum bpf_perf_event_ret ··· 6888 5944 size_t mmap_size; 6889 5945 struct perf_cpu_buf **cpu_bufs; 6890 5946 struct epoll_event *events; 6891 - int cpu_cnt; 5947 + int cpu_cnt; /* number of allocated CPU buffers */ 6892 5948 int epoll_fd; /* perf event FD */ 6893 5949 int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ 6894 5950 }; ··· 7022 6078 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, 7023 6079 struct perf_buffer_params *p) 7024 6080 { 6081 + const char *online_cpus_file = 
"/sys/devices/system/cpu/online"; 7025 6082 struct bpf_map_info map = {}; 7026 6083 char msg[STRERR_BUFSIZE]; 7027 6084 struct perf_buffer *pb; 6085 + bool *online = NULL; 7028 6086 __u32 map_info_len; 7029 - int err, i; 6087 + int err, i, j, n; 7030 6088 7031 6089 if (page_cnt & (page_cnt - 1)) { 7032 6090 pr_warn("page count should be power of two, but is %zu\n", ··· 7097 6151 goto error; 7098 6152 } 7099 6153 7100 - for (i = 0; i < pb->cpu_cnt; i++) { 6154 + err = parse_cpu_mask_file(online_cpus_file, &online, &n); 6155 + if (err) { 6156 + pr_warn("failed to get online CPU mask: %d\n", err); 6157 + goto error; 6158 + } 6159 + 6160 + for (i = 0, j = 0; i < pb->cpu_cnt; i++) { 7101 6161 struct perf_cpu_buf *cpu_buf; 7102 6162 int cpu, map_key; 7103 6163 7104 6164 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; 7105 6165 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i; 6166 + 6167 + /* in case user didn't explicitly requested particular CPUs to 6168 + * be attached to, skip offline/not present CPUs 6169 + */ 6170 + if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu])) 6171 + continue; 7106 6172 7107 6173 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key); 7108 6174 if (IS_ERR(cpu_buf)) { ··· 7122 6164 goto error; 7123 6165 } 7124 6166 7125 - pb->cpu_bufs[i] = cpu_buf; 6167 + pb->cpu_bufs[j] = cpu_buf; 7126 6168 7127 6169 err = bpf_map_update_elem(pb->map_fd, &map_key, 7128 6170 &cpu_buf->fd, 0); ··· 7134 6176 goto error; 7135 6177 } 7136 6178 7137 - pb->events[i].events = EPOLLIN; 7138 - pb->events[i].data.ptr = cpu_buf; 6179 + pb->events[j].events = EPOLLIN; 6180 + pb->events[j].data.ptr = cpu_buf; 7139 6181 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, 7140 - &pb->events[i]) < 0) { 6182 + &pb->events[j]) < 0) { 7141 6183 err = -errno; 7142 6184 pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", 7143 6185 cpu, cpu_buf->fd, 7144 6186 libbpf_strerror_r(err, msg, sizeof(msg))); 7145 6187 goto error; 7146 6188 } 6189 + j++; 7147 6190 } 6191 + pb->cpu_cnt 
= j; 6192 + free(online); 7148 6193 7149 6194 return pb; 7150 6195 7151 6196 error: 6197 + free(online); 7152 6198 if (pb) 7153 6199 perf_buffer__free(pb); 7154 6200 return ERR_PTR(err); ··· 7483 6521 } 7484 6522 } 7485 6523 6524 + int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) 6525 + { 6526 + int err = 0, n, len, start, end = -1; 6527 + bool *tmp; 6528 + 6529 + *mask = NULL; 6530 + *mask_sz = 0; 6531 + 6532 + /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ 6533 + while (*s) { 6534 + if (*s == ',' || *s == '\n') { 6535 + s++; 6536 + continue; 6537 + } 6538 + n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len); 6539 + if (n <= 0 || n > 2) { 6540 + pr_warn("Failed to get CPU range %s: %d\n", s, n); 6541 + err = -EINVAL; 6542 + goto cleanup; 6543 + } else if (n == 1) { 6544 + end = start; 6545 + } 6546 + if (start < 0 || start > end) { 6547 + pr_warn("Invalid CPU range [%d,%d] in %s\n", 6548 + start, end, s); 6549 + err = -EINVAL; 6550 + goto cleanup; 6551 + } 6552 + tmp = realloc(*mask, end + 1); 6553 + if (!tmp) { 6554 + err = -ENOMEM; 6555 + goto cleanup; 6556 + } 6557 + *mask = tmp; 6558 + memset(tmp + *mask_sz, 0, start - *mask_sz); 6559 + memset(tmp + start, 1, end - start + 1); 6560 + *mask_sz = end + 1; 6561 + s += len; 6562 + } 6563 + if (!*mask_sz) { 6564 + pr_warn("Empty CPU range\n"); 6565 + return -EINVAL; 6566 + } 6567 + return 0; 6568 + cleanup: 6569 + free(*mask); 6570 + *mask = NULL; 6571 + return err; 6572 + } 6573 + 6574 + int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) 6575 + { 6576 + int fd, err = 0, len; 6577 + char buf[128]; 6578 + 6579 + fd = open(fcpu, O_RDONLY); 6580 + if (fd < 0) { 6581 + err = -errno; 6582 + pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); 6583 + return err; 6584 + } 6585 + len = read(fd, buf, sizeof(buf)); 6586 + close(fd); 6587 + if (len <= 0) { 6588 + err = len ? 
-errno : -EINVAL; 6589 + pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); 6590 + return err; 6591 + } 6592 + if (len >= sizeof(buf)) { 6593 + pr_warn("CPU mask is too big in file %s\n", fcpu); 6594 + return -E2BIG; 6595 + } 6596 + buf[len] = '\0'; 6597 + 6598 + return parse_cpu_mask_str(buf, mask, mask_sz); 6599 + } 6600 + 7486 6601 int libbpf_num_possible_cpus(void) 7487 6602 { 7488 6603 static const char *fcpu = "/sys/devices/system/cpu/possible"; 7489 - int len = 0, n = 0, il = 0, ir = 0; 7490 - unsigned int start = 0, end = 0; 7491 - int tmp_cpus = 0; 7492 6604 static int cpus; 7493 - char buf[128]; 7494 - int error = 0; 7495 - int fd = -1; 6605 + int err, n, i, tmp_cpus; 6606 + bool *mask; 7496 6607 7497 6608 tmp_cpus = READ_ONCE(cpus); 7498 6609 if (tmp_cpus > 0) 7499 6610 return tmp_cpus; 7500 6611 7501 - fd = open(fcpu, O_RDONLY); 7502 - if (fd < 0) { 7503 - error = errno; 7504 - pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error)); 7505 - return -error; 7506 - } 7507 - len = read(fd, buf, sizeof(buf)); 7508 - close(fd); 7509 - if (len <= 0) { 7510 - error = len ? 
errno : EINVAL; 7511 - pr_warn("Failed to read # of possible cpus from %s: %s\n", 7512 - fcpu, strerror(error)); 7513 - return -error; 7514 - } 7515 - if (len == sizeof(buf)) { 7516 - pr_warn("File %s size overflow\n", fcpu); 7517 - return -EOVERFLOW; 7518 - } 7519 - buf[len] = '\0'; 6612 + err = parse_cpu_mask_file(fcpu, &mask, &n); 6613 + if (err) 6614 + return err; 7520 6615 7521 - for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { 7522 - /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ 7523 - if (buf[ir] == ',' || buf[ir] == '\0') { 7524 - buf[ir] = '\0'; 7525 - n = sscanf(&buf[il], "%u-%u", &start, &end); 7526 - if (n <= 0) { 7527 - pr_warn("Failed to get # CPUs from %s\n", 7528 - &buf[il]); 7529 - return -EINVAL; 7530 - } else if (n == 1) { 7531 - end = start; 7532 - } 7533 - tmp_cpus += end - start + 1; 7534 - il = ir + 1; 7535 - } 6616 + tmp_cpus = 0; 6617 + for (i = 0; i < n; i++) { 6618 + if (mask[i]) 6619 + tmp_cpus++; 7536 6620 } 7537 - if (tmp_cpus <= 0) { 7538 - pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); 7539 - return -EINVAL; 7540 - } 6621 + free(mask); 7541 6622 7542 6623 WRITE_ONCE(cpus, tmp_cpus); 7543 6624 return tmp_cpus; 6625 + } 6626 + 6627 + int bpf_object__open_skeleton(struct bpf_object_skeleton *s, 6628 + const struct bpf_object_open_opts *opts) 6629 + { 6630 + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts, 6631 + .object_name = s->name, 6632 + ); 6633 + struct bpf_object *obj; 6634 + int i; 6635 + 6636 + /* Attempt to preserve opts->object_name, unless overriden by user 6637 + * explicitly. Overwriting object name for skeletons is discouraged, 6638 + * as it breaks global data maps, because they contain object name 6639 + * prefix as their own map name prefix. When skeleton is generated, 6640 + * bpftool is making an assumption that this name will stay the same. 
6641 + */ 6642 + if (opts) { 6643 + memcpy(&skel_opts, opts, sizeof(*opts)); 6644 + if (!opts->object_name) 6645 + skel_opts.object_name = s->name; 6646 + } 6647 + 6648 + obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts); 6649 + if (IS_ERR(obj)) { 6650 + pr_warn("failed to initialize skeleton BPF object '%s': %ld\n", 6651 + s->name, PTR_ERR(obj)); 6652 + return PTR_ERR(obj); 6653 + } 6654 + 6655 + *s->obj = obj; 6656 + 6657 + for (i = 0; i < s->map_cnt; i++) { 6658 + struct bpf_map **map = s->maps[i].map; 6659 + const char *name = s->maps[i].name; 6660 + void **mmaped = s->maps[i].mmaped; 6661 + 6662 + *map = bpf_object__find_map_by_name(obj, name); 6663 + if (!*map) { 6664 + pr_warn("failed to find skeleton map '%s'\n", name); 6665 + return -ESRCH; 6666 + } 6667 + 6668 + /* externs shouldn't be pre-setup from user code */ 6669 + if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG) 6670 + *mmaped = (*map)->mmaped; 6671 + } 6672 + 6673 + for (i = 0; i < s->prog_cnt; i++) { 6674 + struct bpf_program **prog = s->progs[i].prog; 6675 + const char *name = s->progs[i].name; 6676 + 6677 + *prog = bpf_object__find_program_by_name(obj, name); 6678 + if (!*prog) { 6679 + pr_warn("failed to find skeleton program '%s'\n", name); 6680 + return -ESRCH; 6681 + } 6682 + } 6683 + 6684 + return 0; 6685 + } 6686 + 6687 + int bpf_object__load_skeleton(struct bpf_object_skeleton *s) 6688 + { 6689 + int i, err; 6690 + 6691 + err = bpf_object__load(*s->obj); 6692 + if (err) { 6693 + pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); 6694 + return err; 6695 + } 6696 + 6697 + for (i = 0; i < s->map_cnt; i++) { 6698 + struct bpf_map *map = *s->maps[i].map; 6699 + size_t mmap_sz = bpf_map_mmap_sz(map); 6700 + int prot, map_fd = bpf_map__fd(map); 6701 + void **mmaped = s->maps[i].mmaped; 6702 + 6703 + if (!mmaped) 6704 + continue; 6705 + 6706 + if (!(map->def.map_flags & BPF_F_MMAPABLE)) { 6707 + *mmaped = NULL; 6708 + continue; 6709 + } 6710 + 6711 + if 
(map->def.map_flags & BPF_F_RDONLY_PROG) 6712 + prot = PROT_READ; 6713 + else 6714 + prot = PROT_READ | PROT_WRITE; 6715 + 6716 + /* Remap anonymous mmap()-ed "map initialization image" as 6717 + * a BPF map-backed mmap()-ed memory, but preserving the same 6718 + * memory address. This will cause kernel to change process' 6719 + * page table to point to a different piece of kernel memory, 6720 + * but from userspace point of view memory address (and its 6721 + * contents, being identical at this point) will stay the 6722 + * same. This mapping will be released by bpf_object__close() 6723 + * as per normal clean up procedure, so we don't need to worry 6724 + * about it from skeleton's clean up perspective. 6725 + */ 6726 + *mmaped = mmap(map->mmaped, mmap_sz, prot, 6727 + MAP_SHARED | MAP_FIXED, map_fd, 0); 6728 + if (*mmaped == MAP_FAILED) { 6729 + err = -errno; 6730 + *mmaped = NULL; 6731 + pr_warn("failed to re-mmap() map '%s': %d\n", 6732 + bpf_map__name(map), err); 6733 + return err; 6734 + } 6735 + } 6736 + 6737 + return 0; 6738 + } 6739 + 6740 + int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) 6741 + { 6742 + int i; 6743 + 6744 + for (i = 0; i < s->prog_cnt; i++) { 6745 + struct bpf_program *prog = *s->progs[i].prog; 6746 + struct bpf_link **link = s->progs[i].link; 6747 + const struct bpf_sec_def *sec_def; 6748 + const char *sec_name = bpf_program__title(prog, false); 6749 + 6750 + sec_def = find_sec_def(sec_name); 6751 + if (!sec_def || !sec_def->attach_fn) 6752 + continue; 6753 + 6754 + *link = sec_def->attach_fn(sec_def, prog); 6755 + if (IS_ERR(*link)) { 6756 + pr_warn("failed to auto-attach program '%s': %ld\n", 6757 + bpf_program__name(prog), PTR_ERR(*link)); 6758 + return PTR_ERR(*link); 6759 + } 6760 + } 6761 + 6762 + return 0; 6763 + } 6764 + 6765 + void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) 6766 + { 6767 + int i; 6768 + 6769 + for (i = 0; i < s->prog_cnt; i++) { 6770 + struct bpf_link **link = s->progs[i].link; 
6771 + 6772 + if (!IS_ERR_OR_NULL(*link)) 6773 + bpf_link__destroy(*link); 6774 + *link = NULL; 6775 + } 6776 + } 6777 + 6778 + void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) 6779 + { 6780 + if (s->progs) 6781 + bpf_object__detach_skeleton(s); 6782 + if (s->obj) 6783 + bpf_object__close(*s->obj); 6784 + free(s->maps); 6785 + free(s->progs); 6786 + free(s); 7544 6787 }
+61 -46
tools/lib/bpf/libbpf.h
··· 17 17 #include <sys/types.h> // for size_t 18 18 #include <linux/bpf.h> 19 19 20 + #include "libbpf_common.h" 21 + 20 22 #ifdef __cplusplus 21 23 extern "C" { 22 - #endif 23 - 24 - #ifndef LIBBPF_API 25 - #define LIBBPF_API __attribute__((visibility("default"))) 26 24 #endif 27 25 28 26 enum libbpf_errno { ··· 65 67 enum bpf_prog_type prog_type; 66 68 }; 67 69 68 - /* Helper macro to declare and initialize libbpf options struct 69 - * 70 - * This dance with uninitialized declaration, followed by memset to zero, 71 - * followed by assignment using compound literal syntax is done to preserve 72 - * ability to use a nice struct field initialization syntax and **hopefully** 73 - * have all the padding bytes initialized to zero. It's not guaranteed though, 74 - * when copying literal, that compiler won't copy garbage in literal's padding 75 - * bytes, but that's the best way I've found and it seems to work in practice. 76 - * 77 - * Macro declares opts struct of given type and name, zero-initializes, 78 - * including any extra padding, it with memset() and then assigns initial 79 - * values provided by users in struct initializer-syntax as varargs. 80 - */ 81 - #define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ 82 - struct TYPE NAME = ({ \ 83 - memset(&NAME, 0, sizeof(struct TYPE)); \ 84 - (struct TYPE) { \ 85 - .sz = sizeof(struct TYPE), \ 86 - __VA_ARGS__ \ 87 - }; \ 88 - }) 89 - 90 70 struct bpf_object_open_opts { 91 71 /* size of this struct, for forward/backward compatiblity */ 92 72 size_t sz; ··· 85 109 */ 86 110 const char *pin_root_path; 87 111 __u32 attach_prog_fd; 112 + /* Additional kernel config content that augments and overrides 113 + * system Kconfig for CONFIG_xxx externs. 
114 + */ 115 + const char *kconfig; 88 116 }; 89 - #define bpf_object_open_opts__last_field attach_prog_fd 117 + #define bpf_object_open_opts__last_field kconfig 90 118 91 119 LIBBPF_API struct bpf_object *bpf_object__open(const char *path); 92 120 LIBBPF_API struct bpf_object * 93 - bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts); 121 + bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts); 94 122 LIBBPF_API struct bpf_object * 95 123 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, 96 - struct bpf_object_open_opts *opts); 124 + const struct bpf_object_open_opts *opts); 97 125 98 126 /* deprecated bpf_object__open variants */ 99 127 LIBBPF_API struct bpf_object * ··· 105 125 const char *name); 106 126 LIBBPF_API struct bpf_object * 107 127 bpf_object__open_xattr(struct bpf_object_open_attr *attr); 108 - 109 - int bpf_object__section_size(const struct bpf_object *obj, const char *name, 110 - __u32 *size); 111 - int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, 112 - __u32 *off); 113 128 114 129 enum libbpf_pin_type { 115 130 LIBBPF_PIN_NONE, ··· 136 161 LIBBPF_API int bpf_object__load(struct bpf_object *obj); 137 162 LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); 138 163 LIBBPF_API int bpf_object__unload(struct bpf_object *obj); 164 + 139 165 LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); 140 166 LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); 141 167 ··· 147 171 LIBBPF_API struct bpf_program * 148 172 bpf_object__find_program_by_title(const struct bpf_object *obj, 149 173 const char *title); 174 + LIBBPF_API struct bpf_program * 175 + bpf_object__find_program_by_name(const struct bpf_object *obj, 176 + const char *name); 150 177 151 178 LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); 152 179 #define bpf_object__for_each_safe(pos, tmp) \ ··· 193 214 LIBBPF_API void 
bpf_program__set_ifindex(struct bpf_program *prog, 194 215 __u32 ifindex); 195 216 217 + LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); 196 218 LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog, 197 219 bool needs_copy); 198 220 ··· 215 235 216 236 struct bpf_link; 217 237 238 + LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); 218 239 LIBBPF_API int bpf_link__destroy(struct bpf_link *link); 219 240 241 + LIBBPF_API struct bpf_link * 242 + bpf_program__attach(struct bpf_program *prog); 220 243 LIBBPF_API struct bpf_link * 221 244 bpf_program__attach_perf_event(struct bpf_program *prog, int pfd); 222 245 LIBBPF_API struct bpf_link * ··· 495 512 void **copy_mem, size_t *copy_size, 496 513 bpf_perf_event_print_t fn, void *private_data); 497 514 498 - struct nlattr; 499 - typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); 500 - int libbpf_netlink_open(unsigned int *nl_pid); 501 - int libbpf_nl_get_link(int sock, unsigned int nl_pid, 502 - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); 503 - int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, 504 - libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); 505 - int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, 506 - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); 507 - int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, 508 - libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); 509 - 510 515 struct bpf_prog_linfo; 511 516 struct bpf_prog_info; 512 517 ··· 600 629 * 601 630 */ 602 631 LIBBPF_API int libbpf_num_possible_cpus(void); 632 + 633 + struct bpf_map_skeleton { 634 + const char *name; 635 + struct bpf_map **map; 636 + void **mmaped; 637 + }; 638 + 639 + struct bpf_prog_skeleton { 640 + const char *name; 641 + struct bpf_program **prog; 642 + struct bpf_link **link; 643 + }; 644 + 645 + struct bpf_object_skeleton { 646 + size_t sz; /* size of this struct, 
for forward/backward compatibility */ 647 + 648 + const char *name; 649 + void *data; 650 + size_t data_sz; 651 + 652 + struct bpf_object **obj; 653 + 654 + int map_cnt; 655 + int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */ 656 + struct bpf_map_skeleton *maps; 657 + 658 + int prog_cnt; 659 + int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */ 660 + struct bpf_prog_skeleton *progs; 661 + }; 662 + 663 + LIBBPF_API int 664 + bpf_object__open_skeleton(struct bpf_object_skeleton *s, 665 + const struct bpf_object_open_opts *opts); 666 + LIBBPF_API int bpf_object__load_skeleton(struct bpf_object_skeleton *s); 667 + LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s); 668 + LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s); 669 + LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s); 670 + 671 + enum libbpf_tristate { 672 + TRI_NO = 0, 673 + TRI_YES = 1, 674 + TRI_MODULE = 2, 675 + }; 603 676 604 677 #ifdef __cplusplus 605 678 } /* extern "C" */
+16
tools/lib/bpf/libbpf.map
··· 208 208 btf__find_by_name_kind; 209 209 libbpf_find_vmlinux_btf_id; 210 210 } LIBBPF_0.0.5; 211 + 212 + LIBBPF_0.0.7 { 213 + global: 214 + btf_dump__emit_type_decl; 215 + bpf_link__disconnect; 216 + bpf_object__find_program_by_name; 217 + bpf_object__attach_skeleton; 218 + bpf_object__destroy_skeleton; 219 + bpf_object__detach_skeleton; 220 + bpf_object__load_skeleton; 221 + bpf_object__open_skeleton; 222 + bpf_prog_attach_xattr; 223 + bpf_program__attach; 224 + bpf_program__name; 225 + btf__align_of; 226 + } LIBBPF_0.0.6;
+1 -1
tools/lib/bpf/libbpf.pc.template
··· 8 8 Description: BPF library 9 9 Version: @VERSION@ 10 10 Libs: -L${libdir} -lbpf 11 - Requires.private: libelf 11 + Requires.private: libelf zlib 12 12 Cflags: -I${includedir}
+40
tools/lib/bpf/libbpf_common.h
··· 1 + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 + 3 + /* 4 + * Common user-facing libbpf helpers. 5 + * 6 + * Copyright (c) 2019 Facebook 7 + */ 8 + 9 + #ifndef __LIBBPF_LIBBPF_COMMON_H 10 + #define __LIBBPF_LIBBPF_COMMON_H 11 + 12 + #include <string.h> 13 + 14 + #ifndef LIBBPF_API 15 + #define LIBBPF_API __attribute__((visibility("default"))) 16 + #endif 17 + 18 + /* Helper macro to declare and initialize libbpf options struct 19 + * 20 + * This dance with uninitialized declaration, followed by memset to zero, 21 + * followed by assignment using compound literal syntax is done to preserve 22 + * ability to use a nice struct field initialization syntax and **hopefully** 23 + * have all the padding bytes initialized to zero. It's not guaranteed though, 24 + * when copying literal, that compiler won't copy garbage in literal's padding 25 + * bytes, but that's the best way I've found and it seems to work in practice. 26 + * 27 + * Macro declares opts struct of given type and name, zero-initializes, 28 + * including any extra padding, it with memset() and then assigns initial 29 + * values provided by users in struct initializer-syntax as varargs. 30 + */ 31 + #define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ 32 + struct TYPE NAME = ({ \ 33 + memset(&NAME, 0, sizeof(struct TYPE)); \ 34 + (struct TYPE) { \ 35 + .sz = sizeof(struct TYPE), \ 36 + __VA_ARGS__ \ 37 + }; \ 38 + }) 39 + 40 + #endif /* __LIBBPF_LIBBPF_COMMON_H */
+20 -1
tools/lib/bpf/libbpf_internal.h
··· 76 76 77 77 for (i = opts_sz; i < user_sz; i++) { 78 78 if (opts[i]) { 79 - pr_warn("%s has non-zero extra bytes", 79 + pr_warn("%s has non-zero extra bytes\n", 80 80 type_name); 81 81 return false; 82 82 } ··· 95 95 #define OPTS_GET(opts, field, fallback_value) \ 96 96 (OPTS_HAS(opts, field) ? (opts)->field : fallback_value) 97 97 98 + int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); 99 + int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); 98 100 int libbpf__load_raw_btf(const char *raw_types, size_t types_len, 99 101 const char *str_sec, size_t str_len); 102 + 103 + int bpf_object__section_size(const struct bpf_object *obj, const char *name, 104 + __u32 *size); 105 + int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, 106 + __u32 *off); 107 + 108 + struct nlattr; 109 + typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); 110 + int libbpf_netlink_open(unsigned int *nl_pid); 111 + int libbpf_nl_get_link(int sock, unsigned int nl_pid, 112 + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); 113 + int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, 114 + libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); 115 + int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, 116 + libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); 117 + int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, 118 + libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); 100 119 101 120 struct btf_ext_info { 102 121 /*
+2 -1
tools/testing/selftests/bpf/.gitignore
··· 21 21 get_cgroup_id_user 22 22 test_skb_cgroup_id_user 23 23 test_socket_cookie 24 - test_cgroup_attach 25 24 test_cgroup_storage 26 25 test_select_reuseport 27 26 test_flow_dissector ··· 37 38 test_btf_dump 38 39 xdping 39 40 test_cpp 41 + *.skel.h 40 42 /no_alu32 41 43 /bpf_gcc 44 + /tools
+68 -13
tools/testing/selftests/bpf/Makefile
··· 3 3 include ../../../scripts/Makefile.arch 4 4 5 5 CURDIR := $(abspath .) 6 - LIBDIR := $(abspath ../../../lib) 6 + TOOLSDIR := $(abspath ../../..) 7 + LIBDIR := $(TOOLSDIR)/lib 7 8 BPFDIR := $(LIBDIR)/bpf 8 - TOOLSDIR := $(abspath ../../../include) 9 - APIDIR := $(TOOLSDIR)/uapi 9 + TOOLSINCDIR := $(TOOLSDIR)/include 10 + BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool 11 + APIDIR := $(TOOLSINCDIR)/uapi 10 12 GENDIR := $(abspath ../../../../include/generated) 11 13 GENHDR := $(GENDIR)/autoconf.h 12 14 ··· 21 19 LLVM_OBJCOPY ?= llvm-objcopy 22 20 BPF_GCC ?= $(shell command -v bpf-gcc;) 23 21 CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) \ 24 - -I$(GENDIR) -I$(TOOLSDIR) -I$(CURDIR) \ 22 + -I$(GENDIR) -I$(TOOLSINCDIR) -I$(CURDIR) \ 25 23 -Dbpf_prog_load=bpf_prog_test_load \ 26 24 -Dbpf_load_program=bpf_test_load_program 27 - LDLIBS += -lcap -lelf -lrt -lpthread 25 + LDLIBS += -lcap -lelf -lz -lrt -lpthread 28 26 29 27 # Order correspond to 'make run_tests' order 30 28 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ 31 29 test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ 32 30 test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ 33 - test_cgroup_storage test_select_reuseport \ 31 + test_cgroup_storage \ 34 32 test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ 35 - test_cgroup_attach test_progs-no_alu32 33 + test_progs-no_alu32 36 34 37 35 # Also test bpf-gcc, if present 38 36 ifneq ($(BPF_GCC),) ··· 77 75 78 76 TEST_CUSTOM_PROGS = urandom_read 79 77 78 + # Emit succinct information message describing current building step 79 + # $1 - generic step name (e.g., CC, LINK, etc); 80 + # $2 - optional "flavor" specifier; if provided, will be emitted as [flavor]; 81 + # $3 - target (assumed to be file); only file name will be emitted; 82 + # $4 - optional extra arg, emitted as-is, if provided. 
83 + ifeq ($(V),1) 84 + msg = 85 + else 86 + msg = @$(info $(1)$(if $(2), [$(2)]) $(notdir $(3)))$(if $(4), $(4)) 87 + endif 88 + 89 + # override lib.mk's default rules 90 + OVERRIDE_TARGETS := 1 91 + override define CLEAN 92 + $(call msg, CLEAN) 93 + $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) 94 + endef 95 + 80 96 include ../lib.mk 81 97 82 98 # Define simple and short `make test_progs`, `make test_sysctl`, etc targets ··· 107 87 $(TEST_GEN_PROGS_EXTENDED) \ 108 88 $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; 109 89 90 + $(OUTPUT)/%:%.c 91 + $(call msg, BINARY,,$@) 92 + $(LINK.c) $^ $(LDLIBS) -o $@ 93 + 110 94 $(OUTPUT)/urandom_read: urandom_read.c 95 + $(call msg, BINARY,,$@) 111 96 $(CC) -o $@ $< -Wl,--build-id 112 97 113 98 $(OUTPUT)/test_stub.o: test_stub.c 99 + $(call msg, CC,,$@) 114 100 $(CC) -c $(CFLAGS) -o $@ $< 115 101 116 102 BPFOBJ := $(OUTPUT)/libbpf.a ··· 136 110 $(OUTPUT)/test_netcnt: cgroup_helpers.c 137 111 $(OUTPUT)/test_sock_fields: cgroup_helpers.c 138 112 $(OUTPUT)/test_sysctl: cgroup_helpers.c 139 - $(OUTPUT)/test_cgroup_attach: cgroup_helpers.c 140 113 141 114 .PHONY: force 142 115 143 116 # force a rebuild of BPFOBJ when its dependencies are updated 144 117 force: 118 + 119 + DEFAULT_BPFTOOL := $(OUTPUT)/tools/usr/local/sbin/bpftool 120 + BPFTOOL ?= $(DEFAULT_BPFTOOL) 121 + 122 + $(DEFAULT_BPFTOOL): force 123 + $(MAKE) -C $(BPFTOOLDIR) DESTDIR=$(OUTPUT)/tools install 145 124 146 125 $(BPFOBJ): force 147 126 $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ ··· 190 159 # $3 - CFLAGS 191 160 # $4 - LDFLAGS 192 161 define CLANG_BPF_BUILD_RULE 162 + $(call msg, CLANG-LLC,$(TRUNNER_BINARY),$2) 193 163 ($(CLANG) $3 -O2 -target bpf -emit-llvm \ 194 164 -c $1 -o - || echo "BPF obj compilation failed") | \ 195 165 $(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2 196 166 endef 197 167 # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 198 168 define CLANG_NOALU32_BPF_BUILD_RULE 169 + $(call msg, 
CLANG-LLC,$(TRUNNER_BINARY),$2) 199 170 ($(CLANG) $3 -O2 -target bpf -emit-llvm \ 200 171 -c $1 -o - || echo "BPF obj compilation failed") | \ 201 172 $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 202 173 endef 203 174 # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC 204 175 define CLANG_NATIVE_BPF_BUILD_RULE 176 + $(call msg, CLANG-BPF,$(TRUNNER_BINARY),$2) 205 177 ($(CLANG) $3 -O2 -emit-llvm \ 206 178 -c $1 -o - || echo "BPF obj compilation failed") | \ 207 179 $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2 208 180 endef 209 181 # Build BPF object using GCC 210 182 define GCC_BPF_BUILD_RULE 183 + $(call msg, GCC-BPF,$(TRUNNER_BINARY),$2) 211 184 $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 212 185 endef 186 + 187 + SKEL_BLACKLIST := btf__% test_pinning_invalid.c 213 188 214 189 # Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on 215 190 # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES. ··· 232 195 $$(filter %.c,$(TRUNNER_EXTRA_SOURCES))) 233 196 TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES)) 234 197 TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h 235 - TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \ 236 - $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))) 198 + TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c)) 199 + TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)) 200 + TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ 201 + $$(filter-out $(SKEL_BLACKLIST), \ 202 + $$(TRUNNER_BPF_SRCS))) 237 203 238 204 # Evaluate rules now with extra TRUNNER_XXX variables above already defined 239 205 $$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2)) ··· 266 226 $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ 267 227 $(TRUNNER_BPF_CFLAGS), \ 268 228 $(TRUNNER_BPF_LDFLAGS)) 229 + 230 + $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ 231 + $(TRUNNER_OUTPUT)/%.o \ 232 + | $(BPFTOOL) $(TRUNNER_OUTPUT) 233 + $$(call msg, 
GEN-SKEL,$(TRUNNER_BINARY),$$@) 234 + $$(BPFTOOL) gen skeleton $$< > $$@ 269 235 endif 270 236 271 237 # ensure we set up tests.h header generation rule just once 272 238 ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),) 273 239 $(TRUNNER_TESTS_DIR)-tests-hdr := y 274 240 $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c 241 + $$(call msg, TEST-HDR,$(TRUNNER_BINARY),$$@) 275 242 $$(shell ( cd $(TRUNNER_TESTS_DIR); \ 276 243 echo '/* Generated header, do not edit */'; \ 277 244 ls *.c 2> /dev/null | \ ··· 292 245 $(TRUNNER_TESTS_DIR)/%.c \ 293 246 $(TRUNNER_EXTRA_HDRS) \ 294 247 $(TRUNNER_BPF_OBJS) \ 248 + $(TRUNNER_BPF_SKELS) \ 295 249 $$(BPFOBJ) | $(TRUNNER_OUTPUT) 250 + $$(call msg, TEST-OBJ,$(TRUNNER_BINARY),$$@) 296 251 cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) 297 252 298 253 $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ ··· 302 253 $(TRUNNER_EXTRA_HDRS) \ 303 254 $(TRUNNER_TESTS_HDR) \ 304 255 $$(BPFOBJ) | $(TRUNNER_OUTPUT) 256 + $$(call msg, EXTRA-OBJ,$(TRUNNER_BINARY),$$@) 305 257 $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ 306 258 259 + # only copy extra resources if in flavored build 307 260 $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) 308 261 ifneq ($2,) 309 - # only copy extra resources if in flavored build 262 + $$(call msg, EXTRAS-CP,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) 310 263 cp -a $$^ $(TRUNNER_OUTPUT)/ 311 264 endif 312 265 313 266 $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ 314 267 $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ 315 268 | $(TRUNNER_BINARY)-extras 269 + $$(call msg, BINARY,,$$@) 316 270 $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ 317 271 318 272 endef ··· 367 315 echo '#endif' \ 368 316 ) > verifier/tests.h) 369 317 $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) 318 + $(call msg, BINARY,,$@) 370 319 $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ 371 320 372 321 # Make sure we are able to include and link libbpf against c++. 
373 - $(OUTPUT)/test_cpp: test_cpp.cpp $(BPFOBJ) 322 + $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) 323 + $(call msg, CXX,,$@) 374 324 $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ 375 325 376 326 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) \ 377 327 prog_tests/tests.h map_tests/tests.h verifier/tests.h \ 378 - feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc 328 + feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc \ 329 + tools *.skel.h
+36 -125
tools/testing/selftests/bpf/prog_tests/attach_probe.c
···
  // SPDX-License-Identifier: GPL-2.0
  #include <test_progs.h>
- 
- #define EMBED_FILE(NAME, PATH)				    \
- asm (							    \
- "      .pushsection \".rodata\", \"a\", @progbits	\n" \
- "      .global "#NAME"_data				\n" \
- #NAME"_data:						\n" \
- "      .incbin \"" PATH "\"				\n" \
- #NAME"_data_end:					\n" \
- "      .global "#NAME"_size				\n" \
- "      .type "#NAME"_size, @object			\n" \
- "      .size "#NAME"_size, 4			\n" \
- "      .align 4,					\n" \
- #NAME"_size:						\n" \
- "      .int "#NAME"_data_end - "#NAME"_data	\n" \
- "      .popsection					\n" \
- );							    \
- extern char NAME##_data[];				    \
- extern int NAME##_size;
+ #include "test_attach_probe.skel.h"
  
  ssize_t get_base_addr() {
- 	size_t start;
+ 	size_t start, offset;
  	char buf[256];
  	FILE *f;
···
  	if (!f)
  		return -errno;
  
- 	while (fscanf(f, "%zx-%*x %s %*s\n", &start, buf) == 2) {
+ 	while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
+ 		      &start, buf, &offset) == 3) {
  		if (strcmp(buf, "r-xp") == 0) {
  			fclose(f);
- 			return start;
+ 			return start - offset;
  		}
  	}
  
···
  	return -EINVAL;
  }
  
- EMBED_FILE(probe, "test_attach_probe.o");
- 
  void test_attach_probe(void)
  {
- 	const char *kprobe_name = "kprobe/sys_nanosleep";
- 	const char *kretprobe_name = "kretprobe/sys_nanosleep";
- 	const char *uprobe_name = "uprobe/trigger_func";
- 	const char *uretprobe_name = "uretprobe/trigger_func";
- 	const int kprobe_idx = 0, kretprobe_idx = 1;
- 	const int uprobe_idx = 2, uretprobe_idx = 3;
- 	const char *obj_name = "attach_probe";
- 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
- 		.object_name = obj_name,
- 		.relaxed_maps = true,
- 	);
- 	struct bpf_program *kprobe_prog, *kretprobe_prog;
- 	struct bpf_program *uprobe_prog, *uretprobe_prog;
- 	struct bpf_object *obj;
- 	int err, duration = 0, res;
- 	struct bpf_link *kprobe_link = NULL;
- 	struct bpf_link *kretprobe_link = NULL;
- 	struct bpf_link *uprobe_link = NULL;
- 	struct bpf_link *uretprobe_link = NULL;
- 	int results_map_fd;
+ 	int duration = 0;
+ 	struct bpf_link *kprobe_link, *kretprobe_link;
+ 	struct bpf_link *uprobe_link, *uretprobe_link;
+ 	struct test_attach_probe* skel;
  	size_t uprobe_offset;
  	ssize_t base_addr;
···
  		return;
  	uprobe_offset = (size_t)&get_base_addr - base_addr;
  
- 	/* open object */
- 	obj = bpf_object__open_mem(probe_data, probe_size, &open_opts);
- 	if (CHECK(IS_ERR(obj), "obj_open_mem", "err %ld\n", PTR_ERR(obj)))
+ 	skel = test_attach_probe__open_and_load();
+ 	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
  		return;
- 
- 	if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
- 		  "wrong obj name '%s', expected '%s'\n",
- 		  bpf_object__name(obj), obj_name))
+ 	if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n"))
  		goto cleanup;
  
- 	kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
- 	if (CHECK(!kprobe_prog, "find_probe",
- 		  "prog '%s' not found\n", kprobe_name))
- 		goto cleanup;
- 	kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name);
- 	if (CHECK(!kretprobe_prog, "find_probe",
- 		  "prog '%s' not found\n", kretprobe_name))
- 		goto cleanup;
- 	uprobe_prog = bpf_object__find_program_by_title(obj, uprobe_name);
- 	if (CHECK(!uprobe_prog, "find_probe",
- 		  "prog '%s' not found\n", uprobe_name))
- 		goto cleanup;
- 	uretprobe_prog = bpf_object__find_program_by_title(obj, uretprobe_name);
- 	if (CHECK(!uretprobe_prog, "find_probe",
- 		  "prog '%s' not found\n", uretprobe_name))
- 		goto cleanup;
- 
- 	/* create maps && load programs */
- 	err = bpf_object__load(obj);
- 	if (CHECK(err, "obj_load", "err %d\n", err))
- 		goto cleanup;
- 
- 	/* load maps */
- 	results_map_fd = bpf_find_map(__func__, obj, "results_map");
- 	if (CHECK(results_map_fd < 0, "find_results_map",
- 		  "err %d\n", results_map_fd))
- 		goto cleanup;
- 
- 	kprobe_link = bpf_program__attach_kprobe(kprobe_prog,
+ 	kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
  						 false /* retprobe */,
  						 SYS_NANOSLEEP_KPROBE_NAME);
  	if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- 		  "err %ld\n", PTR_ERR(kprobe_link))) {
- 		kprobe_link = NULL;
+ 		  "err %ld\n", PTR_ERR(kprobe_link)))
  		goto cleanup;
- 	}
- 	kretprobe_link = bpf_program__attach_kprobe(kretprobe_prog,
+ 	skel->links.handle_kprobe = kprobe_link;
+ 
+ 	kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
  						    true /* retprobe */,
  						    SYS_NANOSLEEP_KPROBE_NAME);
  	if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
- 		  "err %ld\n", PTR_ERR(kretprobe_link))) {
- 		kretprobe_link = NULL;
+ 		  "err %ld\n", PTR_ERR(kretprobe_link)))
  		goto cleanup;
- 	}
- 	uprobe_link = bpf_program__attach_uprobe(uprobe_prog,
+ 	skel->links.handle_kretprobe = kretprobe_link;
+ 
+ 	uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
  						 false /* retprobe */,
  						 0 /* self pid */,
  						 "/proc/self/exe",
  						 uprobe_offset);
  	if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
- 		  "err %ld\n", PTR_ERR(uprobe_link))) {
- 		uprobe_link = NULL;
+ 		  "err %ld\n", PTR_ERR(uprobe_link)))
  		goto cleanup;
- 	}
- 	uretprobe_link = bpf_program__attach_uprobe(uretprobe_prog,
+ 	skel->links.handle_uprobe = uprobe_link;
+ 
+ 	uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe,
  						    true /* retprobe */,
  						    -1 /* any pid */,
  						    "/proc/self/exe",
  						    uprobe_offset);
  	if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
- 		  "err %ld\n", PTR_ERR(uretprobe_link))) {
- 		uretprobe_link = NULL;
+ 		  "err %ld\n", PTR_ERR(uretprobe_link)))
  		goto cleanup;
- 	}
+ 	skel->links.handle_uretprobe = uretprobe_link;
  
  	/* trigger & validate kprobe && kretprobe */
  	usleep(1);
  
- 	err = bpf_map_lookup_elem(results_map_fd, &kprobe_idx, &res);
- 	if (CHECK(err, "get_kprobe_res",
- 		  "failed to get kprobe res: %d\n", err))
+ 	if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res",
+ 		  "wrong kprobe res: %d\n", skel->bss->kprobe_res))
  		goto cleanup;
- 	if (CHECK(res != kprobe_idx + 1, "check_kprobe_res",
- 		  "wrong kprobe res: %d\n", res))
- 		goto cleanup;
- 
- 	err = bpf_map_lookup_elem(results_map_fd, &kretprobe_idx, &res);
- 	if (CHECK(err, "get_kretprobe_res",
- 		  "failed to get kretprobe res: %d\n", err))
- 		goto cleanup;
- 	if (CHECK(res != kretprobe_idx + 1, "check_kretprobe_res",
- 		  "wrong kretprobe res: %d\n", res))
+ 	if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res",
+ 		  "wrong kretprobe res: %d\n", skel->bss->kretprobe_res))
  		goto cleanup;
  
  	/* trigger & validate uprobe & uretprobe */
  	get_base_addr();
  
- 	err = bpf_map_lookup_elem(results_map_fd, &uprobe_idx, &res);
- 	if (CHECK(err, "get_uprobe_res",
- 		  "failed to get uprobe res: %d\n", err))
+ 	if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
+ 		  "wrong uprobe res: %d\n", skel->bss->uprobe_res))
  		goto cleanup;
- 	if (CHECK(res != uprobe_idx + 1, "check_uprobe_res",
- 		  "wrong uprobe res: %d\n", res))
- 		goto cleanup;
- 
- 	err = bpf_map_lookup_elem(results_map_fd, &uretprobe_idx, &res);
- 	if (CHECK(err, "get_uretprobe_res",
- 		  "failed to get uretprobe res: %d\n", err))
- 		goto cleanup;
- 	if (CHECK(res != uretprobe_idx + 1, "check_uretprobe_res",
- 		  "wrong uretprobe res: %d\n", res))
+ 	if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res",
+ 		  "wrong uretprobe res: %d\n", skel->bss->uretprobe_res))
  		goto cleanup;
  
  cleanup:
- 	bpf_link__destroy(kprobe_link);
- 	bpf_link__destroy(kretprobe_link);
- 	bpf_link__destroy(uprobe_link);
- 	bpf_link__destroy(uretprobe_link);
- 	bpf_object__close(obj);
+ 	test_attach_probe__destroy(skel);
  }
+111
tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
···
// SPDX-License-Identifier: GPL-2.0

#include <test_progs.h>

#include "cgroup_helpers.h"

#define PING_CMD	"ping -q -c1 -w1 127.0.0.1 > /dev/null"

char bpf_log_buf[BPF_LOG_BUF_SIZE];

static int prog_load(void)
{
	struct bpf_insn prog[] = {
		BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = 1 */
		BPF_EXIT_INSN(),
	};
	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);

	return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
			       prog, insns_cnt, "GPL", 0,
			       bpf_log_buf, BPF_LOG_BUF_SIZE);
}

void test_cgroup_attach_autodetach(void)
{
	__u32 duration = 0, prog_cnt = 4, attach_flags;
	int allow_prog[2] = {-1};
	__u32 prog_ids[2] = {0};
	void *ptr = NULL;
	int cg = 0, i;
	int attempts;

	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
		allow_prog[i] = prog_load();
		if (CHECK(allow_prog[i] < 0, "prog_load",
			  "verifier output:\n%s\n-------\n", bpf_log_buf))
			goto err;
	}

	if (CHECK_FAIL(setup_cgroup_environment()))
		goto err;

	/* create a cgroup, attach two programs and remember their ids */
	cg = create_and_get_cgroup("/cg_autodetach");
	if (CHECK_FAIL(cg < 0))
		goto err;

	if (CHECK_FAIL(join_cgroup("/cg_autodetach")))
		goto err;

	for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
		if (CHECK(bpf_prog_attach(allow_prog[i], cg,
					  BPF_CGROUP_INET_EGRESS,
					  BPF_F_ALLOW_MULTI),
			  "prog_attach", "prog[%d], errno=%d\n", i, errno))
			goto err;

	/* make sure that programs are attached and run some traffic */
	if (CHECK(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
				 prog_ids, &prog_cnt),
		  "prog_query", "errno=%d\n", errno))
		goto err;
	if (CHECK_FAIL(system(PING_CMD)))
		goto err;

	/* allocate some memory (4Mb) to pin the original cgroup */
	ptr = malloc(4 * (1 << 20));
	if (CHECK_FAIL(!ptr))
		goto err;

	/* close programs and cgroup fd */
	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
		close(allow_prog[i]);
		allow_prog[i] = -1;
	}

	close(cg);
	cg = 0;

	/* leave the cgroup and remove it. don't detach programs */
	cleanup_cgroup_environment();

	/* wait for the asynchronous auto-detachment.
	 * wait for no more than 5 sec and give up.
	 */
	for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
		for (attempts = 5; attempts >= 0; attempts--) {
			int fd = bpf_prog_get_fd_by_id(prog_ids[i]);

			if (fd < 0)
				break;

			/* don't leave the fd open */
			close(fd);

			if (CHECK_FAIL(!attempts))
				goto err;

			sleep(1);
		}
	}

err:
	for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
		if (allow_prog[i] >= 0)
			close(allow_prog[i]);
	if (cg)
		close(cg);
	free(ptr);
	cleanup_cgroup_environment();
}
+285
tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
···
// SPDX-License-Identifier: GPL-2.0

#include <test_progs.h>

#include "cgroup_helpers.h"

#define PING_CMD	"ping -q -c1 -w1 127.0.0.1 > /dev/null"

char bpf_log_buf[BPF_LOG_BUF_SIZE];

static int map_fd = -1;

static int prog_load_cnt(int verdict, int val)
{
	int cgroup_storage_fd, percpu_cgroup_storage_fd;

	if (map_fd < 0)
		map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
	if (map_fd < 0) {
		printf("failed to create map '%s'\n", strerror(errno));
		return -1;
	}

	cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
				sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
	if (cgroup_storage_fd < 0) {
		printf("failed to create map '%s'\n", strerror(errno));
		return -1;
	}

	percpu_cgroup_storage_fd = bpf_create_map(
		BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
		sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
	if (percpu_cgroup_storage_fd < 0) {
		printf("failed to create map '%s'\n", strerror(errno));
		return -1;
	}

	struct bpf_insn prog[] = {
		BPF_MOV32_IMM(BPF_REG_0, 0),
		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
		BPF_LD_MAP_FD(BPF_REG_1, map_fd),
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */

		BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
		BPF_MOV64_IMM(BPF_REG_2, 0),
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
		BPF_MOV64_IMM(BPF_REG_1, val),
		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),

		BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
		BPF_MOV64_IMM(BPF_REG_2, 0),
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),

		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
		BPF_EXIT_INSN(),
	};
	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
	int ret;

	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
			       prog, insns_cnt, "GPL", 0,
			       bpf_log_buf, BPF_LOG_BUF_SIZE);

	close(cgroup_storage_fd);
	return ret;
}

void test_cgroup_attach_multi(void)
{
	__u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
	int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
	DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts);
	int allow_prog[7] = {-1};
	unsigned long long value;
	__u32 duration = 0;
	int i = 0;

	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
		allow_prog[i] = prog_load_cnt(1, 1 << i);
		if (CHECK(allow_prog[i] < 0, "prog_load",
			  "verifier output:\n%s\n-------\n", bpf_log_buf))
			goto err;
	}

	if (CHECK_FAIL(setup_cgroup_environment()))
		goto err;

	cg1 = create_and_get_cgroup("/cg1");
	if (CHECK_FAIL(cg1 < 0))
		goto err;
	cg2 = create_and_get_cgroup("/cg1/cg2");
	if (CHECK_FAIL(cg2 < 0))
		goto err;
	cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
	if (CHECK_FAIL(cg3 < 0))
		goto err;
	cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
	if (CHECK_FAIL(cg4 < 0))
		goto err;
	cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
	if (CHECK_FAIL(cg5 < 0))
		goto err;

	if (CHECK_FAIL(join_cgroup("/cg1/cg2/cg3/cg4/cg5")))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_MULTI),
		  "prog0_attach_to_cg1_multi", "errno=%d\n", errno))
		goto err;

	if (CHECK(!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
				   BPF_F_ALLOW_MULTI),
		  "fail_same_prog_attach_to_cg1", "unexpected success\n"))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_MULTI),
		  "prog1_attach_to_cg1_multi", "errno=%d\n", errno))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_OVERRIDE),
		  "prog2_attach_to_cg2_override", "errno=%d\n", errno))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_MULTI),
		  "prog3_attach_to_cg3_multi", "errno=%d\n", errno))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_OVERRIDE),
		  "prog4_attach_to_cg4_override", "errno=%d\n", errno))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0),
		  "prog5_attach_to_cg5_none", "errno=%d\n", errno))
		goto err;

	CHECK_FAIL(system(PING_CMD));
	CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
	CHECK_FAIL(value != 1 + 2 + 8 + 32);

	/* query the number of effective progs in cg5 */
	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
				  BPF_F_QUERY_EFFECTIVE, NULL, NULL, &prog_cnt));
	CHECK_FAIL(prog_cnt != 4);
	/* retrieve prog_ids of effective progs in cg5 */
	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
				  BPF_F_QUERY_EFFECTIVE, &attach_flags,
				  prog_ids, &prog_cnt));
	CHECK_FAIL(prog_cnt != 4);
	CHECK_FAIL(attach_flags != 0);
	saved_prog_id = prog_ids[0];
	/* check enospc handling */
	prog_ids[0] = 0;
	prog_cnt = 2;
	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
				  BPF_F_QUERY_EFFECTIVE, &attach_flags,
				  prog_ids, &prog_cnt) != -1);
	CHECK_FAIL(errno != ENOSPC);
	CHECK_FAIL(prog_cnt != 4);
	/* check that prog_ids are returned even when buffer is too small */
	CHECK_FAIL(prog_ids[0] != saved_prog_id);
	/* retrieve prog_id of single attached prog in cg5 */
	prog_ids[0] = 0;
	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, NULL,
				  prog_ids, &prog_cnt));
	CHECK_FAIL(prog_cnt != 1);
	CHECK_FAIL(prog_ids[0] != saved_prog_id);

	/* detach bottom program and ping again */
	if (CHECK(bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS),
		  "prog_detach_from_cg5", "errno=%d\n", errno))
		goto err;

	value = 0;
	CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
	CHECK_FAIL(system(PING_CMD));
	CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
	CHECK_FAIL(value != 1 + 2 + 8 + 16);

	/* test replace */

	attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
	attach_opts.replace_prog_fd = allow_prog[0];
	if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
					 BPF_CGROUP_INET_EGRESS, &attach_opts),
		  "fail_prog_replace_override", "unexpected success\n"))
		goto err;
	CHECK_FAIL(errno != EINVAL);

	attach_opts.flags = BPF_F_REPLACE;
	if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
					 BPF_CGROUP_INET_EGRESS, &attach_opts),
		  "fail_prog_replace_no_multi", "unexpected success\n"))
		goto err;
	CHECK_FAIL(errno != EINVAL);

	attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
	attach_opts.replace_prog_fd = -1;
	if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
					 BPF_CGROUP_INET_EGRESS, &attach_opts),
		  "fail_prog_replace_bad_fd", "unexpected success\n"))
		goto err;
	CHECK_FAIL(errno != EBADF);

	/* replacing a program that is not attached to cgroup should fail */
	attach_opts.replace_prog_fd = allow_prog[3];
	if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
					 BPF_CGROUP_INET_EGRESS, &attach_opts),
		  "fail_prog_replace_no_ent", "unexpected success\n"))
		goto err;
	CHECK_FAIL(errno != ENOENT);

	/* replace 1st from the top program */
	attach_opts.replace_prog_fd = allow_prog[0];
	if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
					BPF_CGROUP_INET_EGRESS, &attach_opts),
		  "prog_replace", "errno=%d\n", errno))
		goto err;

	value = 0;
	CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
	CHECK_FAIL(system(PING_CMD));
	CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
	CHECK_FAIL(value != 64 + 2 + 8 + 16);

	/* detach 3rd from bottom program and ping again */
	if (CHECK(!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS),
		  "fail_prog_detach_from_cg3", "unexpected success\n"))
		goto err;

	if (CHECK(bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS),
		  "prog3_detach_from_cg3", "errno=%d\n", errno))
		goto err;

	value = 0;
	CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
	CHECK_FAIL(system(PING_CMD));
	CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
	CHECK_FAIL(value != 64 + 2 + 16);

	/* detach 2nd from bottom program and ping again */
	if (CHECK(bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS),
		  "prog_detach_from_cg4", "errno=%d\n", errno))
		goto err;

	value = 0;
	CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
	CHECK_FAIL(system(PING_CMD));
	CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
	CHECK_FAIL(value != 64 + 2 + 4);

	prog_cnt = 4;
	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
				  BPF_F_QUERY_EFFECTIVE, &attach_flags,
				  prog_ids, &prog_cnt));
	CHECK_FAIL(prog_cnt != 3);
	CHECK_FAIL(attach_flags != 0);
	CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, NULL,
				  prog_ids, &prog_cnt));
	CHECK_FAIL(prog_cnt != 0);

err:
	for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
		if (allow_prog[i] >= 0)
			close(allow_prog[i]);
	close(cg1);
	close(cg2);
	close(cg3);
	close(cg4);
	close(cg5);
	cleanup_cgroup_environment();
}
+148
tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
···
// SPDX-License-Identifier: GPL-2.0

#include <test_progs.h>

#include "cgroup_helpers.h"

#define FOO		"/foo"
#define BAR		"/foo/bar/"
#define PING_CMD	"ping -q -c1 -w1 127.0.0.1 > /dev/null"

char bpf_log_buf[BPF_LOG_BUF_SIZE];

static int prog_load(int verdict)
{
	struct bpf_insn prog[] = {
		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
		BPF_EXIT_INSN(),
	};
	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);

	return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
			       prog, insns_cnt, "GPL", 0,
			       bpf_log_buf, BPF_LOG_BUF_SIZE);
}

void test_cgroup_attach_override(void)
{
	int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1;
	__u32 duration = 0;

	allow_prog = prog_load(1);
	if (CHECK(allow_prog < 0, "prog_load_allow",
		  "verifier output:\n%s\n-------\n", bpf_log_buf))
		goto err;

	drop_prog = prog_load(0);
	if (CHECK(drop_prog < 0, "prog_load_drop",
		  "verifier output:\n%s\n-------\n", bpf_log_buf))
		goto err;

	foo = test__join_cgroup(FOO);
	if (CHECK(foo < 0, "cgroup_join_foo", "cgroup setup failed\n"))
		goto err;

	if (CHECK(bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_OVERRIDE),
		  "prog_attach_drop_foo_override",
		  "attach prog to %s failed, errno=%d\n", FOO, errno))
		goto err;

	if (CHECK(!system(PING_CMD), "ping_fail",
		  "ping unexpectedly succeeded\n"))
		goto err;

	bar = test__join_cgroup(BAR);
	if (CHECK(bar < 0, "cgroup_join_bar", "cgroup setup failed\n"))
		goto err;

	if (CHECK(!system(PING_CMD), "ping_fail",
		  "ping unexpectedly succeeded\n"))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_OVERRIDE),
		  "prog_attach_allow_bar_override",
		  "attach prog to %s failed, errno=%d\n", BAR, errno))
		goto err;

	if (CHECK(system(PING_CMD), "ping_ok", "ping failed\n"))
		goto err;

	if (CHECK(bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS),
		  "prog_detach_bar",
		  "detach prog from %s failed, errno=%d\n", BAR, errno))
		goto err;

	if (CHECK(!system(PING_CMD), "ping_fail",
		  "ping unexpectedly succeeded\n"))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_OVERRIDE),
		  "prog_attach_allow_bar_override",
		  "attach prog to %s failed, errno=%d\n", BAR, errno))
		goto err;

	if (CHECK(bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS),
		  "prog_detach_foo",
		  "detach prog from %s failed, errno=%d\n", FOO, errno))
		goto err;

	if (CHECK(system(PING_CMD), "ping_ok", "ping failed\n"))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
				  BPF_F_ALLOW_OVERRIDE),
		  "prog_attach_allow_bar_override",
		  "attach prog to %s failed, errno=%d\n", BAR, errno))
		goto err;

	if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0),
		  "fail_prog_attach_allow_bar_none",
		  "attach prog to %s unexpectedly succeeded\n", BAR))
		goto err;

	if (CHECK(bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS),
		  "prog_detach_bar",
		  "detach prog from %s failed, errno=%d\n", BAR, errno))
		goto err;

	if (CHECK(!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS),
		  "fail_prog_detach_foo",
		  "double detach from %s unexpectedly succeeded\n", FOO))
		goto err;

	if (CHECK(bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0),
		  "prog_attach_allow_foo_none",
		  "attach prog to %s failed, errno=%d\n", FOO, errno))
		goto err;

	if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0),
		  "fail_prog_attach_allow_bar_none",
		  "attach prog to %s unexpectedly succeeded\n", BAR))
		goto err;

	if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
				   BPF_F_ALLOW_OVERRIDE),
		  "fail_prog_attach_allow_bar_override",
		  "attach prog to %s unexpectedly succeeded\n", BAR))
		goto err;

	if (CHECK(!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
				   BPF_F_ALLOW_OVERRIDE),
		  "fail_prog_attach_allow_foo_override",
		  "attach prog to %s unexpectedly succeeded\n", FOO))
		goto err;

	if (CHECK(bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0),
		  "prog_attach_drop_foo_none",
		  "attach prog to %s failed, errno=%d\n", FOO, errno))
		goto err;

err:
	close(foo);
	close(bar);
	close(allow_prog);
	close(drop_prog);
}
+169
tools/testing/selftests/bpf/prog_tests/core_extern.c
···
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */

#include <test_progs.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <linux/version.h>
#include "test_core_extern.skel.h"

static uint32_t get_kernel_version(void)
{
	uint32_t major, minor, patch;
	struct utsname info;

	uname(&info);
	if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
		return 0;
	return KERNEL_VERSION(major, minor, patch);
}

#define CFG "CONFIG_BPF_SYSCALL=n\n"

static struct test_case {
	const char *name;
	const char *cfg;
	bool fails;
	struct test_core_extern__data data;
} test_cases[] = {
	{ .name = "default search path", .data = { .bpf_syscall = true } },
	{
		.name = "custom values",
		.cfg = "CONFIG_BPF_SYSCALL=n\n"
		       "CONFIG_TRISTATE=m\n"
		       "CONFIG_BOOL=y\n"
		       "CONFIG_CHAR=100\n"
		       "CONFIG_USHORT=30000\n"
		       "CONFIG_INT=123456\n"
		       "CONFIG_ULONG=0xDEADBEEFC0DE\n"
		       "CONFIG_STR=\"abracad\"\n"
		       "CONFIG_MISSING=0",
		.data = {
			.bpf_syscall = false,
			.tristate_val = TRI_MODULE,
			.bool_val = true,
			.char_val = 100,
			.ushort_val = 30000,
			.int_val = 123456,
			.ulong_val = 0xDEADBEEFC0DE,
			.str_val = "abracad",
		},
	},
	/* TRISTATE */
	{ .name = "tristate (y)", .cfg = CFG"CONFIG_TRISTATE=y\n",
	  .data = { .tristate_val = TRI_YES } },
	{ .name = "tristate (n)", .cfg = CFG"CONFIG_TRISTATE=n\n",
	  .data = { .tristate_val = TRI_NO } },
	{ .name = "tristate (m)", .cfg = CFG"CONFIG_TRISTATE=m\n",
	  .data = { .tristate_val = TRI_MODULE } },
	{ .name = "tristate (int)", .fails = 1, .cfg = CFG"CONFIG_TRISTATE=1" },
	{ .name = "tristate (bad)", .fails = 1, .cfg = CFG"CONFIG_TRISTATE=M" },
	/* BOOL */
	{ .name = "bool (y)", .cfg = CFG"CONFIG_BOOL=y\n",
	  .data = { .bool_val = true } },
	{ .name = "bool (n)", .cfg = CFG"CONFIG_BOOL=n\n",
	  .data = { .bool_val = false } },
	{ .name = "bool (tristate)", .fails = 1, .cfg = CFG"CONFIG_BOOL=m" },
	{ .name = "bool (int)", .fails = 1, .cfg = CFG"CONFIG_BOOL=1" },
	/* CHAR */
	{ .name = "char (tristate)", .cfg = CFG"CONFIG_CHAR=m\n",
	  .data = { .char_val = 'm' } },
	{ .name = "char (bad)", .fails = 1, .cfg = CFG"CONFIG_CHAR=q\n" },
	{ .name = "char (empty)", .fails = 1, .cfg = CFG"CONFIG_CHAR=\n" },
	{ .name = "char (str)", .fails = 1, .cfg = CFG"CONFIG_CHAR=\"y\"\n" },
	/* STRING */
	{ .name = "str (empty)", .cfg = CFG"CONFIG_STR=\"\"\n",
	  .data = { .str_val = "\0\0\0\0\0\0\0" } },
	{ .name = "str (padded)", .cfg = CFG"CONFIG_STR=\"abra\"\n",
	  .data = { .str_val = "abra\0\0\0" } },
	{ .name = "str (too long)", .cfg = CFG"CONFIG_STR=\"abracada\"\n",
	  .data = { .str_val = "abracad" } },
	{ .name = "str (no value)", .fails = 1, .cfg = CFG"CONFIG_STR=\n" },
	{ .name = "str (bad value)", .fails = 1, .cfg = CFG"CONFIG_STR=bla\n" },
	/* INTEGERS */
	{
		.name = "integer forms",
		.cfg = CFG
		       "CONFIG_CHAR=0xA\n"
		       "CONFIG_USHORT=0462\n"
		       "CONFIG_INT=-100\n"
		       "CONFIG_ULONG=+1000000000000",
		.data = {
			.char_val = 0xA,
			.ushort_val = 0462,
			.int_val = -100,
			.ulong_val = 1000000000000,
		},
	},
	{ .name = "int (bad)", .fails = 1, .cfg = CFG"CONFIG_INT=abc" },
	{ .name = "int (str)", .fails = 1, .cfg = CFG"CONFIG_INT=\"abc\"" },
	{ .name = "int (empty)", .fails = 1, .cfg = CFG"CONFIG_INT=" },
	{ .name = "int (mixed)", .fails = 1, .cfg = CFG"CONFIG_INT=123abc" },
	{ .name = "int (max)", .cfg = CFG"CONFIG_INT=2147483647",
	  .data = { .int_val = 2147483647 } },
	{ .name = "int (min)", .cfg = CFG"CONFIG_INT=-2147483648",
	  .data = { .int_val = -2147483648 } },
	{ .name = "int (max+1)", .fails = 1, .cfg = CFG"CONFIG_INT=2147483648" },
	{ .name = "int (min-1)", .fails = 1, .cfg = CFG"CONFIG_INT=-2147483649" },
	{ .name = "ushort (max)", .cfg = CFG"CONFIG_USHORT=65535",
	  .data = { .ushort_val = 65535 } },
	{ .name = "ushort (min)", .cfg = CFG"CONFIG_USHORT=0",
	  .data = { .ushort_val = 0 } },
	{ .name = "ushort (max+1)", .fails = 1, .cfg = CFG"CONFIG_USHORT=65536" },
	{ .name = "ushort (min-1)", .fails = 1, .cfg = CFG"CONFIG_USHORT=-1" },
	{ .name = "u64 (max)", .cfg = CFG"CONFIG_ULONG=0xffffffffffffffff",
	  .data = { .ulong_val = 0xffffffffffffffff } },
	{ .name = "u64 (min)", .cfg = CFG"CONFIG_ULONG=0",
	  .data = { .ulong_val = 0 } },
	{ .name = "u64 (max+1)", .fails = 1, .cfg = CFG"CONFIG_ULONG=0x10000000000000000" },
};

void test_core_extern(void)
{
	const uint32_t kern_ver = get_kernel_version();
	int err, duration = 0, i, j;
	struct test_core_extern *skel = NULL;
	uint64_t *got, *exp;
	int n = sizeof(*skel->data) / sizeof(uint64_t);

	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
		struct test_case *t = &test_cases[i];
		DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
			.kconfig = t->cfg,
		);

		if (!test__start_subtest(t->name))
			continue;

		skel = test_core_extern__open_opts(&opts);
		if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
			goto cleanup;
		err = test_core_extern__load(skel);
		if (t->fails) {
			CHECK(!err, "skel_load",
			      "shouldn't succeed open/load of skeleton\n");
			goto cleanup;
		} else if (CHECK(err, "skel_load",
				 "failed to open/load skeleton\n")) {
			goto cleanup;
		}
		err = test_core_extern__attach(skel);
		if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err))
			goto cleanup;

		usleep(1);

		t->data.kern_ver = kern_ver;
		t->data.missing_val = 0xDEADC0DE;
		got = (uint64_t *)skel->data;
		exp = (uint64_t *)&t->data;
		for (j = 0; j < n; j++) {
			CHECK(got[j] != exp[j], "check_res",
			      "result #%d: expected %lx, but got %lx\n",
			      j, exp[j], got[j]);
		}
cleanup:
		test_core_extern__destroy(skel);
		skel = NULL;
	}
}
+4
tools/testing/selftests/bpf/prog_tests/core_reloc.c
···
  		.b123	= 2,					\
  		.c1c	= 3,					\
  		.d00d	= 4,					\
+ 		.f10c	= 0,					\
  	},							\
  	.output_len = sizeof(struct core_reloc_arrays_output)	\
  }
···
  	ARRAYS_CASE(arrays),
  	ARRAYS_CASE(arrays___diff_arr_dim),
  	ARRAYS_CASE(arrays___diff_arr_val_sz),
+ 	ARRAYS_CASE(arrays___equiv_zero_sz_arr),
+ 	ARRAYS_CASE(arrays___fixed_arr),
  
  	ARRAYS_ERR_CASE(arrays___err_too_small),
  	ARRAYS_ERR_CASE(arrays___err_too_shallow),
  	ARRAYS_ERR_CASE(arrays___err_non_array),
  	ARRAYS_ERR_CASE(arrays___err_wrong_val_type1),
  	ARRAYS_ERR_CASE(arrays___err_wrong_val_type2),
+ 	ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr),
  
  	/* enum/ptr/int handling scenarios */
  	PRIMITIVES_CASE(primitives),
+78
tools/testing/selftests/bpf/prog_tests/cpu_mask.c
···
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <bpf/btf.h>
#include "libbpf_internal.h"

static int duration = 0;

static void validate_mask(int case_nr, const char *exp, bool *mask, int n)
{
	int i;

	for (i = 0; exp[i]; i++) {
		if (exp[i] == '1') {
			if (CHECK(i + 1 > n, "mask_short",
				  "case #%d: mask too short, got n=%d, need at least %d\n",
				  case_nr, n, i + 1))
				return;
			CHECK(!mask[i], "cpu_not_set",
			      "case #%d: mask differs, expected cpu#%d SET\n",
			      case_nr, i);
		} else {
			CHECK(i < n && mask[i], "cpu_set",
			      "case #%d: mask differs, expected cpu#%d UNSET\n",
			      case_nr, i);
		}
	}
	CHECK(i < n, "mask_long",
	      "case #%d: mask too long, got n=%d, expected at most %d\n",
	      case_nr, n, i);
}

static struct {
	const char *cpu_mask;
	const char *expect;
	bool fails;
} test_cases[] = {
	{ "0\n", "1", false },
	{ "0,2\n", "101", false },
	{ "0-2\n", "111", false },
	{ "0-2,3-4\n", "11111", false },
	{ "0", "1", false },
	{ "0-2", "111", false },
	{ "0,2", "101", false },
	{ "0,1-3", "1111", false },
	{ "0,1,2,3", "1111", false },
	{ "0,2-3,5", "101101", false },
	{ "3-3", "0001", false },
	{ "2-4,6,9-10", "00111010011", false },
	/* failure cases */
	{ "", "", true },
	{ "0-", "", true },
	{ "0 ", "", true },
	{ "0_1", "", true },
	{ "1-0", "", true },
	{ "-1", "", true },
};

void test_cpu_mask()
{
	int i, err, n;
	bool *mask;

	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
		mask = NULL;
		err = parse_cpu_mask_str(test_cases[i].cpu_mask, &mask, &n);
		if (test_cases[i].fails) {
			CHECK(!err, "should_fail",
			      "case #%d: parsing should fail!\n", i + 1);
		} else {
			if (CHECK(err, "parse_err",
				  "case #%d: cpu mask parsing failed: %d\n",
				  i + 1, err))
				continue;
			validate_mask(i + 1, test_cases[i].expect, mask, n);
		}
		free(mask);
	}
}
+33 -68
tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
+#include "test_pkt_access.skel.h"
+#include "fentry_test.skel.h"
+#include "fexit_test.skel.h"
 
 void test_fentry_fexit(void)
 {
-	struct bpf_prog_load_attr attr_fentry = {
-		.file = "./fentry_test.o",
-	};
-	struct bpf_prog_load_attr attr_fexit = {
-		.file = "./fexit_test.o",
-	};
+	struct test_pkt_access *pkt_skel = NULL;
+	struct fentry_test *fentry_skel = NULL;
+	struct fexit_test *fexit_skel = NULL;
+	__u64 *fentry_res, *fexit_res;
+	__u32 duration = 0, retval;
+	int err, pkt_fd, i;
 
-	struct bpf_object *obj_fentry = NULL, *obj_fexit = NULL, *pkt_obj;
-	struct bpf_map *data_map_fentry, *data_map_fexit;
-	char fentry_name[] = "fentry/bpf_fentry_testX";
-	char fexit_name[] = "fexit/bpf_fentry_testX";
-	int err, pkt_fd, kfree_skb_fd, i;
-	struct bpf_link *link[12] = {};
-	struct bpf_program *prog[12];
-	__u32 duration, retval;
-	const int zero = 0;
-	u64 result[12];
-
-	err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
-			    &pkt_obj, &pkt_fd);
-	if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+	pkt_skel = test_pkt_access__open_and_load();
+	if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
 		return;
-	err = bpf_prog_load_xattr(&attr_fentry, &obj_fentry, &kfree_skb_fd);
-	if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
+	fentry_skel = fentry_test__open_and_load();
+	if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
 		goto close_prog;
-	err = bpf_prog_load_xattr(&attr_fexit, &obj_fexit, &kfree_skb_fd);
-	if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
-		goto close_prog;
-
-	for (i = 0; i < 6; i++) {
-		fentry_name[sizeof(fentry_name) - 2] = '1' + i;
-		prog[i] = bpf_object__find_program_by_title(obj_fentry, fentry_name);
-		if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fentry_name))
-			goto close_prog;
-		link[i] = bpf_program__attach_trace(prog[i]);
-		if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
-			goto close_prog;
-	}
-	data_map_fentry = bpf_object__find_map_by_name(obj_fentry, "fentry_t.bss");
-	if (CHECK(!data_map_fentry, "find_data_map", "data map not found\n"))
+	fexit_skel = fexit_test__open_and_load();
+	if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
 		goto close_prog;
 
-	for (i = 6; i < 12; i++) {
-		fexit_name[sizeof(fexit_name) - 2] = '1' + i - 6;
-		prog[i] = bpf_object__find_program_by_title(obj_fexit, fexit_name);
-		if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fexit_name))
-			goto close_prog;
-		link[i] = bpf_program__attach_trace(prog[i]);
-		if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
-			goto close_prog;
-	}
-	data_map_fexit = bpf_object__find_map_by_name(obj_fexit, "fexit_te.bss");
-	if (CHECK(!data_map_fexit, "find_data_map", "data map not found\n"))
+	err = fentry_test__attach(fentry_skel);
+	if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
+		goto close_prog;
+	err = fexit_test__attach(fexit_skel);
+	if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
 		goto close_prog;
 
+	pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
 	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
 				NULL, NULL, &retval, &duration);
 	CHECK(err || retval, "ipv6",
 	      "err %d errno %d retval %d duration %d\n",
 	      err, errno, retval, duration);
 
-	err = bpf_map_lookup_elem(bpf_map__fd(data_map_fentry), &zero, &result);
-	if (CHECK(err, "get_result",
-		  "failed to get output data: %d\n", err))
-		goto close_prog;
-
-	err = bpf_map_lookup_elem(bpf_map__fd(data_map_fexit),
-				  &zero, result + 6);
-	if (CHECK(err, "get_result",
-		  "failed to get output data: %d\n", err))
-		goto close_prog;
-
-	for (i = 0; i < 12; i++)
-		if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
-			  i % 6 + 1, result[i]))
-			goto close_prog;
+	fentry_res = (__u64 *)fentry_skel->bss;
+	fexit_res = (__u64 *)fexit_skel->bss;
+	printf("%lld\n", fentry_skel->bss->test1_result);
+	for (i = 0; i < 6; i++) {
+		CHECK(fentry_res[i] != 1, "result",
+		      "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]);
+		CHECK(fexit_res[i] != 1, "result",
+		      "fexit_test%d failed err %lld\n", i + 1, fexit_res[i]);
+	}
 
 close_prog:
-	for (i = 0; i < 12; i++)
-		if (!IS_ERR_OR_NULL(link[i]))
-			bpf_link__destroy(link[i]);
-	bpf_object__close(obj_fentry);
-	bpf_object__close(obj_fexit);
-	bpf_object__close(pkt_obj);
+	test_pkt_access__destroy(pkt_skel);
+	fentry_test__destroy(fentry_skel);
+	fexit_test__destroy(fexit_skel);
 }
+24 -45
tools/testing/selftests/bpf/prog_tests/fentry_test.c
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
+#include "test_pkt_access.skel.h"
+#include "fentry_test.skel.h"
 
 void test_fentry_test(void)
 {
-	struct bpf_prog_load_attr attr = {
-		.file = "./fentry_test.o",
-	};
-
-	char prog_name[] = "fentry/bpf_fentry_testX";
-	struct bpf_object *obj = NULL, *pkt_obj;
-	int err, pkt_fd, kfree_skb_fd, i;
-	struct bpf_link *link[6] = {};
-	struct bpf_program *prog[6];
+	struct test_pkt_access *pkt_skel = NULL;
+	struct fentry_test *fentry_skel = NULL;
+	int err, pkt_fd, i;
 	__u32 duration, retval;
-	struct bpf_map *data_map;
-	const int zero = 0;
-	u64 result[6];
+	__u64 *result;
 
-	err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
-			    &pkt_obj, &pkt_fd);
-	if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+	pkt_skel = test_pkt_access__open_and_load();
+	if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
 		return;
-	err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd);
-	if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
-		goto close_prog;
+	fentry_skel = fentry_test__open_and_load();
+	if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
+		goto cleanup;
 
-	for (i = 0; i < 6; i++) {
-		prog_name[sizeof(prog_name) - 2] = '1' + i;
-		prog[i] = bpf_object__find_program_by_title(obj, prog_name);
-		if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name))
-			goto close_prog;
-		link[i] = bpf_program__attach_trace(prog[i]);
-		if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
-			goto close_prog;
-	}
-	data_map = bpf_object__find_map_by_name(obj, "fentry_t.bss");
-	if (CHECK(!data_map, "find_data_map", "data map not found\n"))
-		goto close_prog;
+	err = fentry_test__attach(fentry_skel);
+	if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
+		goto cleanup;
 
+	pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
 	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
 				NULL, NULL, &retval, &duration);
 	CHECK(err || retval, "ipv6",
 	      "err %d errno %d retval %d duration %d\n",
 	      err, errno, retval, duration);
 
-	err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result);
-	if (CHECK(err, "get_result",
-		  "failed to get output data: %d\n", err))
-		goto close_prog;
+	result = (__u64 *)fentry_skel->bss;
+	for (i = 0; i < 6; i++) {
+		if (CHECK(result[i] != 1, "result",
+			  "fentry_test%d failed err %lld\n", i + 1, result[i]))
+			goto cleanup;
+	}
 
-	for (i = 0; i < 6; i++)
-		if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
-			  i + 1, result[i]))
-			goto close_prog;
-
-close_prog:
-	for (i = 0; i < 6; i++)
-		if (!IS_ERR_OR_NULL(link[i]))
-			bpf_link__destroy(link[i]);
-	bpf_object__close(obj);
-	bpf_object__close(pkt_obj);
+cleanup:
+	fentry_test__destroy(fentry_skel);
+	test_pkt_access__destroy(pkt_skel);
 }
+22 -36
tools/testing/selftests/bpf/prog_tests/mmap.c
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 #include <sys/mman.h>
+#include "test_mmap.skel.h"
 
 struct map_data {
 	__u64 val[512 * 4];
-};
-
-struct bss_data {
-	__u64 in_val;
-	__u64 out_val;
 };
 
 static size_t roundup_page(size_t sz)
···
 
 void test_mmap(void)
 {
-	const char *file = "test_mmap.o";
-	const char *probe_name = "raw_tracepoint/sys_enter";
-	const char *tp_name = "sys_enter";
-	const size_t bss_sz = roundup_page(sizeof(struct bss_data));
+	const size_t bss_sz = roundup_page(sizeof(struct test_mmap__bss));
 	const size_t map_sz = roundup_page(sizeof(struct map_data));
 	const int zero = 0, one = 1, two = 2, far = 1500;
 	const long page_size = sysconf(_SC_PAGE_SIZE);
 	int err, duration = 0, i, data_map_fd;
-	struct bpf_program *prog;
-	struct bpf_object *obj;
-	struct bpf_link *link = NULL;
 	struct bpf_map *data_map, *bss_map;
 	void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2;
-	volatile struct bss_data *bss_data;
-	volatile struct map_data *map_data;
+	struct test_mmap__bss *bss_data;
+	struct map_data *map_data;
+	struct test_mmap *skel;
 	__u64 val = 0;
 
-	obj = bpf_object__open_file("test_mmap.o", NULL);
-	if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
-		  file, PTR_ERR(obj)))
-		return;
-	prog = bpf_object__find_program_by_title(obj, probe_name);
-	if (CHECK(!prog, "find_probe", "prog '%s' not found\n", probe_name))
-		goto cleanup;
-	err = bpf_object__load(obj);
-	if (CHECK(err, "obj_load", "failed to load prog '%s': %d\n",
-		  probe_name, err))
-		goto cleanup;
 
-	bss_map = bpf_object__find_map_by_name(obj, "test_mma.bss");
-	if (CHECK(!bss_map, "find_bss_map", ".bss map not found\n"))
-		goto cleanup;
-	data_map = bpf_object__find_map_by_name(obj, "data_map");
-	if (CHECK(!data_map, "find_data_map", "data_map map not found\n"))
-		goto cleanup;
+	skel = test_mmap__open_and_load();
+	if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+		return;
+
+	bss_map = skel->maps.bss;
+	data_map = skel->maps.data_map;
 	data_map_fd = bpf_map__fd(data_map);
 
 	bss_mmaped = mmap(NULL, bss_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
···
 
 	CHECK_FAIL(bss_data->in_val);
 	CHECK_FAIL(bss_data->out_val);
+	CHECK_FAIL(skel->bss->in_val);
+	CHECK_FAIL(skel->bss->out_val);
 	CHECK_FAIL(map_data->val[0]);
 	CHECK_FAIL(map_data->val[1]);
 	CHECK_FAIL(map_data->val[2]);
 	CHECK_FAIL(map_data->val[far]);
 
-	link = bpf_program__attach_raw_tracepoint(prog, tp_name);
-	if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+	err = test_mmap__attach(skel);
+	if (CHECK(err, "attach_raw_tp", "err %d\n", err))
 		goto cleanup;
 
 	bss_data->in_val = 123;
···
 
 	CHECK_FAIL(bss_data->in_val != 123);
 	CHECK_FAIL(bss_data->out_val != 123);
+	CHECK_FAIL(skel->bss->in_val != 123);
+	CHECK_FAIL(skel->bss->out_val != 123);
 	CHECK_FAIL(map_data->val[0] != 111);
 	CHECK_FAIL(map_data->val[1] != 222);
 	CHECK_FAIL(map_data->val[2] != 123);
···
 	usleep(1);
 	CHECK_FAIL(bss_data->in_val != 321);
 	CHECK_FAIL(bss_data->out_val != 321);
+	CHECK_FAIL(skel->bss->in_val != 321);
+	CHECK_FAIL(skel->bss->out_val != 321);
 	CHECK_FAIL(map_data->val[0] != 111);
 	CHECK_FAIL(map_data->val[1] != 222);
 	CHECK_FAIL(map_data->val[2] != 321);
···
 	map_data = tmp2;
 	CHECK_FAIL(bss_data->in_val != 321);
 	CHECK_FAIL(bss_data->out_val != 321);
+	CHECK_FAIL(skel->bss->in_val != 321);
+	CHECK_FAIL(skel->bss->out_val != 321);
 	CHECK_FAIL(map_data->val[0] != 111);
 	CHECK_FAIL(map_data->val[1] != 222);
 	CHECK_FAIL(map_data->val[2] != 321);
···
 		CHECK_FAIL(munmap(bss_mmaped, bss_sz));
 	if (map_mmaped)
 		CHECK_FAIL(munmap(map_mmaped, map_sz));
-	if (!IS_ERR_OR_NULL(link))
-		bpf_link__destroy(link);
-	bpf_object__close(obj);
+	test_mmap__destroy(skel);
 }
+24 -5
tools/testing/selftests/bpf/prog_tests/perf_buffer.c
 #include <sched.h>
 #include <sys/socket.h>
 #include <test_progs.h>
+#include "libbpf_internal.h"
 
 static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 {
···
 
 void test_perf_buffer(void)
 {
-	int err, prog_fd, nr_cpus, i, duration = 0;
+	int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0;
 	const char *prog_name = "kprobe/sys_nanosleep";
 	const char *file = "./test_perf_buffer.o";
 	struct perf_buffer_opts pb_opts = {};
···
 	struct bpf_object *obj;
 	struct perf_buffer *pb;
 	struct bpf_link *link;
+	bool *online;
 
 	nr_cpus = libbpf_num_possible_cpus();
 	if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus))
 		return;
 
+	err = parse_cpu_mask_file("/sys/devices/system/cpu/online",
+				  &online, &on_len);
+	if (CHECK(err, "nr_on_cpus", "err %d\n", err))
+		return;
+
+	for (i = 0; i < on_len; i++)
+		if (online[i])
+			nr_on_cpus++;
+
 	/* load program */
 	err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
-	if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
-		return;
+	if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) {
+		obj = NULL;
+		goto out_close;
+	}
 
 	prog = bpf_object__find_program_by_title(obj, prog_name);
 	if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
···
 	/* trigger kprobe on every CPU */
 	CPU_ZERO(&cpu_seen);
 	for (i = 0; i < nr_cpus; i++) {
+		if (i >= on_len || !online[i]) {
+			printf("skipping offline CPU #%d\n", i);
+			continue;
+		}
+
 		CPU_ZERO(&cpu_set);
 		CPU_SET(i, &cpu_set);
 
···
 	if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
 		goto out_free_pb;
 
-	if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt",
-		  "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen)))
+	if (CHECK(CPU_COUNT(&cpu_seen) != nr_on_cpus, "seen_cpu_cnt",
+		  "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
 		goto out_free_pb;
 
 out_free_pb:
···
 	bpf_link__destroy(link);
 out_close:
 	bpf_object__close(obj);
+	free(online);
 }
+2 -4
tools/testing/selftests/bpf/prog_tests/probe_user.c
 
 void test_probe_user(void)
 {
-#define kprobe_name "__sys_connect"
-	const char *prog_name = "kprobe/" kprobe_name;
+	const char *prog_name = "kprobe/__sys_connect";
 	const char *obj_file = "./test_probe_user.o";
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
 	int err, results_map_fd, sock_fd, duration = 0;
···
 		  "err %d\n", results_map_fd))
 		goto cleanup;
 
-	kprobe_link = bpf_program__attach_kprobe(kprobe_prog, false,
-						 kprobe_name);
+	kprobe_link = bpf_program__attach(kprobe_prog);
 	if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
 		  "err %ld\n", PTR_ERR(kprobe_link))) {
 		kprobe_link = NULL;
+4 -7
tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
 
 void test_rdonly_maps(void)
 {
-	const char *prog_name_skip_loop = "raw_tracepoint/sys_enter:skip_loop";
-	const char *prog_name_part_loop = "raw_tracepoint/sys_enter:part_loop";
-	const char *prog_name_full_loop = "raw_tracepoint/sys_enter:full_loop";
 	const char *file = "test_rdonly_maps.o";
 	struct rdonly_map_subtest subtests[] = {
-		{ "skip loop", prog_name_skip_loop, 0, 0 },
-		{ "part loop", prog_name_part_loop, 3, 2 + 3 + 4 },
-		{ "full loop", prog_name_full_loop, 4, 2 + 3 + 4 + 5 },
+		{ "skip loop", "skip_loop", 0, 0 },
+		{ "part loop", "part_loop", 3, 2 + 3 + 4 },
+		{ "full loop", "full_loop", 4, 2 + 3 + 4 + 5 },
 	};
 	int i, err, zero = 0, duration = 0;
 	struct bpf_link *link = NULL;
···
 		if (!test__start_subtest(t->subtest_name))
 			continue;
 
-		prog = bpf_object__find_program_by_title(obj, t->prog_name);
+		prog = bpf_object__find_program_by_name(obj, t->prog_name);
 		if (CHECK(!prog, "find_prog", "prog '%s' not found\n",
 			  t->prog_name))
 			goto cleanup;
+7
tools/testing/selftests/bpf/prog_tests/skb_ctx.c
 	.cb[4] = 5,
 	.priority = 6,
 	.tstamp = 7,
+	.wire_len = 100,
+	.gso_segs = 8,
+	.mark = 9,
 };
 struct bpf_prog_test_run_attr tattr = {
 	.data_in = &pkt_v4,
···
 		   "ctx_out_tstamp",
 		   "skb->tstamp == %lld, expected %d\n",
 		   skb.tstamp, 8);
+	CHECK_ATTR(skb.mark != 10,
+		   "ctx_out_mark",
+		   "skb->mark == %u, expected %d\n",
+		   skb.mark, 10);
 }
+63
tools/testing/selftests/bpf/prog_tests/skeleton.c
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */

#include <test_progs.h>

struct s {
	int a;
	long long b;
} __attribute__((packed));

#include "test_skeleton.skel.h"

void test_skeleton(void)
{
	int duration = 0, err;
	struct test_skeleton* skel;
	struct test_skeleton__bss *bss;
	struct test_skeleton__kconfig *kcfg;

	skel = test_skeleton__open();
	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
		return;

	if (CHECK(skel->kconfig, "skel_kconfig", "kconfig is mmaped()!\n"))
		goto cleanup;

	err = test_skeleton__load(skel);
	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
		goto cleanup;

	bss = skel->bss;
	bss->in1 = 1;
	bss->in2 = 2;
	bss->in3 = 3;
	bss->in4 = 4;
	bss->in5.a = 5;
	bss->in5.b = 6;
	kcfg = skel->kconfig;

	err = test_skeleton__attach(skel);
	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
		goto cleanup;

	/* trigger tracepoint */
	usleep(1);

	CHECK(bss->out1 != 1, "res1", "got %d != exp %d\n", bss->out1, 1);
	CHECK(bss->out2 != 2, "res2", "got %lld != exp %d\n", bss->out2, 2);
	CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3);
	CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4);
	CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n",
	      bss->handler_out5.a, 5);
	CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n",
	      bss->handler_out5.b, 6);

	CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1",
	      "got %d != exp %d\n", bss->bpf_syscall, kcfg->CONFIG_BPF_SYSCALL);
	CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2",
	      "got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION);

cleanup:
	test_skeleton__destroy(skel);
}
+26 -51
tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include "test_stacktrace_build_id.skel.h"
 
 void test_stacktrace_build_id(void)
 {
+
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
-	const char *prog_name = "tracepoint/random/urandom_read";
-	const char *file = "./test_stacktrace_build_id.o";
-	int err, prog_fd, stack_trace_len;
+	struct test_stacktrace_build_id *skel;
+	int err, stack_trace_len;
 	__u32 key, previous_key, val, duration = 0;
-	struct bpf_program *prog;
-	struct bpf_object *obj;
-	struct bpf_link *link = NULL;
 	char buf[256];
 	int i, j;
 	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
···
 	int retry = 1;
 
 retry:
-	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
-	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+	skel = test_stacktrace_build_id__open_and_load();
+	if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
 		return;
 
-	prog = bpf_object__find_program_by_title(obj, prog_name);
-	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
-		goto close_prog;
-
-	link = bpf_program__attach_tracepoint(prog, "random", "urandom_read");
-	if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
-		goto close_prog;
+	err = test_stacktrace_build_id__attach(skel);
+	if (CHECK(err, "attach_tp", "err %d\n", err))
+		goto cleanup;
 
 	/* find map fds */
-	control_map_fd = bpf_find_map(__func__, obj, "control_map");
-	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
-		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
-
-	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
-	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
-		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
-
-	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
-	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
-		  err, errno))
-		goto disable_pmu;
-
-	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
-	if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
-		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
+	control_map_fd = bpf_map__fd(skel->maps.control_map);
+	stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+	stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+	stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
 
 	if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
-		goto disable_pmu;
+		goto cleanup;
 	if (CHECK_FAIL(system("./urandom_read")))
-		goto disable_pmu;
+		goto cleanup;
 	/* disable stack trace collection */
 	key = 0;
 	val = 1;
···
 	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
 	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
 		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
+		goto cleanup;
 
 	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
 	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
 		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
+		goto cleanup;
 
 	err = extract_build_id(buf, 256);
 
 	if (CHECK(err, "get build_id with readelf",
 		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
+		goto cleanup;
 
 	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
 	if (CHECK(err, "get_next_key from stackmap",
 		  "err %d, errno %d\n", err, errno))
-		goto disable_pmu;
+		goto cleanup;
 
 	do {
 		char build_id[64];
···
 		err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
 		if (CHECK(err, "lookup_elem from stackmap",
 			  "err %d, errno %d\n", err, errno))
-			goto disable_pmu;
+			goto cleanup;
 		for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
 			if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
 			    id_offs[i].offset != 0) {
···
 	 * try it one more time.
 	 */
 	if (build_id_matches < 1 && retry--) {
-		bpf_link__destroy(link);
-		bpf_object__close(obj);
+		test_stacktrace_build_id__destroy(skel);
 		printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
 		       __func__);
 		goto retry;
···
 	if (CHECK(build_id_matches < 1, "build id match",
 		  "Didn't find expected build ID from the map\n"))
-		goto disable_pmu;
+		goto cleanup;
 
-	stack_trace_len = PERF_MAX_STACK_DEPTH
-			  * sizeof(struct bpf_stack_build_id);
+	stack_trace_len = PERF_MAX_STACK_DEPTH *
+			  sizeof(struct bpf_stack_build_id);
 	err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
 	CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
 	      "err %d errno %d\n", err, errno);
 
-disable_pmu:
-	bpf_link__destroy(link);
-
-close_prog:
-	bpf_object__close(obj);
+cleanup:
+	test_stacktrace_build_id__destroy(skel);
 }
+32 -50
tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include "test_stacktrace_build_id.skel.h"
 
 static __u64 read_perf_max_sample_freq(void)
 {
···
 
 void test_stacktrace_build_id_nmi(void)
 {
-	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
-	const char *prog_name = "tracepoint/random/urandom_read";
-	const char *file = "./test_stacktrace_build_id.o";
-	int err, pmu_fd, prog_fd;
+	int control_map_fd, stackid_hmap_fd, stackmap_fd;
+	struct test_stacktrace_build_id *skel;
+	int err, pmu_fd;
 	struct perf_event_attr attr = {
 		.freq = 1,
 		.type = PERF_TYPE_HARDWARE,
 		.config = PERF_COUNT_HW_CPU_CYCLES,
 	};
 	__u32 key, previous_key, val, duration = 0;
-	struct bpf_program *prog;
-	struct bpf_object *obj;
-	struct bpf_link *link;
 	char buf[256];
 	int i, j;
 	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
···
 	attr.sample_freq = read_perf_max_sample_freq();
 
 retry:
-	err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
-	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+	skel = test_stacktrace_build_id__open();
+	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
 		return;
 
-	prog = bpf_object__find_program_by_title(obj, prog_name);
-	if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
-		goto close_prog;
+	/* override program type */
+	bpf_program__set_perf_event(skel->progs.oncpu);
+
+	err = test_stacktrace_build_id__load(skel);
+	if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+		goto cleanup;
 
 	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
 			 0 /* cpu 0 */, -1 /* group id */,
···
 	if (CHECK(pmu_fd < 0, "perf_event_open",
 		  "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
 		  pmu_fd, errno))
-		goto close_prog;
+		goto cleanup;
 
-	link = bpf_program__attach_perf_event(prog, pmu_fd);
-	if (CHECK(IS_ERR(link), "attach_perf_event",
-		  "err %ld\n", PTR_ERR(link))) {
+	skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+							   pmu_fd);
+	if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
+		  "err %ld\n", PTR_ERR(skel->links.oncpu))) {
 		close(pmu_fd);
-		goto close_prog;
+		goto cleanup;
 	}
 
 	/* find map fds */
-	control_map_fd = bpf_find_map(__func__, obj, "control_map");
-	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
-		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
-
-	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
-	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
-		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
-
-	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
-	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
-		  err, errno))
-		goto disable_pmu;
-
-	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
-	if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
-		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
+	control_map_fd = bpf_map__fd(skel->maps.control_map);
+	stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+	stackmap_fd = bpf_map__fd(skel->maps.stackmap);
 
 	if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
-		goto disable_pmu;
+		goto cleanup;
 	if (CHECK_FAIL(system("taskset 0x1 ./urandom_read 100000")))
-		goto disable_pmu;
+		goto cleanup;
 	/* disable stack trace collection */
 	key = 0;
 	val = 1;
···
 	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
 	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
 		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
+		goto cleanup;
 
 	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
 	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
 		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
+		goto cleanup;
 
 	err = extract_build_id(buf, 256);
 
 	if (CHECK(err, "get build_id with readelf",
 		  "err %d errno %d\n", err, errno))
-		goto disable_pmu;
+		goto cleanup;
 
 	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
 	if (CHECK(err, "get_next_key from stackmap",
 		  "err %d, errno %d\n", err, errno))
-		goto disable_pmu;
+		goto cleanup;
 
 	do {
 		char build_id[64];
···
 		err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
 		if (CHECK(err, "lookup_elem from stackmap",
 			  "err %d, errno %d\n", err, errno))
-			goto disable_pmu;
+			goto cleanup;
 		for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
 			if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
 			    id_offs[i].offset != 0) {
···
 	 * try it one more time.
 	 */
 	if (build_id_matches < 1 && retry--) {
-		bpf_link__destroy(link);
-		bpf_object__close(obj);
+		test_stacktrace_build_id__destroy(skel);
 		printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
 		       __func__);
 		goto retry;
···
 	if (CHECK(build_id_matches < 1, "build id match",
 		  "Didn't find expected build ID from the map\n"))
-		goto disable_pmu;
+		goto cleanup;
 
 	/*
 	 * We intentionally skip compare_stack_ips(). This is because we
···
 	 * BPF_STACK_BUILD_ID_IP;
 	 */
 
-disable_pmu:
-	bpf_link__destroy(link);
-close_prog:
-	bpf_object__close(obj);
+cleanup:
+	test_stacktrace_build_id__destroy(skel);
 }
+25
tools/testing/selftests/bpf/prog_tests/xdp_perf.c
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>

void test_xdp_perf(void)
{
	const char *file = "./xdp_dummy.o";
	__u32 duration, retval, size;
	struct bpf_object *obj;
	char in[128], out[128];
	int err, prog_fd;

	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
	if (CHECK_FAIL(err))
		return;

	err = bpf_prog_test_run(prog_fd, 1000000, &in[0], 128,
				out, &size, &retval, &duration);

	CHECK(err || retval != XDP_PASS || size != 128,
	      "xdp-perf",
	      "err %d errno %d retval %d size %d\n",
	      err, errno, retval, size);

	bpf_object__close(obj);
}
+3
tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c
#include "core_reloc_types.h"

void f(struct core_reloc_arrays___equiv_zero_sz_arr x) {}
+3
tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c
#include "core_reloc_types.h"

void f(struct core_reloc_arrays___err_bad_zero_sz_arr x) {}
+3
tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c
#include "core_reloc_types.h"

void f(struct core_reloc_arrays___fixed_arr x) {}
+39
tools/testing/selftests/bpf/progs/core_reloc_types.h
 	char b123;
 	int c1c;
 	int d00d;
+	int f10c;
 };
 
 struct core_reloc_arrays_substruct {
···
 	char b[2][3][4];
 	struct core_reloc_arrays_substruct c[3];
 	struct core_reloc_arrays_substruct d[1][2];
+	struct core_reloc_arrays_substruct f[][2];
 };
 
 /* bigger array dimensions */
···
 	char b[3][4][5];
 	struct core_reloc_arrays_substruct c[4];
 	struct core_reloc_arrays_substruct d[2][3];
+	struct core_reloc_arrays_substruct f[1][3];
 };
 
 /* different size of array's value (struct) */
···
 		int d;
 		int __padding2;
 	} d[1][2];
+	struct {
+		int __padding1;
+		int c;
+		int __padding2;
+	} f[][2];
+};
+
+struct core_reloc_arrays___equiv_zero_sz_arr {
+	int a[5];
+	char b[2][3][4];
+	struct core_reloc_arrays_substruct c[3];
+	struct core_reloc_arrays_substruct d[1][2];
+	/* equivalent to flexible array */
+	struct core_reloc_arrays_substruct f[0][2];
+};
+
+struct core_reloc_arrays___fixed_arr {
+	int a[5];
+	char b[2][3][4];
+	struct core_reloc_arrays_substruct c[3];
+	struct core_reloc_arrays_substruct d[1][2];
+	/* not a flexible array anymore, but within access bounds */
+	struct core_reloc_arrays_substruct f[1][2];
 };
 
 struct core_reloc_arrays___err_too_small {
···
 	char b[2][3][4];
 	struct core_reloc_arrays_substruct c[3];
 	struct core_reloc_arrays_substruct d[1][2];
+	struct core_reloc_arrays_substruct f[][2];
 };
 
 struct core_reloc_arrays___err_too_shallow {
···
 	char b[2][3]; /* this one lacks one dimension */
 	struct core_reloc_arrays_substruct c[3];
 	struct core_reloc_arrays_substruct d[1][2];
+	struct core_reloc_arrays_substruct f[][2];
 };
 
 struct core_reloc_arrays___err_non_array {
···
 	char b[2][3][4];
 	struct core_reloc_arrays_substruct c[3];
 	struct core_reloc_arrays_substruct d[1][2];
+	struct core_reloc_arrays_substruct f[][2];
 };
 
 struct core_reloc_arrays___err_wrong_val_type {
 	int a[5];
 	char b[2][3][4];
 	int c[3]; /* value is not a struct */
+	struct core_reloc_arrays_substruct d[1][2];
+	struct core_reloc_arrays_substruct f[][2];
+};
+
+struct core_reloc_arrays___err_bad_zero_sz_arr {
+	/* zero-sized array, but not at the end */
+	struct core_reloc_arrays_substruct f[0][2];
+	int a[5];
+	char b[2][3][4];
+	struct core_reloc_arrays_substruct c[3];
 	struct core_reloc_arrays_substruct d[1][2];
 };
 
+12 -22
tools/testing/selftests/bpf/progs/test_attach_probe.c
···
 #include <linux/bpf.h>
 #include "bpf_helpers.h"

-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 4);
-	__type(key, int);
-	__type(value, int);
-} results_map SEC(".maps");
+int kprobe_res = 0;
+int kretprobe_res = 0;
+int uprobe_res = 0;
+int uretprobe_res = 0;

 SEC("kprobe/sys_nanosleep")
-int handle_sys_nanosleep_entry(struct pt_regs *ctx)
+int handle_kprobe(struct pt_regs *ctx)
 {
-	const int key = 0, value = 1;
-
-	bpf_map_update_elem(&results_map, &key, &value, 0);
+	kprobe_res = 1;
 	return 0;
 }

 SEC("kretprobe/sys_nanosleep")
-int handle_sys_getpid_return(struct pt_regs *ctx)
+int handle_kretprobe(struct pt_regs *ctx)
 {
-	const int key = 1, value = 2;
-
-	bpf_map_update_elem(&results_map, &key, &value, 0);
+	kretprobe_res = 2;
 	return 0;
 }

 SEC("uprobe/trigger_func")
-int handle_uprobe_entry(struct pt_regs *ctx)
+int handle_uprobe(struct pt_regs *ctx)
 {
-	const int key = 2, value = 3;
-
-	bpf_map_update_elem(&results_map, &key, &value, 0);
+	uprobe_res = 3;
 	return 0;
 }

 SEC("uretprobe/trigger_func")
-int handle_uprobe_return(struct pt_regs *ctx)
+int handle_uretprobe(struct pt_regs *ctx)
 {
-	const int key = 3, value = 4;
-
-	bpf_map_update_elem(&results_map, &key, &value, 0);
+	uretprobe_res = 4;
 	return 0;
 }
+62
tools/testing/selftests/bpf/progs/test_core_extern.c
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* non-existing BPF helper, to test dead code elimination */
+static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999;
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */
+extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak;
+extern bool CONFIG_BOOL __kconfig __weak;
+extern char CONFIG_CHAR __kconfig __weak;
+extern uint16_t CONFIG_USHORT __kconfig __weak;
+extern int CONFIG_INT __kconfig __weak;
+extern uint64_t CONFIG_ULONG __kconfig __weak;
+extern const char CONFIG_STR[8] __kconfig __weak;
+extern uint64_t CONFIG_MISSING __kconfig __weak;
+
+uint64_t kern_ver = -1;
+uint64_t bpf_syscall = -1;
+uint64_t tristate_val = -1;
+uint64_t bool_val = -1;
+uint64_t char_val = -1;
+uint64_t ushort_val = -1;
+uint64_t int_val = -1;
+uint64_t ulong_val = -1;
+char str_val[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+uint64_t missing_val = -1;
+
+SEC("raw_tp/sys_enter")
+int handle_sys_enter(struct pt_regs *ctx)
+{
+	int i;
+
+	kern_ver = LINUX_KERNEL_VERSION;
+	bpf_syscall = CONFIG_BPF_SYSCALL;
+	tristate_val = CONFIG_TRISTATE;
+	bool_val = CONFIG_BOOL;
+	char_val = CONFIG_CHAR;
+	ushort_val = CONFIG_USHORT;
+	int_val = CONFIG_INT;
+	ulong_val = CONFIG_ULONG;
+
+	for (i = 0; i < sizeof(CONFIG_STR); i++) {
+		str_val[i] = CONFIG_STR[i];
+	}
+
+	if (CONFIG_MISSING)
+		/* invalid, but dead code - never executed */
+		missing_val = bpf_missing_helper(ctx, 123);
+	else
+		missing_val = 0xDEADC0DE;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+4 -4
tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
···
 	char b123;
 	int c1c;
 	int d00d;
+	int f01c;
 };

 struct core_reloc_arrays_substruct {
···
 	char b[2][3][4];
 	struct core_reloc_arrays_substruct c[3];
 	struct core_reloc_arrays_substruct d[1][2];
+	struct core_reloc_arrays_substruct f[][2];
 };

 #define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
···
 	struct core_reloc_arrays *in = (void *)&data.in;
 	struct core_reloc_arrays_output *out = (void *)&data.out;

-	/* in->a[2] */
 	if (CORE_READ(&out->a2, &in->a[2]))
 		return 1;
-	/* in->b[1][2][3] */
 	if (CORE_READ(&out->b123, &in->b[1][2][3]))
 		return 1;
-	/* in->c[1].c */
 	if (CORE_READ(&out->c1c, &in->c[1].c))
 		return 1;
-	/* in->d[0][0].d */
 	if (CORE_READ(&out->d00d, &in->d[0][0].d))
+		return 1;
+	if (CORE_READ(&out->f01c, &in->f[0][1].c))
 		return 1;

 	return 0;
+1 -1
tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
···
 		goto done;				\
 })

-SEC("select_by_skb_data")
+SEC("sk_reuseport")
 int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
 {
 	__u32 linum, index = 0, flags = 0, index_zero = 0;
+6
tools/testing/selftests/bpf/progs/test_skb_ctx.c
···
 	}
 	skb->priority++;
 	skb->tstamp++;
+	skb->mark++;
+
+	if (skb->wire_len != 100)
+		return 1;
+	if (skb->gso_segs != 8)
+		return 1;

 	return 0;
 }
+46
tools/testing/selftests/bpf/progs/test_skeleton.c
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct s {
+	int a;
+	long long b;
+} __attribute__((packed));
+
+int in1 = 0;
+long long in2 = 0;
+char in3 = '\0';
+long long in4 __attribute__((aligned(64))) = 0;
+struct s in5 = {};
+
+long long out2 = 0;
+char out3 = 0;
+long long out4 = 0;
+int out1 = 0;
+
+extern bool CONFIG_BPF_SYSCALL __kconfig;
+extern int LINUX_KERNEL_VERSION __kconfig;
+bool bpf_syscall = 0;
+int kern_ver = 0;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+	static volatile struct s out5;
+
+	out1 = in1;
+	out2 = in2;
+	out3 = in3;
+	out4 = in4;
+	out5 = in5;
+
+	bpf_syscall = CONFIG_BPF_SYSCALL;
+	kern_ver = LINUX_KERNEL_VERSION;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-571
tools/testing/selftests/bpf/test_cgroup_attach.c
-// SPDX-License-Identifier: GPL-2.0
-
-/* eBPF example program:
- *
- * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
- *
- * - Loads eBPF program
- *
- *   The eBPF program accesses the map passed in to store two pieces of
- *   information. The number of invocations of the program, which maps
- *   to the number of packets received, is stored to key 0. Key 1 is
- *   incremented on each iteration by the number of bytes stored in
- *   the skb. The program also stores the number of received bytes
- *   in the cgroup storage.
- *
- * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
- *
- * - Every second, reads map[0] and map[1] to see how many bytes and
- *   packets were seen on any socket of tasks in the given cgroup.
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <sys/resource.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <linux/filter.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-
-#include "bpf_util.h"
-#include "bpf_rlimit.h"
-#include "cgroup_helpers.h"
-
-#define FOO		"/foo"
-#define BAR		"/foo/bar/"
-#define PING_CMD	"ping -q -c1 -w1 127.0.0.1 > /dev/null"
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-#ifdef DEBUG
-#define debug(args...)	printf(args)
-#else
-#define debug(args...)
-#endif
-
-static int prog_load(int verdict)
-{
-	int ret;
-	struct bpf_insn prog[] = {
-		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
-		BPF_EXIT_INSN(),
-	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-
-	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
-			       prog, insns_cnt, "GPL", 0,
-			       bpf_log_buf, BPF_LOG_BUF_SIZE);
-
-	if (ret < 0) {
-		log_err("Loading program");
-		printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
-		return 0;
-	}
-	return ret;
-}
-
-static int test_foo_bar(void)
-{
-	int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0;
-
-	allow_prog = prog_load(1);
-	if (!allow_prog)
-		goto err;
-
-	drop_prog = prog_load(0);
-	if (!drop_prog)
-		goto err;
-
-	if (setup_cgroup_environment())
-		goto err;
-
-	/* Create cgroup /foo, get fd, and join it */
-	foo = create_and_get_cgroup(FOO);
-	if (foo < 0)
-		goto err;
-
-	if (join_cgroup(FOO))
-		goto err;
-
-	if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to /foo");
-		goto err;
-	}
-
-	debug("Attached DROP prog. This ping in cgroup /foo should fail...\n");
-	assert(system(PING_CMD) != 0);
-
-	/* Create cgroup /foo/bar, get fd, and join it */
-	bar = create_and_get_cgroup(BAR);
-	if (bar < 0)
-		goto err;
-
-	if (join_cgroup(BAR))
-		goto err;
-
-	debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
-	assert(system(PING_CMD) != 0);
-
-	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to /foo/bar");
-		goto err;
-	}
-
-	debug("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
-	assert(system(PING_CMD) == 0);
-
-	if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching program from /foo/bar");
-		goto err;
-	}
-
-	debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
-	      "This ping in cgroup /foo/bar should fail...\n");
-	assert(system(PING_CMD) != 0);
-
-	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to /foo/bar");
-		goto err;
-	}
-
-	if (bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching program from /foo");
-		goto err;
-	}
-
-	debug("Attached PASS from /foo/bar and detached DROP from /foo.\n"
-	      "This ping in cgroup /foo/bar should pass...\n");
-	assert(system(PING_CMD) == 0);
-
-	if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to /foo/bar");
-		goto err;
-	}
-
-	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
-		errno = 0;
-		log_err("Unexpected success attaching prog to /foo/bar");
-		goto err;
-	}
-
-	if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching program from /foo/bar");
-		goto err;
-	}
-
-	if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
-		errno = 0;
-		log_err("Unexpected success in double detach from /foo");
-		goto err;
-	}
-
-	if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
-		log_err("Attaching non-overridable prog to /foo");
-		goto err;
-	}
-
-	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
-		errno = 0;
-		log_err("Unexpected success attaching non-overridable prog to /foo/bar");
-		goto err;
-	}
-
-	if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
-			     BPF_F_ALLOW_OVERRIDE)) {
-		errno = 0;
-		log_err("Unexpected success attaching overridable prog to /foo/bar");
-		goto err;
-	}
-
-	if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
-			     BPF_F_ALLOW_OVERRIDE)) {
-		errno = 0;
-		log_err("Unexpected success attaching overridable prog to /foo");
-		goto err;
-	}
-
-	if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
-		log_err("Attaching different non-overridable prog to /foo");
-		goto err;
-	}
-
-	goto out;
-
-err:
-	rc = 1;
-
-out:
-	close(foo);
-	close(bar);
-	cleanup_cgroup_environment();
-	if (!rc)
-		printf("#override:PASS\n");
-	else
-		printf("#override:FAIL\n");
-	return rc;
-}
-
-static int map_fd = -1;
-
-static int prog_load_cnt(int verdict, int val)
-{
-	int cgroup_storage_fd, percpu_cgroup_storage_fd;
-
-	if (map_fd < 0)
-		map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
-	if (map_fd < 0) {
-		printf("failed to create map '%s'\n", strerror(errno));
-		return -1;
-	}
-
-	cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
-			sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
-	if (cgroup_storage_fd < 0) {
-		printf("failed to create map '%s'\n", strerror(errno));
-		return -1;
-	}
-
-	percpu_cgroup_storage_fd = bpf_create_map(
-		BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
-		sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
-	if (percpu_cgroup_storage_fd < 0) {
-		printf("failed to create map '%s'\n", strerror(errno));
-		return -1;
-	}
-
-	struct bpf_insn prog[] = {
-		BPF_MOV32_IMM(BPF_REG_0, 0),
-		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
-		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
-		BPF_LD_MAP_FD(BPF_REG_1, map_fd),
-		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
-		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
-		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
-
-		BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
-		BPF_MOV64_IMM(BPF_REG_2, 0),
-		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
-		BPF_MOV64_IMM(BPF_REG_1, val),
-		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
-
-		BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
-		BPF_MOV64_IMM(BPF_REG_2, 0),
-		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
-		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
-		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
-		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
-
-		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
-		BPF_EXIT_INSN(),
-	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-	int ret;
-
-	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
-			       prog, insns_cnt, "GPL", 0,
-			       bpf_log_buf, BPF_LOG_BUF_SIZE);
-
-	if (ret < 0) {
-		log_err("Loading program");
-		printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
-		return 0;
-	}
-	close(cgroup_storage_fd);
-	return ret;
-}
-
-
-static int test_multiprog(void)
-{
-	__u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
-	int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
-	int drop_prog, allow_prog[6] = {}, rc = 0;
-	unsigned long long value;
-	int i = 0;
-
-	for (i = 0; i < 6; i++) {
-		allow_prog[i] = prog_load_cnt(1, 1 << i);
-		if (!allow_prog[i])
-			goto err;
-	}
-	drop_prog = prog_load_cnt(0, 1);
-	if (!drop_prog)
-		goto err;
-
-	if (setup_cgroup_environment())
-		goto err;
-
-	cg1 = create_and_get_cgroup("/cg1");
-	if (cg1 < 0)
-		goto err;
-	cg2 = create_and_get_cgroup("/cg1/cg2");
-	if (cg2 < 0)
-		goto err;
-	cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
-	if (cg3 < 0)
-		goto err;
-	cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
-	if (cg4 < 0)
-		goto err;
-	cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
-	if (cg5 < 0)
-		goto err;
-
-	if (join_cgroup("/cg1/cg2/cg3/cg4/cg5"))
-		goto err;
-
-	if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_MULTI)) {
-		log_err("Attaching prog to cg1");
-		goto err;
-	}
-	if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
-			     BPF_F_ALLOW_MULTI)) {
-		log_err("Unexpected success attaching the same prog to cg1");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_MULTI)) {
-		log_err("Attaching prog2 to cg1");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to cg2");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_MULTI)) {
-		log_err("Attaching prog to cg3");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to cg4");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) {
-		log_err("Attaching prog to cg5");
-		goto err;
-	}
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 8 + 32);
-
-	/* query the number of effective progs in cg5 */
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      NULL, NULL, &prog_cnt) == 0);
-	assert(prog_cnt == 4);
-	/* retrieve prog_ids of effective progs in cg5 */
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      &attach_flags, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 4);
-	assert(attach_flags == 0);
-	saved_prog_id = prog_ids[0];
-	/* check enospc handling */
-	prog_ids[0] = 0;
-	prog_cnt = 2;
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      &attach_flags, prog_ids, &prog_cnt) == -1 &&
-	       errno == ENOSPC);
-	assert(prog_cnt == 4);
-	/* check that prog_ids are returned even when buffer is too small */
-	assert(prog_ids[0] == saved_prog_id);
-	/* retrieve prog_id of single attached prog in cg5 */
-	prog_ids[0] = 0;
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
-			      NULL, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 1);
-	assert(prog_ids[0] == saved_prog_id);
-
-	/* detach bottom program and ping again */
-	if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching prog from cg5");
-		goto err;
-	}
-	value = 0;
-	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 8 + 16);
-
-	/* detach 3rd from bottom program and ping again */
-	errno = 0;
-	if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Unexpected success on detach from cg3");
-		goto err;
-	}
-	if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching from cg3");
-		goto err;
-	}
-	value = 0;
-	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 16);
-
-	/* detach 2nd from bottom program and ping again */
-	if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching prog from cg4");
-		goto err;
-	}
-	value = 0;
-	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 4);
-
-	prog_cnt = 4;
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      &attach_flags, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 3);
-	assert(attach_flags == 0);
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
-			      NULL, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 0);
-	goto out;
-err:
-	rc = 1;
-
-out:
-	for (i = 0; i < 6; i++)
-		if (allow_prog[i] > 0)
-			close(allow_prog[i]);
-	close(cg1);
-	close(cg2);
-	close(cg3);
-	close(cg4);
-	close(cg5);
-	cleanup_cgroup_environment();
-	if (!rc)
-		printf("#multi:PASS\n");
-	else
-		printf("#multi:FAIL\n");
-	return rc;
-}
-
-static int test_autodetach(void)
-{
-	__u32 prog_cnt = 4, attach_flags;
-	int allow_prog[2] = {0};
-	__u32 prog_ids[2] = {0};
-	int cg = 0, i, rc = -1;
-	void *ptr = NULL;
-	int attempts;
-
-	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
-		allow_prog[i] = prog_load_cnt(1, 1 << i);
-		if (!allow_prog[i])
-			goto err;
-	}
-
-	if (setup_cgroup_environment())
-		goto err;
-
-	/* create a cgroup, attach two programs and remember their ids */
-	cg = create_and_get_cgroup("/cg_autodetach");
-	if (cg < 0)
-		goto err;
-
-	if (join_cgroup("/cg_autodetach"))
-		goto err;
-
-	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
-		if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS,
-				    BPF_F_ALLOW_MULTI)) {
-			log_err("Attaching prog[%d] to cg:egress", i);
-			goto err;
-		}
-	}
-
-	/* make sure that programs are attached and run some traffic */
-	assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
-			      prog_ids, &prog_cnt) == 0);
-	assert(system(PING_CMD) == 0);
-
-	/* allocate some memory (4Mb) to pin the original cgroup */
-	ptr = malloc(4 * (1 << 20));
-	if (!ptr)
-		goto err;
-
-	/* close programs and cgroup fd */
-	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
-		close(allow_prog[i]);
-		allow_prog[i] = 0;
-	}
-
-	close(cg);
-	cg = 0;
-
-	/* leave the cgroup and remove it. don't detach programs */
-	cleanup_cgroup_environment();
-
-	/* wait for the asynchronous auto-detachment.
-	 * wait for no more than 5 sec and give up.
-	 */
-	for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
-		for (attempts = 5; attempts >= 0; attempts--) {
-			int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
-
-			if (fd < 0)
-				break;
-
-			/* don't leave the fd open */
-			close(fd);
-
-			if (!attempts)
-				goto err;
-
-			sleep(1);
-		}
-	}
-
-	rc = 0;
-err:
-	for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
-		if (allow_prog[i] > 0)
-			close(allow_prog[i]);
-	if (cg)
-		close(cg);
-	free(ptr);
-	cleanup_cgroup_environment();
-	if (!rc)
-		printf("#autodetach:PASS\n");
-	else
-		printf("#autodetach:FAIL\n");
-	return rc;
-}
-
-int main(void)
-{
-	int (*tests[])(void) = {
-		test_foo_bar,
-		test_multiprog,
-		test_autodetach,
-	};
-	int errors = 0;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(tests); i++)
-		if (tests[i]())
-			errors++;
-
-	if (errors)
-		printf("test_cgroup_attach:FAIL\n");
-	else
-		printf("test_cgroup_attach:PASS\n");
-
-	return errors ? EXIT_FAILURE : EXIT_SUCCESS;
-}
+10
tools/testing/selftests/bpf/test_cpp.cpp
···
 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#include <iostream>
 #include "libbpf.h"
 #include "bpf.h"
 #include "btf.h"
+#include "test_core_extern.skel.h"

 /* do nothing, just make sure we can link successfully */

 int main(int argc, char *argv[])
 {
+	struct test_core_extern *skel;
+
 	/* libbpf.h */
 	libbpf_set_print(NULL);

···
 	/* btf.h */
 	btf__new(NULL, 0);
+
+	/* BPF skeleton */
+	skel = test_core_extern__open_and_load();
+	test_core_extern__destroy(skel);
+
+	std::cout << "DONE!" << std::endl;

 	return 0;
 }
+4
tools/testing/selftests/bpf/test_progs.h
···
 #define _CHECK(condition, tag, duration, format...) ({	\
 	int __ret = !!(condition);			\
+	int __save_errno = errno;			\
 	if (__ret) {					\
 		test__fail();				\
 		printf("%s:FAIL:%s ", __func__, tag);	\
···
 		printf("%s:PASS:%s %d nsec\n",		\
 		       __func__, tag, duration);	\
 	}						\
+	errno = __save_errno;				\
 	__ret;						\
 })

 #define CHECK_FAIL(condition) ({			\
 	int __ret = !!(condition);			\
+	int __save_errno = errno;			\
 	if (__ret) {					\
 		test__fail();				\
 		printf("%s:FAIL:%d\n", __func__, __LINE__); \
 	}						\
+	errno = __save_errno;				\
 	__ret;						\
 })
+289 -225
tools/testing/selftests/bpf/test_select_reuseport.c → tools/testing/selftests/bpf/prog_tests/select_reuseport.c
···
 #include <bpf/libbpf.h>
 #include "bpf_rlimit.h"
 #include "bpf_util.h"
+
+#include "test_progs.h"
 #include "test_select_reuseport_common.h"

+#define MAX_TEST_NAME 80
 #define MIN_TCPHDR_LEN 20
 #define UDPHDR_LEN 8
···
 static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
 static enum result expected_results[NR_RESULTS];
 static int sk_fds[REUSEPORT_ARRAY_SIZE];
-static int reuseport_array, outer_map;
+static int reuseport_array = -1, outer_map = -1;
 static int select_by_skb_data_prog;
-static int saved_tcp_syncookie;
+static int saved_tcp_syncookie = -1;
 static struct bpf_object *obj;
-static int saved_tcp_fo;
+static int saved_tcp_fo = -1;
 static __u32 index_zero;
 static int epfd;
···
 	sa_family_t family;
 } srv_sa;

-#define CHECK(condition, tag, format...) ({				\
-	int __ret = !!(condition);					\
-	if (__ret) {							\
-		printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag);	\
-		printf(format);						\
-		exit(-1);						\
+#define RET_IF(condition, tag, format...) ({				\
+	if (CHECK_FAIL(condition)) {					\
+		printf(tag " " format);					\
+		return;							\
 	}								\
 })

-static void create_maps(void)
+#define RET_ERR(condition, tag, format...) ({				\
+	if (CHECK_FAIL(condition)) {					\
+		printf(tag " " format);					\
+		return -1;						\
+	}								\
+})
+
+static int create_maps(void)
 {
 	struct bpf_create_map_attr attr = {};
···
 	attr.max_entries = REUSEPORT_ARRAY_SIZE;

 	reuseport_array = bpf_create_map_xattr(&attr);
-	CHECK(reuseport_array == -1, "creating reuseport_array",
-	      "reuseport_array:%d errno:%d\n", reuseport_array, errno);
+	RET_ERR(reuseport_array == -1, "creating reuseport_array",
+		"reuseport_array:%d errno:%d\n", reuseport_array, errno);

 	/* Creating outer_map */
 	attr.name = "outer_map";
···
 	attr.max_entries = 1;
 	attr.inner_map_fd = reuseport_array;
 	outer_map = bpf_create_map_xattr(&attr);
-	CHECK(outer_map == -1, "creating outer_map",
-	      "outer_map:%d errno:%d\n", outer_map, errno);
+	RET_ERR(outer_map == -1, "creating outer_map",
+		"outer_map:%d errno:%d\n", outer_map, errno);
+
+	return 0;
 }

-static void prepare_bpf_obj(void)
+static int prepare_bpf_obj(void)
 {
 	struct bpf_program *prog;
 	struct bpf_map *map;
 	int err;
-	struct bpf_object_open_attr attr = {
-		.file = "test_select_reuseport_kern.o",
-		.prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
-	};

-	obj = bpf_object__open_xattr(&attr);
-	CHECK(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
-	      "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
-
-	prog = bpf_program__next(NULL, obj);
-	CHECK(!prog, "get first bpf_program", "!prog\n");
-	bpf_program__set_type(prog, attr.prog_type);
+	obj = bpf_object__open("test_select_reuseport_kern.o");
+	RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
+		"obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));

 	map = bpf_object__find_map_by_name(obj, "outer_map");
-	CHECK(!map, "find outer_map", "!map\n");
+	RET_ERR(!map, "find outer_map", "!map\n");
 	err = bpf_map__reuse_fd(map, outer_map);
-	CHECK(err, "reuse outer_map", "err:%d\n", err);
+	RET_ERR(err, "reuse outer_map", "err:%d\n", err);

 	err = bpf_object__load(obj);
-	CHECK(err, "load bpf_object", "err:%d\n", err);
+	RET_ERR(err, "load bpf_object", "err:%d\n", err);

+	prog = bpf_program__next(NULL, obj);
+	RET_ERR(!prog, "get first bpf_program", "!prog\n");
 	select_by_skb_data_prog = bpf_program__fd(prog);
-	CHECK(select_by_skb_data_prog == -1, "get prog fd",
-	      "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
+	RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
+		"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);

 	map = bpf_object__find_map_by_name(obj, "result_map");
-	CHECK(!map, "find result_map", "!map\n");
+	RET_ERR(!map, "find result_map", "!map\n");
 	result_map = bpf_map__fd(map);
-	CHECK(result_map == -1, "get result_map fd",
-	      "result_map:%d\n", result_map);
+	RET_ERR(result_map == -1, "get result_map fd",
+		"result_map:%d\n", result_map);

 	map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
-	CHECK(!map, "find tmp_index_ovr_map", "!map\n");
+	RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
 	tmp_index_ovr_map = bpf_map__fd(map);
-	CHECK(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
-	      "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
+	RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+		"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);

 	map = bpf_object__find_map_by_name(obj, "linum_map");
-	CHECK(!map, "find linum_map", "!map\n");
+	RET_ERR(!map, "find linum_map", "!map\n");
 	linum_map = bpf_map__fd(map);
-	CHECK(linum_map == -1, "get linum_map fd",
-	      "linum_map:%d\n", linum_map);
+	RET_ERR(linum_map == -1, "get linum_map fd",
+		"linum_map:%d\n", linum_map);

 	map = bpf_object__find_map_by_name(obj, "data_check_map");
-	CHECK(!map, "find data_check_map", "!map\n");
+	RET_ERR(!map, "find data_check_map", "!map\n");
 	data_check_map = bpf_map__fd(map);
-	CHECK(data_check_map == -1, "get data_check_map fd",
-	      "data_check_map:%d\n", data_check_map);
+	RET_ERR(data_check_map == -1, "get data_check_map fd",
+		"data_check_map:%d\n", data_check_map);
+
+	return 0;
 }

 static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
···
 	int fd, ret;

 	fd = open(sysctl, 0);
-	CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
-	      sysctl, fd, errno);
+	RET_ERR(fd == -1, "open(sysctl)",
+		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

 	ret = read(fd, buf, sizeof(buf));
-	CHECK(ret <= 0, "read(sysctl)", "sysctl:%s ret:%d errno:%d\n",
-	      sysctl, ret, errno);
-	close(fd);
+	RET_ERR(ret <= 0, "read(sysctl)",
+		"sysctl:%s ret:%d errno:%d\n", sysctl, ret, errno);

+	close(fd);
 	return atoi(buf);
 }

-static void write_int_sysctl(const char *sysctl, int v)
+static int write_int_sysctl(const char *sysctl, int v)
 {
 	int fd, ret, size;
 	char buf[16];

 	fd = open(sysctl, O_RDWR);
-	CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
-	      sysctl, fd, errno);
+	RET_ERR(fd == -1, "open(sysctl)",
+		"sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);

 	size = snprintf(buf, sizeof(buf), "%d", v);
 	ret = write(fd, buf, size);
-	CHECK(ret != size, "write(sysctl)",
-	      "sysctl:%s ret:%d size:%d errno:%d\n", sysctl, ret, size, errno);
+	RET_ERR(ret != size, "write(sysctl)",
+		"sysctl:%s ret:%d size:%d errno:%d\n",
+		sysctl, ret, size, errno);
+
 	close(fd);
+	return 0;
 }

 static void restore_sysctls(void)
 {
-	write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
-	write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
+	if (saved_tcp_fo != -1)
+		write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
+	if (saved_tcp_syncookie != -1)
+		write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
 }

-static void enable_fastopen(void)
+static int enable_fastopen(void)
 {
 	int fo;

 	fo = read_int_sysctl(TCP_FO_SYSCTL);
-	write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
+	if (fo < 0)
+		return -1;
+
+	return write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
 }

-static void enable_syncookie(void)
+static int enable_syncookie(void)
 {
-	write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
+	return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
 }

-static void disable_syncookie(void)
+static int disable_syncookie(void)
 {
-	write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
+	return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
 }

-static __u32 get_linum(void)
+static long get_linum(void)
 {
 	__u32 linum;
 	int err;

 	err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
-	CHECK(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
-	      err, errno);
+	RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+		err, errno);

 	return linum;
 }
···
 	addrlen = sizeof(cli_sa);
 	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
 			  &addrlen);
-	CHECK(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
-	      err, errno);
+	RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+	       err, errno);

 	err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
-	CHECK(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
-	      err, errno);
+	RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+	       err, errno);

 	if (type == SOCK_STREAM) {
 		expected.len = MIN_TCPHDR_LEN;
···
 		printf("expected: (0x%x, %u, %u)\n",
 		       expected.eth_protocol, expected.ip_protocol,
 		       expected.bind_inany);
-		CHECK(1, "data_check result != expected",
-		      "bpf_prog_linum:%u\n", get_linum());
+		RET_IF(1, "data_check result != expected",
+		       "bpf_prog_linum:%ld\n", get_linum());
 	}

-	CHECK(!result.hash, "data_check result.hash empty",
-	      "result.hash:%u", result.hash);
+	RET_IF(!result.hash, "data_check result.hash empty",
+	       "result.hash:%u", result.hash);

 	expected.len += cmd ? sizeof(*cmd) : 0;
 	if (type == SOCK_STREAM)
-		CHECK(expected.len > result.len, "expected.len > result.len",
-		      "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
-		      expected.len, result.len, get_linum());
+		RET_IF(expected.len > result.len, "expected.len > result.len",
+		       "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
+		       expected.len, result.len, get_linum());
 	else
-		CHECK(expected.len != result.len, "expected.len != result.len",
-		      "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
-		      expected.len, result.len, get_linum());
+		RET_IF(expected.len != result.len, "expected.len != result.len",
+		       "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
+		       expected.len, result.len, get_linum());
 }

 static void check_results(void)
···
 	for (i = 0; i < NR_RESULTS; i++) {
 		err = bpf_map_lookup_elem(result_map, &i, &results[i]);
-		CHECK(err == -1, "lookup_elem(result_map)",
-		      "i:%u err:%d errno:%d\n", i, err, errno);
+		RET_IF(err == -1, "lookup_elem(result_map)",
+		       "i:%u err:%d errno:%d\n", i, err, errno);
 	}

 	for (i = 0; i < NR_RESULTS; i++) {
···
 		printf(", %u", expected_results[i]);
 	printf("]\n");
-	CHECK(expected_results[broken] != results[broken],
-	      "unexpected result",
-	      "expected_results[%u] != results[%u] bpf_prog_linum:%u\n",
-	      broken, broken, get_linum());
+	RET_IF(expected_results[broken] != results[broken],
+	       "unexpected result",
+	       "expected_results[%u] != results[%u] bpf_prog_linum:%ld\n",
+	       broken, broken, get_linum());
 }

 static int send_data(int type, sa_family_t family, void *data, size_t len,
···
 	int fd, err;

 	fd = socket(family, type, 0);
-	CHECK(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
+	RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);

 	sa46_init_loopback(&cli_sa, family);
 	err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
-	CHECK(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
+	RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);

 	err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
 		     sizeof(srv_sa));
-	CHECK(err != len && expected >= PASS,
-	      "sendto()", "family:%u err:%d errno:%d expected:%d\n",
-	      family, err, errno, expected);
+	RET_ERR(err != len && expected >= PASS,
+		"sendto()", "family:%u err:%d errno:%d expected:%d\n",
+		family, err, errno, expected);

 	return fd;
 }
···

 	cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
 			   expected);
+	if (cli_fd < 0)
+		return;
 	nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
-	CHECK((nev <= 0 && expected >= PASS) ||
-	      (nev > 0 && expected < PASS),
-	      "nev <> expected",
-	      "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
-	      nev, expected, type, family,
-	      cmd ? cmd->reuseport_index : -1,
-	      cmd ? cmd->pass_on_failure : -1);
+	RET_IF((nev <= 0 && expected >= PASS) ||
+	       (nev > 0 && expected < PASS),
+	       "nev <> expected",
+	       "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
+	       nev, expected, type, family,
+	       cmd ? cmd->reuseport_index : -1,
+	       cmd ? cmd->pass_on_failure : -1);
 	check_results();
 	check_data(type, family, cmd, cli_fd);

 	if (expected < PASS)
 		return;

-	CHECK(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
-	      cmd->reuseport_index != ev.data.u32,
-	      "check cmd->reuseport_index",
-	      "cmd:(%u, %u) ev.data.u32:%u\n",
-	      cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
+	RET_IF(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
+	       cmd->reuseport_index != ev.data.u32,
+	       "check cmd->reuseport_index",
+	       "cmd:(%u, %u) ev.data.u32:%u\n",
+	       cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);

 	srv_fd = sk_fds[ev.data.u32];
 	if (type == SOCK_STREAM) {
 		int new_fd = accept(srv_fd, NULL, 0);

-		CHECK(new_fd == -1, "accept(srv_fd)",
-		      "ev.data.u32:%u new_fd:%d errno:%d\n",
-		      ev.data.u32, new_fd, errno);
+		RET_IF(new_fd == -1, "accept(srv_fd)",
+		       "ev.data.u32:%u new_fd:%d errno:%d\n",
+		       ev.data.u32, new_fd, errno);

 		nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
-		CHECK(nread != sizeof(rcv_cmd),
-		      "recv(new_fd)",
-		      "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
-		      ev.data.u32, nread, sizeof(rcv_cmd), errno);
+		RET_IF(nread != sizeof(rcv_cmd),
+		       "recv(new_fd)",
+		       "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+		       ev.data.u32, nread, sizeof(rcv_cmd), errno);

 		close(new_fd);
 	} else {
 		nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
-		CHECK(nread != sizeof(rcv_cmd),
-		      "recv(sk_fds)",
-		      "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
-		      ev.data.u32, nread, sizeof(rcv_cmd), errno);
+		RET_IF(nread != sizeof(rcv_cmd),
+		       "recv(sk_fds)",
+		       "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+		       ev.data.u32, nread, sizeof(rcv_cmd), errno);
 	}

 	close(cli_fd);
···
 		.pass_on_failure = 0,
 	};

-	printf("%s: ", __func__);
 	expected_results[DROP_ERR_INNER_MAP]++;
 	do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
-	printf("OK\n");
 }

 static void test_err_skb_data(int type, sa_family_t family)
 {
-	printf("%s: ", __func__);
 	expected_results[DROP_ERR_SKB_DATA]++;
 	do_test(type, family, NULL, DROP_ERR_SKB_DATA);
-	printf("OK\n");
 }

 static void test_err_sk_select_port(int type, sa_family_t family)
···
 		.pass_on_failure = 0,
 	};

-	printf("%s: ", __func__);
 	expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
 	do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
-	printf("OK\n");
 }

 static void test_pass(int type, sa_family_t family)
···
 	struct cmd cmd;
 	int i;

-	printf("%s: ", __func__);
 	cmd.pass_on_failure = 0;
 	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
 		expected_results[PASS]++;
 		cmd.reuseport_index = i;
 		do_test(type, family, &cmd, PASS);
 	}
-	printf("OK\n");
 }

 static void test_syncookie(int type, sa_family_t family)
···
 	if (type != SOCK_STREAM)
 		return;

-	printf("%s: ", __func__);
 	/*
 	 * +1 for TCP-SYN and
 	 * +1 for the TCP-ACK (ack the syncookie)
···
 	 */
 	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
 				  &tmp_index, BPF_ANY);
-	CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
-	      "err:%d errno:%d\n", err, errno);
+	RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+	       "err:%d errno:%d\n", err, errno);
 	do_test(type, family, &cmd, PASS);
 	err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
 				  &tmp_index);
-	CHECK(err == -1 || tmp_index != -1,
-	      "lookup_elem(tmp_index_ovr_map)",
-	      "err:%d errno:%d tmp_index:%d\n",
-	      err, errno, tmp_index);
+	RET_IF(err == -1 || tmp_index != -1,
+	       "lookup_elem(tmp_index_ovr_map)",
+	       "err:%d errno:%d tmp_index:%d\n",
+	       err, errno, tmp_index);
 	disable_syncookie();
-	printf("OK\n");
 }

 static void test_pass_on_err(int type, sa_family_t family)
···
 		.pass_on_failure = 1,
 	};

-	printf("%s: ", __func__);
 	expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
 	do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
-	printf("OK\n");
 }

 static void test_detach_bpf(int type, sa_family_t family)
···
 	struct cmd cmd = {};
 	int optvalue = 0;

-	printf("%s: ", __func__);
 	err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
 			 &optvalue, sizeof(optvalue));
-	CHECK(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
-	      "err:%d errno:%d\n", err, errno);
+	RET_IF(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
+	       "err:%d errno:%d\n", err, errno);

 	err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
 			 &optvalue, sizeof(optvalue));
-	CHECK(err == 0 || errno != ENOENT, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
-	      "err:%d errno:%d\n", err, errno);
+	RET_IF(err == 0 || errno != ENOENT,
+	       "setsockopt(SO_DETACH_REUSEPORT_BPF)",
+	       "err:%d errno:%d\n", err, errno);

 	for (i = 0; i < NR_RESULTS; i++) {
 		err = bpf_map_lookup_elem(result_map, &i, &tmp);
-		CHECK(err == -1, "lookup_elem(result_map)",
-		      "i:%u err:%d errno:%d\n", i, err, errno);
+		RET_IF(err == -1, "lookup_elem(result_map)",
+		       "i:%u err:%d errno:%d\n", i, err, errno);
 		nr_run_before += tmp;
 	}

 	cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
+	if (cli_fd < 0)
+		return;
 	nev = epoll_wait(epfd, &ev, 1, 5);
-	CHECK(nev <= 0, "nev <= 0",
-	      "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
-	      nev, type, family);
+	RET_IF(nev <= 0, "nev <= 0",
+	       "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
+	       nev, type, family);

 	for (i = 0; i < NR_RESULTS; i++) {
 		err = bpf_map_lookup_elem(result_map, &i, &tmp);
-		CHECK(err == -1, "lookup_elem(result_map)",
-		      "i:%u err:%d errno:%d\n", i, err, errno);
+		RET_IF(err == -1, "lookup_elem(result_map)",
+		       "i:%u err:%d errno:%d\n", i, err, errno);
 		nr_run_after += tmp;
 	}

-	CHECK(nr_run_before != nr_run_after,
-	      "nr_run_before != nr_run_after",
-	      "nr_run_before:%u nr_run_after:%u\n",
-	      nr_run_before, nr_run_after);
+	RET_IF(nr_run_before != nr_run_after,
+	       "nr_run_before != nr_run_after",
+	       "nr_run_before:%u nr_run_after:%u\n",
+	       nr_run_before, nr_run_after);

-	printf("OK\n");
 	close(cli_fd);
 #else
-	printf("%s: SKIP\n", __func__);
+	test__skip();
 #endif
 }

···
 	 */
 	for (i = first; i >= 0; i--) {
 		sk_fds[i] = socket(family, type, 0);
-		CHECK(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
-		      i, sk_fds[i], errno);
+		RET_IF(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
+		       i, sk_fds[i], errno);
 		err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
 				 &optval, sizeof(optval));
-		CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
-		      "sk_fds[%d] err:%d errno:%d\n",
-		      i, err, errno);
+		RET_IF(err == -1, "setsockopt(SO_REUSEPORT)",
+		       "sk_fds[%d] err:%d errno:%d\n",
+		       i, err, errno);

 		if (i == first) {
 			err = setsockopt(sk_fds[i], SOL_SOCKET,
 					 SO_ATTACH_REUSEPORT_EBPF,
 					 &select_by_skb_data_prog,
 					 sizeof(select_by_skb_data_prog));
-			CHECK(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
-			      "err:%d errno:%d\n", err, errno);
+			RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+			       "err:%d errno:%d\n", err, errno);
 		}

 		err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
-		CHECK(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
-		      i, err, errno);
+		RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+		       i, err, errno);

 		if (type == SOCK_STREAM) {
 			err = listen(sk_fds[i], 10);
-			CHECK(err == -1, "listen()",
-			      "sk_fds[%d] err:%d errno:%d\n",
-			      i, err, errno);
+			RET_IF(err == -1, "listen()",
+			       "sk_fds[%d] err:%d errno:%d\n",
+			       i, err, errno);
 		}

 		err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
 					  BPF_NOEXIST);
-		CHECK(err == -1, "update_elem(reuseport_array)",
-		      "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+		RET_IF(err == -1, "update_elem(reuseport_array)",
+		       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);

 		if (i == first) {
 			socklen_t addrlen = sizeof(srv_sa);

 			err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
 					  &addrlen);
-			CHECK(err == -1, "getsockname()",
-			      "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+			RET_IF(err == -1, "getsockname()",
+			       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
 		}
 	}

 	epfd = epoll_create(1);
-	CHECK(epfd == -1, "epoll_create(1)",
-	      "epfd:%d errno:%d\n", epfd, errno);
+	RET_IF(epfd == -1, "epoll_create(1)",
+	       "epfd:%d errno:%d\n", epfd, errno);

 	ev.events = EPOLLIN;
 	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
 		ev.data.u32 = i;
 		err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
-		CHECK(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
+		RET_IF(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
 	}
 }

-static void setup_per_test(int type, unsigned short family, bool inany)
+static void setup_per_test(int type, sa_family_t family, bool inany,
+			   bool no_inner_map)
 {
 	int ovr = -1, err;

 	prepare_sk_fds(type, family, inany);
 	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
 				  BPF_ANY);
-	CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
-	      "err:%d errno:%d\n", err, errno);
+	RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+	       "err:%d errno:%d\n", err, errno);
+
+	/* Install reuseport_array to outer_map? */
+	if (no_inner_map)
+		return;
+
+	err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
+				  BPF_ANY);
+	RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
+	       "err:%d errno:%d\n", err, errno);
 }

-static void cleanup_per_test(void)
+static void cleanup_per_test(bool no_inner_map)
 {
 	int i, err;
···
 		close(sk_fds[i]);
 	close(epfd);

+	/* Delete reuseport_array from outer_map? */
+	if (no_inner_map)
+		return;
+
 	err = bpf_map_delete_elem(outer_map, &index_zero);
-	CHECK(err == -1, "delete_elem(outer_map)",
-	      "err:%d errno:%d\n", err, errno);
+	RET_IF(err == -1, "delete_elem(outer_map)",
+	       "err:%d errno:%d\n", err, errno);
 }

 static void cleanup(void)
 {
-	close(outer_map);
-	close(reuseport_array);
-	bpf_object__close(obj);
+	if (outer_map != -1)
+		close(outer_map);
+	if (reuseport_array != -1)
+		close(reuseport_array);
+	if (obj)
+		bpf_object__close(obj);
 }

-static void test_all(void)
+static const char *family_str(sa_family_t family)
 {
-	/* Extra SOCK_STREAM to test bind_inany==true */
-	const int types[] = { SOCK_STREAM, SOCK_DGRAM, SOCK_STREAM };
-	const char * const type_strings[] = { "TCP", "UDP", "TCP" };
-	const char * const family_strings[] = { "IPv6", "IPv4" };
-	const unsigned short families[] = { AF_INET6, AF_INET };
-	const bool bind_inany[] = { false, false, true };
-	int t, f, err;
-
-	for (f = 0; f < ARRAY_SIZE(families); f++) {
-		unsigned short family = families[f];
-
-		for (t = 0; t < ARRAY_SIZE(types); t++) {
-			bool inany = bind_inany[t];
-			int type = types[t];
-
-			printf("######## %s/%s %s ########\n",
-			       family_strings[f], type_strings[t],
-			       inany ? " INANY " : "LOOPBACK");
-
-			setup_per_test(type, family, inany);
-
-			test_err_inner_map(type, family);
-
-			/* Install reuseport_array to the outer_map */
-			err = bpf_map_update_elem(outer_map, &index_zero,
-						  &reuseport_array, BPF_ANY);
-			CHECK(err == -1, "update_elem(outer_map)",
-			      "err:%d errno:%d\n", err, errno);
-
-			test_err_skb_data(type, family);
-			test_err_sk_select_port(type, family);
-			test_pass(type, family);
-			test_syncookie(type, family);
-			test_pass_on_err(type, family);
-			/* Must be the last test */
-			test_detach_bpf(type, family);
-
-			cleanup_per_test();
-			printf("\n");
-		}
+	switch (family) {
+	case AF_INET:
+		return "IPv4";
+	case AF_INET6:
+		return "IPv6";
+	default:
+		return "unknown";
 	}
 }

-int main(int argc, const char **argv)
+static const char *sotype_str(int sotype)
 {
-	create_maps();
-	prepare_bpf_obj();
+	switch (sotype) {
+	case SOCK_STREAM:
+		return "TCP";
+	case SOCK_DGRAM:
+		return "UDP";
+	default:
+		return "unknown";
+	}
+}
+
+#define TEST_INIT(fn, ...) { fn, #fn, __VA_ARGS__ }
+
+static void test_config(int sotype, sa_family_t family, bool inany)
+{
+	const struct test {
+		void (*fn)(int sotype, sa_family_t family);
+		const char *name;
+		bool no_inner_map;
+	} tests[] = {
+		TEST_INIT(test_err_inner_map, true /* no_inner_map */),
+		TEST_INIT(test_err_skb_data),
+		TEST_INIT(test_err_sk_select_port),
+		TEST_INIT(test_pass),
+		TEST_INIT(test_syncookie),
+		TEST_INIT(test_pass_on_err),
+		TEST_INIT(test_detach_bpf),
+	};
+	char s[MAX_TEST_NAME];
+	const struct test *t;
+
+	for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+		snprintf(s, sizeof(s), "%s/%s %s %s",
+			 family_str(family), sotype_str(sotype),
+			 inany ? "INANY" : "LOOPBACK", t->name);
+
+		if (!test__start_subtest(s))
+			continue;
+
+		setup_per_test(sotype, family, inany, t->no_inner_map);
+		t->fn(sotype, family);
+		cleanup_per_test(t->no_inner_map);
+	}
+}
+
+#define BIND_INANY true
+
+static void test_all(void)
+{
+	const struct config {
+		int sotype;
+		sa_family_t family;
+		bool inany;
+	} configs[] = {
+		{ SOCK_STREAM, AF_INET },
+		{ SOCK_STREAM, AF_INET, BIND_INANY },
+		{ SOCK_STREAM, AF_INET6 },
+		{ SOCK_STREAM, AF_INET6, BIND_INANY },
+		{ SOCK_DGRAM, AF_INET },
+		{ SOCK_DGRAM, AF_INET6 },
+	};
+	const struct config *c;
+
+	for (c = configs; c < configs + ARRAY_SIZE(configs); c++)
+		test_config(c->sotype, c->family, c->inany);
+}
+
+void test_select_reuseport(void)
+{
+	if (create_maps())
+		goto out;
+	if (prepare_bpf_obj())
+		goto out;
+
 	saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
 	saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
-	enable_fastopen();
-	disable_syncookie();
-	atexit(restore_sysctls);
+	if (saved_tcp_fo < 0 || saved_tcp_syncookie < 0)
+		goto out;
+
+	if (enable_fastopen())
+		goto out;
+	if (disable_syncookie())
+		goto out;

 	test_all();
-
+out:
 	cleanup();
-	return 0;
+	restore_sysctls();
 }