Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2023-08-25

We've added 87 non-merge commits during the last 8 day(s) which contain
a total of 104 files changed, 3719 insertions(+), 4212 deletions(-).

The main changes are:

1) Add multi uprobe BPF links for attaching multiple uprobes
and usdt probes, which is significantly faster and saves extra fds,
from Jiri Olsa.

2) Add support for BPF cpu v4 instructions for arm64 JIT compiler,
from Xu Kuohai.

3) Add support for BPF cpu v4 instructions for riscv64 JIT compiler,
from Pu Lehui.

4) Fix LWT BPF xmit hooks wrt their return values where propagating
the result from skb_do_redirect() would trigger a use-after-free,
from Yan Zhai.

5) Fix a BPF verifier issue related to bpf_kptr_xchg() with local kptr
where the map's value kptr type and locally allocated obj type
mismatch, from Yonghong Song.

6) Fix BPF verifier's check_func_arg_reg_off() function wrt graph
root/node which bypassed reg->off == 0 enforcement,
from Kumar Kartikeya Dwivedi.

7) Lift BPF verifier restriction in networking BPF programs to treat
comparison of packet pointers not as a pointer leak,
from Yafang Shao.

8) Remove unmaintained XDP BPF samples as they are maintained
in xdp-tools repository out of tree, from Toke Høiland-Jørgensen.

9) Batch of fixes for the tracing programs from BPF samples in order
to make them more libbpf-aware, from Daniel T. Lee.

10) Fix a libbpf signedness determination bug in the CO-RE relocation
handling logic, from Andrii Nakryiko.

11) Extend libbpf to support CO-RE kfunc relocations. Also follow-up
fixes for bpf_refcount shared ownership implementation,
both from Dave Marchevsky.

12) Add a new bpf_object__unpin() API function to libbpf,
from Daniel Xu.

13) Fix a memory leak in libbpf to also free btf_vmlinux
when the bpf_object gets closed, from Hao Luo.

14) Small error output improvements to test_bpf module, from Helge Deller.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (87 commits)
selftests/bpf: Add tests for rbtree API interaction in sleepable progs
bpf: Allow bpf_spin_{lock,unlock} in sleepable progs
bpf: Consider non-owning refs to refcounted nodes RCU protected
bpf: Reenable bpf_refcount_acquire
bpf: Use bpf_mem_free_rcu when bpf_obj_dropping refcounted nodes
bpf: Consider non-owning refs trusted
bpf: Ensure kptr_struct_meta is non-NULL for collection insert and refcount_acquire
selftests/bpf: Enable cpu v4 tests for RV64
riscv, bpf: Support unconditional bswap insn
riscv, bpf: Support signed div/mod insns
riscv, bpf: Support 32-bit offset jmp insn
riscv, bpf: Support sign-extension mov insns
riscv, bpf: Support sign-extension load insns
riscv, bpf: Fix missing exception handling and redundant zext for LDX_B/H/W
samples/bpf: Add note to README about the XDP utilities moved to xdp-tools
samples/bpf: Cleanup .gitignore
samples/bpf: Remove the xdp_sample_pkts utility
samples/bpf: Remove the xdp1 and xdp2 utilities
samples/bpf: Remove the xdp_rxq_info utility
samples/bpf: Remove the xdp_redirect* utilities
...
====================

Link: https://lore.kernel.org/r/20230825194319.12727-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+3720 -4213
+4
arch/arm64/include/asm/insn.h
··· 186 186 AARCH64_INSN_LDST_LOAD_ACQ_EX, 187 187 AARCH64_INSN_LDST_STORE_EX, 188 188 AARCH64_INSN_LDST_STORE_REL_EX, 189 + AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET, 190 + AARCH64_INSN_LDST_SIGNED_LOAD_REG_OFFSET, 189 191 }; 190 192 191 193 enum aarch64_insn_adsb_type { ··· 326 324 __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) 327 325 __AARCH64_INSN_FUNCS(store_imm, 0x3FC00000, 0x39000000) 328 326 __AARCH64_INSN_FUNCS(load_imm, 0x3FC00000, 0x39400000) 327 + __AARCH64_INSN_FUNCS(signed_load_imm, 0X3FC00000, 0x39800000) 329 328 __AARCH64_INSN_FUNCS(store_pre, 0x3FE00C00, 0x38000C00) 330 329 __AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00) 331 330 __AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400) ··· 340 337 __AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000) 341 338 __AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00) 342 339 __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) 340 + __AARCH64_INSN_FUNCS(signed_ldr_reg, 0X3FE0FC00, 0x38A0E800) 343 341 __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000) 344 342 __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) 345 343 __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
+6
arch/arm64/lib/insn.c
··· 385 385 case AARCH64_INSN_LDST_LOAD_REG_OFFSET: 386 386 insn = aarch64_insn_get_ldr_reg_value(); 387 387 break; 388 + case AARCH64_INSN_LDST_SIGNED_LOAD_REG_OFFSET: 389 + insn = aarch64_insn_get_signed_ldr_reg_value(); 390 + break; 388 391 case AARCH64_INSN_LDST_STORE_REG_OFFSET: 389 392 insn = aarch64_insn_get_str_reg_value(); 390 393 break; ··· 432 429 switch (type) { 433 430 case AARCH64_INSN_LDST_LOAD_IMM_OFFSET: 434 431 insn = aarch64_insn_get_ldr_imm_value(); 432 + break; 433 + case AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET: 434 + insn = aarch64_insn_get_signed_load_imm_value(); 435 435 break; 436 436 case AARCH64_INSN_LDST_STORE_IMM_OFFSET: 437 437 insn = aarch64_insn_get_str_imm_value();
+12
arch/arm64/net/bpf_jit.h
··· 59 59 AARCH64_INSN_LDST_##type##_REG_OFFSET) 60 60 #define A64_STRB(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 8, STORE) 61 61 #define A64_LDRB(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 8, LOAD) 62 + #define A64_LDRSB(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 8, SIGNED_LOAD) 62 63 #define A64_STRH(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 16, STORE) 63 64 #define A64_LDRH(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 16, LOAD) 65 + #define A64_LDRSH(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 16, SIGNED_LOAD) 64 66 #define A64_STR32(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 32, STORE) 65 67 #define A64_LDR32(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 32, LOAD) 68 + #define A64_LDRSW(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 32, SIGNED_LOAD) 66 69 #define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE) 67 70 #define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD) 68 71 ··· 76 73 AARCH64_INSN_LDST_##type##_IMM_OFFSET) 77 74 #define A64_STRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, STORE) 78 75 #define A64_LDRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, LOAD) 76 + #define A64_LDRSBI(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 8, SIGNED_LOAD) 79 77 #define A64_STRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, STORE) 80 78 #define A64_LDRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, LOAD) 79 + #define A64_LDRSHI(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 16, SIGNED_LOAD) 81 80 #define A64_STR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, STORE) 82 81 #define A64_LDR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, LOAD) 82 + #define A64_LDRSWI(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 32, SIGNED_LOAD) 83 83 #define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE) 84 84 #define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD) 85 85 ··· 192 186 #define A64_UXTH(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 15) 193 187 #define A64_UXTW(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 31) 194 188 189 + /* Sign extend */ 190 + #define A64_SXTB(sf, Rd, Rn) A64_SBFM(sf, Rd, Rn, 0, 7) 191 + #define A64_SXTH(sf, Rd, Rn) A64_SBFM(sf, Rd, Rn, 0, 15) 192 + #define 
A64_SXTW(sf, Rd, Rn) A64_SBFM(sf, Rd, Rn, 0, 31) 193 + 195 194 /* Move wide (immediate) */ 196 195 #define A64_MOVEW(sf, Rd, imm16, shift, type) \ 197 196 aarch64_insn_gen_movewide(Rd, imm16, shift, \ ··· 234 223 #define A64_DATA2(sf, Rd, Rn, Rm, type) aarch64_insn_gen_data2(Rd, Rn, Rm, \ 235 224 A64_VARIANT(sf), AARCH64_INSN_DATA2_##type) 236 225 #define A64_UDIV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, UDIV) 226 + #define A64_SDIV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, SDIV) 237 227 #define A64_LSLV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSLV) 238 228 #define A64_LSRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSRV) 239 229 #define A64_ASRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, ASRV)
+75 -16
arch/arm64/net/bpf_jit_comp.c
··· 715 715 /* First pass */ 716 716 return 0; 717 717 718 - if (BPF_MODE(insn->code) != BPF_PROBE_MEM) 718 + if (BPF_MODE(insn->code) != BPF_PROBE_MEM && 719 + BPF_MODE(insn->code) != BPF_PROBE_MEMSX) 719 720 return 0; 720 721 721 722 if (!ctx->prog->aux->extable || ··· 780 779 u8 dst_adj; 781 780 int off_adj; 782 781 int ret; 782 + bool sign_extend; 783 783 784 784 switch (code) { 785 785 /* dst = src */ 786 786 case BPF_ALU | BPF_MOV | BPF_X: 787 787 case BPF_ALU64 | BPF_MOV | BPF_X: 788 - emit(A64_MOV(is64, dst, src), ctx); 788 + switch (insn->off) { 789 + case 0: 790 + emit(A64_MOV(is64, dst, src), ctx); 791 + break; 792 + case 8: 793 + emit(A64_SXTB(is64, dst, src), ctx); 794 + break; 795 + case 16: 796 + emit(A64_SXTH(is64, dst, src), ctx); 797 + break; 798 + case 32: 799 + emit(A64_SXTW(is64, dst, src), ctx); 800 + break; 801 + } 789 802 break; 790 803 /* dst = dst OP src */ 791 804 case BPF_ALU | BPF_ADD | BPF_X: ··· 828 813 break; 829 814 case BPF_ALU | BPF_DIV | BPF_X: 830 815 case BPF_ALU64 | BPF_DIV | BPF_X: 831 - emit(A64_UDIV(is64, dst, dst, src), ctx); 816 + if (!off) 817 + emit(A64_UDIV(is64, dst, dst, src), ctx); 818 + else 819 + emit(A64_SDIV(is64, dst, dst, src), ctx); 832 820 break; 833 821 case BPF_ALU | BPF_MOD | BPF_X: 834 822 case BPF_ALU64 | BPF_MOD | BPF_X: 835 - emit(A64_UDIV(is64, tmp, dst, src), ctx); 823 + if (!off) 824 + emit(A64_UDIV(is64, tmp, dst, src), ctx); 825 + else 826 + emit(A64_SDIV(is64, tmp, dst, src), ctx); 836 827 emit(A64_MSUB(is64, dst, dst, tmp, src), ctx); 837 828 break; 838 829 case BPF_ALU | BPF_LSH | BPF_X: ··· 861 840 /* dst = BSWAP##imm(dst) */ 862 841 case BPF_ALU | BPF_END | BPF_FROM_LE: 863 842 case BPF_ALU | BPF_END | BPF_FROM_BE: 843 + case BPF_ALU64 | BPF_END | BPF_FROM_LE: 864 844 #ifdef CONFIG_CPU_BIG_ENDIAN 865 - if (BPF_SRC(code) == BPF_FROM_BE) 845 + if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE) 866 846 goto emit_bswap_uxt; 867 847 #else /* !CONFIG_CPU_BIG_ENDIAN */ 868 - if 
(BPF_SRC(code) == BPF_FROM_LE) 848 + if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE) 869 849 goto emit_bswap_uxt; 870 850 #endif 871 851 switch (imm) { ··· 965 943 case BPF_ALU | BPF_DIV | BPF_K: 966 944 case BPF_ALU64 | BPF_DIV | BPF_K: 967 945 emit_a64_mov_i(is64, tmp, imm, ctx); 968 - emit(A64_UDIV(is64, dst, dst, tmp), ctx); 946 + if (!off) 947 + emit(A64_UDIV(is64, dst, dst, tmp), ctx); 948 + else 949 + emit(A64_SDIV(is64, dst, dst, tmp), ctx); 969 950 break; 970 951 case BPF_ALU | BPF_MOD | BPF_K: 971 952 case BPF_ALU64 | BPF_MOD | BPF_K: 972 953 emit_a64_mov_i(is64, tmp2, imm, ctx); 973 - emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); 954 + if (!off) 955 + emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); 956 + else 957 + emit(A64_SDIV(is64, tmp, dst, tmp2), ctx); 974 958 emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx); 975 959 break; 976 960 case BPF_ALU | BPF_LSH | BPF_K: ··· 994 966 995 967 /* JUMP off */ 996 968 case BPF_JMP | BPF_JA: 997 - jmp_offset = bpf2a64_offset(i, off, ctx); 969 + case BPF_JMP32 | BPF_JA: 970 + if (BPF_CLASS(code) == BPF_JMP) 971 + jmp_offset = bpf2a64_offset(i, off, ctx); 972 + else 973 + jmp_offset = bpf2a64_offset(i, imm, ctx); 998 974 check_imm26(jmp_offset); 999 975 emit(A64_B(jmp_offset), ctx); 1000 976 break; ··· 1154 1122 return 1; 1155 1123 } 1156 1124 1157 - /* LDX: dst = *(size *)(src + off) */ 1125 + /* LDX: dst = (u64)*(unsigned size *)(src + off) */ 1158 1126 case BPF_LDX | BPF_MEM | BPF_W: 1159 1127 case BPF_LDX | BPF_MEM | BPF_H: 1160 1128 case BPF_LDX | BPF_MEM | BPF_B: ··· 1163 1131 case BPF_LDX | BPF_PROBE_MEM | BPF_W: 1164 1132 case BPF_LDX | BPF_PROBE_MEM | BPF_H: 1165 1133 case BPF_LDX | BPF_PROBE_MEM | BPF_B: 1134 + /* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */ 1135 + case BPF_LDX | BPF_MEMSX | BPF_B: 1136 + case BPF_LDX | BPF_MEMSX | BPF_H: 1137 + case BPF_LDX | BPF_MEMSX | BPF_W: 1138 + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: 1139 + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: 1140 + case 
BPF_LDX | BPF_PROBE_MEMSX | BPF_W: 1166 1141 if (ctx->fpb_offset > 0 && src == fp) { 1167 1142 src_adj = fpb; 1168 1143 off_adj = off + ctx->fpb_offset; ··· 1177 1138 src_adj = src; 1178 1139 off_adj = off; 1179 1140 } 1141 + sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX || 1142 + BPF_MODE(insn->code) == BPF_PROBE_MEMSX); 1180 1143 switch (BPF_SIZE(code)) { 1181 1144 case BPF_W: 1182 1145 if (is_lsi_offset(off_adj, 2)) { 1183 - emit(A64_LDR32I(dst, src_adj, off_adj), ctx); 1146 + if (sign_extend) 1147 + emit(A64_LDRSWI(dst, src_adj, off_adj), ctx); 1148 + else 1149 + emit(A64_LDR32I(dst, src_adj, off_adj), ctx); 1184 1150 } else { 1185 1151 emit_a64_mov_i(1, tmp, off, ctx); 1186 - emit(A64_LDR32(dst, src, tmp), ctx); 1152 + if (sign_extend) 1153 + emit(A64_LDRSW(dst, src_adj, off_adj), ctx); 1154 + else 1155 + emit(A64_LDR32(dst, src, tmp), ctx); 1187 1156 } 1188 1157 break; 1189 1158 case BPF_H: 1190 1159 if (is_lsi_offset(off_adj, 1)) { 1191 - emit(A64_LDRHI(dst, src_adj, off_adj), ctx); 1160 + if (sign_extend) 1161 + emit(A64_LDRSHI(dst, src_adj, off_adj), ctx); 1162 + else 1163 + emit(A64_LDRHI(dst, src_adj, off_adj), ctx); 1192 1164 } else { 1193 1165 emit_a64_mov_i(1, tmp, off, ctx); 1194 - emit(A64_LDRH(dst, src, tmp), ctx); 1166 + if (sign_extend) 1167 + emit(A64_LDRSH(dst, src, tmp), ctx); 1168 + else 1169 + emit(A64_LDRH(dst, src, tmp), ctx); 1195 1170 } 1196 1171 break; 1197 1172 case BPF_B: 1198 1173 if (is_lsi_offset(off_adj, 0)) { 1199 - emit(A64_LDRBI(dst, src_adj, off_adj), ctx); 1174 + if (sign_extend) 1175 + emit(A64_LDRSBI(dst, src_adj, off_adj), ctx); 1176 + else 1177 + emit(A64_LDRBI(dst, src_adj, off_adj), ctx); 1200 1178 } else { 1201 1179 emit_a64_mov_i(1, tmp, off, ctx); 1202 - emit(A64_LDRB(dst, src, tmp), ctx); 1180 + if (sign_extend) 1181 + emit(A64_LDRSB(dst, src, tmp), ctx); 1182 + else 1183 + emit(A64_LDRB(dst, src, tmp), ctx); 1203 1184 } 1204 1185 break; 1205 1186 case BPF_DW:
+30
arch/riscv/net/bpf_jit.h
··· 431 431 return rv_r_insn(1, rs2, rs1, 3, rd, 0x33); 432 432 } 433 433 434 + static inline u32 rv_div(u8 rd, u8 rs1, u8 rs2) 435 + { 436 + return rv_r_insn(1, rs2, rs1, 4, rd, 0x33); 437 + } 438 + 434 439 static inline u32 rv_divu(u8 rd, u8 rs1, u8 rs2) 435 440 { 436 441 return rv_r_insn(1, rs2, rs1, 5, rd, 0x33); 442 + } 443 + 444 + static inline u32 rv_rem(u8 rd, u8 rs1, u8 rs2) 445 + { 446 + return rv_r_insn(1, rs2, rs1, 6, rd, 0x33); 437 447 } 438 448 439 449 static inline u32 rv_remu(u8 rd, u8 rs1, u8 rs2) ··· 509 499 static inline u32 rv_ble(u8 rs1, u8 rs2, u16 imm12_1) 510 500 { 511 501 return rv_bge(rs2, rs1, imm12_1); 502 + } 503 + 504 + static inline u32 rv_lb(u8 rd, u16 imm11_0, u8 rs1) 505 + { 506 + return rv_i_insn(imm11_0, rs1, 0, rd, 0x03); 507 + } 508 + 509 + static inline u32 rv_lh(u8 rd, u16 imm11_0, u8 rs1) 510 + { 511 + return rv_i_insn(imm11_0, rs1, 1, rd, 0x03); 512 512 } 513 513 514 514 static inline u32 rv_lw(u8 rd, u16 imm11_0, u8 rs1) ··· 786 766 return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b); 787 767 } 788 768 769 + static inline u32 rv_divw(u8 rd, u8 rs1, u8 rs2) 770 + { 771 + return rv_r_insn(1, rs2, rs1, 4, rd, 0x3b); 772 + } 773 + 789 774 static inline u32 rv_divuw(u8 rd, u8 rs1, u8 rs2) 790 775 { 791 776 return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b); 777 + } 778 + 779 + static inline u32 rv_remw(u8 rd, u8 rs1, u8 rs2) 780 + { 781 + return rv_r_insn(1, rs2, rs1, 6, rd, 0x3b); 792 782 } 793 783 794 784 static inline u32 rv_remuw(u8 rd, u8 rs1, u8 rs2)
+80 -22
arch/riscv/net/bpf_jit_comp64.c
··· 580 580 unsigned long pc; 581 581 off_t offset; 582 582 583 - if (!ctx->insns || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) 583 + if (!ctx->insns || !ctx->prog->aux->extable || 584 + (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX)) 584 585 return 0; 585 586 586 587 if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries)) ··· 1047 1046 emit_zext_32(rd, ctx); 1048 1047 break; 1049 1048 } 1050 - emit_mv(rd, rs, ctx); 1049 + switch (insn->off) { 1050 + case 0: 1051 + emit_mv(rd, rs, ctx); 1052 + break; 1053 + case 8: 1054 + case 16: 1055 + emit_slli(RV_REG_T1, rs, 64 - insn->off, ctx); 1056 + emit_srai(rd, RV_REG_T1, 64 - insn->off, ctx); 1057 + break; 1058 + case 32: 1059 + emit_addiw(rd, rs, 0, ctx); 1060 + break; 1061 + } 1051 1062 if (!is64 && !aux->verifier_zext) 1052 1063 emit_zext_32(rd, ctx); 1053 1064 break; ··· 1107 1094 break; 1108 1095 case BPF_ALU | BPF_DIV | BPF_X: 1109 1096 case BPF_ALU64 | BPF_DIV | BPF_X: 1110 - emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); 1097 + if (off) 1098 + emit(is64 ? rv_div(rd, rd, rs) : rv_divw(rd, rd, rs), ctx); 1099 + else 1100 + emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); 1111 1101 if (!is64 && !aux->verifier_zext) 1112 1102 emit_zext_32(rd, ctx); 1113 1103 break; 1114 1104 case BPF_ALU | BPF_MOD | BPF_X: 1115 1105 case BPF_ALU64 | BPF_MOD | BPF_X: 1116 - emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); 1106 + if (off) 1107 + emit(is64 ? rv_rem(rd, rd, rs) : rv_remw(rd, rd, rs), ctx); 1108 + else 1109 + emit(is64 ? 
rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); 1117 1110 if (!is64 && !aux->verifier_zext) 1118 1111 emit_zext_32(rd, ctx); 1119 1112 break; ··· 1168 1149 break; 1169 1150 1170 1151 case BPF_ALU | BPF_END | BPF_FROM_BE: 1152 + case BPF_ALU64 | BPF_END | BPF_FROM_LE: 1171 1153 emit_li(RV_REG_T2, 0, ctx); 1172 1154 1173 1155 emit_andi(RV_REG_T1, rd, 0xff, ctx); ··· 1291 1271 case BPF_ALU | BPF_DIV | BPF_K: 1292 1272 case BPF_ALU64 | BPF_DIV | BPF_K: 1293 1273 emit_imm(RV_REG_T1, imm, ctx); 1294 - emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : 1295 - rv_divuw(rd, rd, RV_REG_T1), ctx); 1274 + if (off) 1275 + emit(is64 ? rv_div(rd, rd, RV_REG_T1) : 1276 + rv_divw(rd, rd, RV_REG_T1), ctx); 1277 + else 1278 + emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : 1279 + rv_divuw(rd, rd, RV_REG_T1), ctx); 1296 1280 if (!is64 && !aux->verifier_zext) 1297 1281 emit_zext_32(rd, ctx); 1298 1282 break; 1299 1283 case BPF_ALU | BPF_MOD | BPF_K: 1300 1284 case BPF_ALU64 | BPF_MOD | BPF_K: 1301 1285 emit_imm(RV_REG_T1, imm, ctx); 1302 - emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : 1303 - rv_remuw(rd, rd, RV_REG_T1), ctx); 1286 + if (off) 1287 + emit(is64 ? rv_rem(rd, rd, RV_REG_T1) : 1288 + rv_remw(rd, rd, RV_REG_T1), ctx); 1289 + else 1290 + emit(is64 ? 
rv_remu(rd, rd, RV_REG_T1) : 1291 + rv_remuw(rd, rd, RV_REG_T1), ctx); 1304 1292 if (!is64 && !aux->verifier_zext) 1305 1293 emit_zext_32(rd, ctx); 1306 1294 break; ··· 1342 1314 1343 1315 /* JUMP off */ 1344 1316 case BPF_JMP | BPF_JA: 1345 - rvoff = rv_offset(i, off, ctx); 1317 + case BPF_JMP32 | BPF_JA: 1318 + if (BPF_CLASS(code) == BPF_JMP) 1319 + rvoff = rv_offset(i, off, ctx); 1320 + else 1321 + rvoff = rv_offset(i, imm, ctx); 1346 1322 ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); 1347 1323 if (ret) 1348 1324 return ret; ··· 1518 1486 return 1; 1519 1487 } 1520 1488 1521 - /* LDX: dst = *(size *)(src + off) */ 1489 + /* LDX: dst = *(unsigned size *)(src + off) */ 1522 1490 case BPF_LDX | BPF_MEM | BPF_B: 1523 1491 case BPF_LDX | BPF_MEM | BPF_H: 1524 1492 case BPF_LDX | BPF_MEM | BPF_W: ··· 1527 1495 case BPF_LDX | BPF_PROBE_MEM | BPF_H: 1528 1496 case BPF_LDX | BPF_PROBE_MEM | BPF_W: 1529 1497 case BPF_LDX | BPF_PROBE_MEM | BPF_DW: 1498 + /* LDSX: dst = *(signed size *)(src + off) */ 1499 + case BPF_LDX | BPF_MEMSX | BPF_B: 1500 + case BPF_LDX | BPF_MEMSX | BPF_H: 1501 + case BPF_LDX | BPF_MEMSX | BPF_W: 1502 + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: 1503 + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: 1504 + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: 1530 1505 { 1531 1506 int insn_len, insns_start; 1507 + bool sign_ext; 1508 + 1509 + sign_ext = BPF_MODE(insn->code) == BPF_MEMSX || 1510 + BPF_MODE(insn->code) == BPF_PROBE_MEMSX; 1532 1511 1533 1512 switch (BPF_SIZE(code)) { 1534 1513 case BPF_B: 1535 1514 if (is_12b_int(off)) { 1536 1515 insns_start = ctx->ninsns; 1537 - emit(rv_lbu(rd, off, rs), ctx); 1516 + if (sign_ext) 1517 + emit(rv_lb(rd, off, rs), ctx); 1518 + else 1519 + emit(rv_lbu(rd, off, rs), ctx); 1538 1520 insn_len = ctx->ninsns - insns_start; 1539 1521 break; 1540 1522 } ··· 1556 1510 emit_imm(RV_REG_T1, off, ctx); 1557 1511 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 1558 1512 insns_start = ctx->ninsns; 1559 - emit(rv_lbu(rd, 0, RV_REG_T1), 
ctx); 1513 + if (sign_ext) 1514 + emit(rv_lb(rd, 0, RV_REG_T1), ctx); 1515 + else 1516 + emit(rv_lbu(rd, 0, RV_REG_T1), ctx); 1560 1517 insn_len = ctx->ninsns - insns_start; 1561 - if (insn_is_zext(&insn[1])) 1562 - return 1; 1563 1518 break; 1564 1519 case BPF_H: 1565 1520 if (is_12b_int(off)) { 1566 1521 insns_start = ctx->ninsns; 1567 - emit(rv_lhu(rd, off, rs), ctx); 1522 + if (sign_ext) 1523 + emit(rv_lh(rd, off, rs), ctx); 1524 + else 1525 + emit(rv_lhu(rd, off, rs), ctx); 1568 1526 insn_len = ctx->ninsns - insns_start; 1569 1527 break; 1570 1528 } ··· 1576 1526 emit_imm(RV_REG_T1, off, ctx); 1577 1527 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 1578 1528 insns_start = ctx->ninsns; 1579 - emit(rv_lhu(rd, 0, RV_REG_T1), ctx); 1529 + if (sign_ext) 1530 + emit(rv_lh(rd, 0, RV_REG_T1), ctx); 1531 + else 1532 + emit(rv_lhu(rd, 0, RV_REG_T1), ctx); 1580 1533 insn_len = ctx->ninsns - insns_start; 1581 - if (insn_is_zext(&insn[1])) 1582 - return 1; 1583 1534 break; 1584 1535 case BPF_W: 1585 1536 if (is_12b_int(off)) { 1586 1537 insns_start = ctx->ninsns; 1587 - emit(rv_lwu(rd, off, rs), ctx); 1538 + if (sign_ext) 1539 + emit(rv_lw(rd, off, rs), ctx); 1540 + else 1541 + emit(rv_lwu(rd, off, rs), ctx); 1588 1542 insn_len = ctx->ninsns - insns_start; 1589 1543 break; 1590 1544 } ··· 1596 1542 emit_imm(RV_REG_T1, off, ctx); 1597 1543 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 1598 1544 insns_start = ctx->ninsns; 1599 - emit(rv_lwu(rd, 0, RV_REG_T1), ctx); 1545 + if (sign_ext) 1546 + emit(rv_lw(rd, 0, RV_REG_T1), ctx); 1547 + else 1548 + emit(rv_lwu(rd, 0, RV_REG_T1), ctx); 1600 1549 insn_len = ctx->ninsns - insns_start; 1601 - if (insn_is_zext(&insn[1])) 1602 - return 1; 1603 1550 break; 1604 1551 case BPF_DW: 1605 1552 if (is_12b_int(off)) { ··· 1621 1566 ret = add_exception_handler(insn, ctx, rd, insn_len); 1622 1567 if (ret) 1623 1568 return ret; 1569 + 1570 + if (BPF_SIZE(code) != BPF_DW && insn_is_zext(&insn[1])) 1571 + return 1; 1624 1572 break; 1625 1573 } 1626 1574 
/* speculation barrier */
+2 -1
include/linux/bpf.h
··· 653 653 MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS), 654 654 655 655 /* Used to tag PTR_TO_BTF_ID | MEM_ALLOC references which are non-owning. 656 - * Currently only valid for linked-list and rbtree nodes. 656 + * Currently only valid for linked-list and rbtree nodes. If the nodes 657 + * have a bpf_refcount_field, they must be tagged MEM_RCU as well. 657 658 */ 658 659 NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS), 659 660
+1 -1
include/linux/bpf_verifier.h
··· 745 745 } 746 746 } 747 747 748 - #define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED) 748 + #define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED | NON_OWN_REF) 749 749 750 750 static inline bool bpf_type_has_unsafe_modifiers(u32 type) 751 751 {
+6
include/linux/trace_events.h
··· 752 752 u32 *fd_type, const char **buf, 753 753 u64 *probe_offset, u64 *probe_addr); 754 754 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); 755 + int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); 755 756 #else 756 757 static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) 757 758 { ··· 796 795 } 797 796 static inline int 798 797 bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 798 + { 799 + return -EOPNOTSUPP; 800 + } 801 + static inline int 802 + bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 799 803 { 800 804 return -EOPNOTSUPP; 801 805 }
+4 -1
include/net/lwtunnel.h
··· 16 16 #define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1) 17 17 #define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2) 18 18 19 + /* LWTUNNEL_XMIT_CONTINUE should be distinguishable from dst_output return 20 + * values (NET_XMIT_xxx and NETDEV_TX_xxx in linux/netdevice.h) for safety. 21 + */ 19 22 enum { 20 23 LWTUNNEL_XMIT_DONE, 21 - LWTUNNEL_XMIT_CONTINUE, 24 + LWTUNNEL_XMIT_CONTINUE = 0x100, 22 25 }; 23 26 24 27
+21 -1
include/uapi/linux/bpf.h
··· 1039 1039 BPF_NETFILTER, 1040 1040 BPF_TCX_INGRESS, 1041 1041 BPF_TCX_EGRESS, 1042 + BPF_TRACE_UPROBE_MULTI, 1042 1043 __MAX_BPF_ATTACH_TYPE 1043 1044 }; 1044 1045 ··· 1058 1057 BPF_LINK_TYPE_STRUCT_OPS = 9, 1059 1058 BPF_LINK_TYPE_NETFILTER = 10, 1060 1059 BPF_LINK_TYPE_TCX = 11, 1060 + BPF_LINK_TYPE_UPROBE_MULTI = 12, 1061 1061 MAX_BPF_LINK_TYPE, 1062 1062 }; 1063 1063 ··· 1188 1186 /* link_create.kprobe_multi.flags used in LINK_CREATE command for 1189 1187 * BPF_TRACE_KPROBE_MULTI attach type to create return probe. 1190 1188 */ 1191 - #define BPF_F_KPROBE_MULTI_RETURN (1U << 0) 1189 + enum { 1190 + BPF_F_KPROBE_MULTI_RETURN = (1U << 0) 1191 + }; 1192 + 1193 + /* link_create.uprobe_multi.flags used in LINK_CREATE command for 1194 + * BPF_TRACE_UPROBE_MULTI attach type to create return probe. 1195 + */ 1196 + enum { 1197 + BPF_F_UPROBE_MULTI_RETURN = (1U << 0) 1198 + }; 1192 1199 1193 1200 /* link_create.netfilter.flags used in LINK_CREATE command for 1194 1201 * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. ··· 1635 1624 }; 1636 1625 __u64 expected_revision; 1637 1626 } tcx; 1627 + struct { 1628 + __aligned_u64 path; 1629 + __aligned_u64 offsets; 1630 + __aligned_u64 ref_ctr_offsets; 1631 + __aligned_u64 cookies; 1632 + __u32 cnt; 1633 + __u32 flags; 1634 + __u32 pid; 1635 + } uprobe_multi; 1638 1636 }; 1639 1637 } link_create; 1640 1638
+35 -78
kernel/bpf/cpumap.c
··· 68 68 struct bpf_cpumap_val value; 69 69 struct bpf_prog *prog; 70 70 71 - atomic_t refcnt; /* Control when this struct can be free'ed */ 72 - struct rcu_head rcu; 73 - 74 - struct work_struct kthread_stop_wq; 75 71 struct completion kthread_running; 72 + struct rcu_work free_work; 76 73 }; 77 74 78 75 struct bpf_cpu_map { ··· 114 117 return &cmap->map; 115 118 } 116 119 117 - static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) 118 - { 119 - atomic_inc(&rcpu->refcnt); 120 - } 121 - 122 120 static void __cpu_map_ring_cleanup(struct ptr_ring *ring) 123 121 { 124 122 /* The tear-down procedure should have made sure that queue is ··· 132 140 } 133 141 xdp_return_frame(ptr); 134 142 } 135 - } 136 - 137 - static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) 138 - { 139 - if (atomic_dec_and_test(&rcpu->refcnt)) { 140 - if (rcpu->prog) 141 - bpf_prog_put(rcpu->prog); 142 - /* The queue should be empty at this point */ 143 - __cpu_map_ring_cleanup(rcpu->queue); 144 - ptr_ring_cleanup(rcpu->queue, NULL); 145 - kfree(rcpu->queue); 146 - kfree(rcpu); 147 - } 148 - } 149 - 150 - /* called from workqueue, to workaround syscall using preempt_disable */ 151 - static void cpu_map_kthread_stop(struct work_struct *work) 152 - { 153 - struct bpf_cpu_map_entry *rcpu; 154 - 155 - rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq); 156 - 157 - /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier, 158 - * as it waits until all in-flight call_rcu() callbacks complete. 
159 - */ 160 - rcu_barrier(); 161 - 162 - /* kthread_stop will wake_up_process and wait for it to complete */ 163 - kthread_stop(rcpu->kthread); 164 143 } 165 144 166 145 static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu, ··· 358 395 } 359 396 __set_current_state(TASK_RUNNING); 360 397 361 - put_cpu_map_entry(rcpu); 362 398 return 0; 363 399 } 364 400 ··· 434 472 if (IS_ERR(rcpu->kthread)) 435 473 goto free_prog; 436 474 437 - get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */ 438 - get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */ 439 - 440 475 /* Make sure kthread runs on a single CPU */ 441 476 kthread_bind(rcpu->kthread, cpu); 442 477 wake_up_process(rcpu->kthread); ··· 460 501 return NULL; 461 502 } 462 503 463 - static void __cpu_map_entry_free(struct rcu_head *rcu) 504 + static void __cpu_map_entry_free(struct work_struct *work) 464 505 { 465 506 struct bpf_cpu_map_entry *rcpu; 466 507 467 508 /* This cpu_map_entry have been disconnected from map and one 468 - * RCU grace-period have elapsed. Thus, XDP cannot queue any 509 + * RCU grace-period have elapsed. Thus, XDP cannot queue any 469 510 * new packets and cannot change/set flush_needed that can 470 511 * find this entry. 471 512 */ 472 - rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu); 513 + rcpu = container_of(to_rcu_work(work), struct bpf_cpu_map_entry, free_work); 473 514 515 + /* kthread_stop will wake_up_process and wait for it to complete. 516 + * cpu_map_kthread_run() makes sure the pointer ring is empty 517 + * before exiting. 
518 + */ 519 + kthread_stop(rcpu->kthread); 520 + 521 + if (rcpu->prog) 522 + bpf_prog_put(rcpu->prog); 523 + /* The queue should be empty at this point */ 524 + __cpu_map_ring_cleanup(rcpu->queue); 525 + ptr_ring_cleanup(rcpu->queue, NULL); 526 + kfree(rcpu->queue); 474 527 free_percpu(rcpu->bulkq); 475 - /* Cannot kthread_stop() here, last put free rcpu resources */ 476 - put_cpu_map_entry(rcpu); 528 + kfree(rcpu); 477 529 } 478 530 479 - /* After xchg pointer to bpf_cpu_map_entry, use the call_rcu() to 480 - * ensure any driver rcu critical sections have completed, but this 481 - * does not guarantee a flush has happened yet. Because driver side 482 - * rcu_read_lock/unlock only protects the running XDP program. The 483 - * atomic xchg and NULL-ptr check in __cpu_map_flush() makes sure a 484 - * pending flush op doesn't fail. 485 - * 486 - * The bpf_cpu_map_entry is still used by the kthread, and there can 487 - * still be pending packets (in queue and percpu bulkq). A refcnt 488 - * makes sure to last user (kthread_stop vs. call_rcu) free memory 489 - * resources. 490 - * 491 - * The rcu callback __cpu_map_entry_free flush remaining packets in 492 - * percpu bulkq to queue. Due to caller map_delete_elem() disable 493 - * preemption, cannot call kthread_stop() to make sure queue is empty. 494 - * Instead a work_queue is started for stopping kthread, 495 - * cpu_map_kthread_stop, which waits for an RCU grace period before 496 - * stopping kthread, emptying the queue. 531 + /* After the xchg of the bpf_cpu_map_entry pointer, we need to make sure the old 532 + * entry is no longer in use before freeing. We use queue_rcu_work() to call 533 + * __cpu_map_entry_free() in a separate workqueue after waiting for an RCU grace 534 + * period. 
This means that (a) all pending enqueue and flush operations have 535 + * completed (because of the RCU callback), and (b) we are in a workqueue 536 + * context where we can stop the kthread and wait for it to exit before freeing 537 + * everything. 497 538 */ 498 539 static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap, 499 540 u32 key_cpu, struct bpf_cpu_map_entry *rcpu) ··· 502 543 503 544 old_rcpu = unrcu_pointer(xchg(&cmap->cpu_map[key_cpu], RCU_INITIALIZER(rcpu))); 504 545 if (old_rcpu) { 505 - call_rcu(&old_rcpu->rcu, __cpu_map_entry_free); 506 - INIT_WORK(&old_rcpu->kthread_stop_wq, cpu_map_kthread_stop); 507 - schedule_work(&old_rcpu->kthread_stop_wq); 546 + INIT_RCU_WORK(&old_rcpu->free_work, __cpu_map_entry_free); 547 + queue_rcu_work(system_wq, &old_rcpu->free_work); 508 548 } 509 549 } 510 550 ··· 515 557 if (key_cpu >= map->max_entries) 516 558 return -EINVAL; 517 559 518 - /* notice caller map_delete_elem() use preempt_disable() */ 560 + /* notice caller map_delete_elem() uses rcu_read_lock() */ 519 561 __cpu_map_entry_replace(cmap, key_cpu, NULL); 520 562 return 0; 521 563 } ··· 566 608 /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, 567 609 * so the bpf programs (can be more than one that used this map) were 568 610 * disconnected from events. Wait for outstanding critical sections in 569 - * these programs to complete. The rcu critical section only guarantees 570 - * no further "XDP/bpf-side" reads against bpf_cpu_map->cpu_map. 571 - * It does __not__ ensure pending flush operations (if any) are 572 - * complete. 611 + * these programs to complete. synchronize_rcu() below not only 612 + * guarantees no further "XDP/bpf-side" reads against 613 + * bpf_cpu_map->cpu_map, but also ensure pending flush operations 614 + * (if any) are completed. 573 615 */ 574 - 575 616 synchronize_rcu(); 576 617 577 - /* For cpu_map the remote CPUs can still be using the entries 578 - * (struct bpf_cpu_map_entry). 
618 + /* The only possible user of bpf_cpu_map_entry is 619 + * cpu_map_kthread_run(). 579 620 */ 580 621 for (i = 0; i < cmap->map.max_entries; i++) { 581 622 struct bpf_cpu_map_entry *rcpu; ··· 583 626 if (!rcpu) 584 627 continue; 585 628 586 - /* bq flush and cleanup happens after RCU grace-period */ 587 - __cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */ 629 + /* Stop kthread and cleanup entry directly */ 630 + __cpu_map_entry_free(&rcpu->free_work.work); 588 631 } 589 632 bpf_map_area_free(cmap->cpu_map); 590 633 bpf_map_area_free(cmap);
+7 -1
kernel/bpf/helpers.c
··· 286 286 compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); 287 287 BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); 288 288 BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); 289 + preempt_disable(); 289 290 arch_spin_lock(l); 290 291 } 291 292 ··· 295 294 arch_spinlock_t *l = (void *)lock; 296 295 297 296 arch_spin_unlock(l); 297 + preempt_enable(); 298 298 } 299 299 300 300 #else ··· 1915 1913 1916 1914 if (rec) 1917 1915 bpf_obj_free_fields(rec, p); 1918 - bpf_mem_free(&bpf_global_ma, p); 1916 + 1917 + if (rec && rec->refcount_off >= 0) 1918 + bpf_mem_free_rcu(&bpf_global_ma, p); 1919 + else 1920 + bpf_mem_free(&bpf_global_ma, p); 1919 1921 } 1920 1922 1921 1923 __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
+63 -72
kernel/bpf/syscall.c
··· 657 657 if (!btf_is_kernel(field->kptr.btf)) { 658 658 pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, 659 659 field->kptr.btf_id); 660 - WARN_ON_ONCE(!pointee_struct_meta); 661 660 migrate_disable(); 662 661 __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? 663 662 pointee_struct_meta->record : ··· 2814 2815 2815 2816 /* Clean up bpf_link and corresponding anon_inode file and FD. After 2816 2817 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred 2817 - * anon_inode's release() call. This helper marksbpf_link as 2818 + * anon_inode's release() call. This helper marks bpf_link as 2818 2819 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt 2819 2820 * is not decremented, it's the responsibility of a calling code that failed 2820 2821 * to complete bpf_link initialization. 2822 + * This helper eventually calls link's dealloc callback, but does not call 2823 + * link's release callback. 2821 2824 */ 2822 2825 void bpf_link_cleanup(struct bpf_link_primer *primer) 2823 2826 { ··· 3656 3655 return fd; 3657 3656 } 3658 3657 3659 - static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 3660 - enum bpf_attach_type attach_type) 3661 - { 3662 - switch (prog->type) { 3663 - case BPF_PROG_TYPE_CGROUP_SOCK: 3664 - case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3665 - case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3666 - case BPF_PROG_TYPE_SK_LOOKUP: 3667 - return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 3668 - case BPF_PROG_TYPE_CGROUP_SKB: 3669 - if (!capable(CAP_NET_ADMIN)) 3670 - /* cg-skb progs can be loaded by unpriv user. 3671 - * check permissions at attach time. 3672 - */ 3673 - return -EPERM; 3674 - return prog->enforce_expected_attach_type && 3675 - prog->expected_attach_type != attach_type ? 
3676 - -EINVAL : 0; 3677 - case BPF_PROG_TYPE_KPROBE: 3678 - if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && 3679 - attach_type != BPF_TRACE_KPROBE_MULTI) 3680 - return -EINVAL; 3681 - return 0; 3682 - default: 3683 - return 0; 3684 - } 3685 - } 3686 - 3687 3658 static enum bpf_prog_type 3688 3659 attach_type_to_prog_type(enum bpf_attach_type attach_type) 3689 3660 { ··· 3719 3746 return BPF_PROG_TYPE_SCHED_CLS; 3720 3747 default: 3721 3748 return BPF_PROG_TYPE_UNSPEC; 3749 + } 3750 + } 3751 + 3752 + static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 3753 + enum bpf_attach_type attach_type) 3754 + { 3755 + enum bpf_prog_type ptype; 3756 + 3757 + switch (prog->type) { 3758 + case BPF_PROG_TYPE_CGROUP_SOCK: 3759 + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3760 + case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3761 + case BPF_PROG_TYPE_SK_LOOKUP: 3762 + return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 3763 + case BPF_PROG_TYPE_CGROUP_SKB: 3764 + if (!capable(CAP_NET_ADMIN)) 3765 + /* cg-skb progs can be loaded by unpriv user. 3766 + * check permissions at attach time. 3767 + */ 3768 + return -EPERM; 3769 + return prog->enforce_expected_attach_type && 3770 + prog->expected_attach_type != attach_type ? 
3771 + -EINVAL : 0; 3772 + case BPF_PROG_TYPE_EXT: 3773 + return 0; 3774 + case BPF_PROG_TYPE_NETFILTER: 3775 + if (attach_type != BPF_NETFILTER) 3776 + return -EINVAL; 3777 + return 0; 3778 + case BPF_PROG_TYPE_PERF_EVENT: 3779 + case BPF_PROG_TYPE_TRACEPOINT: 3780 + if (attach_type != BPF_PERF_EVENT) 3781 + return -EINVAL; 3782 + return 0; 3783 + case BPF_PROG_TYPE_KPROBE: 3784 + if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && 3785 + attach_type != BPF_TRACE_KPROBE_MULTI) 3786 + return -EINVAL; 3787 + if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI && 3788 + attach_type != BPF_TRACE_UPROBE_MULTI) 3789 + return -EINVAL; 3790 + if (attach_type != BPF_PERF_EVENT && 3791 + attach_type != BPF_TRACE_KPROBE_MULTI && 3792 + attach_type != BPF_TRACE_UPROBE_MULTI) 3793 + return -EINVAL; 3794 + return 0; 3795 + case BPF_PROG_TYPE_SCHED_CLS: 3796 + if (attach_type != BPF_TCX_INGRESS && 3797 + attach_type != BPF_TCX_EGRESS) 3798 + return -EINVAL; 3799 + return 0; 3800 + default: 3801 + ptype = attach_type_to_prog_type(attach_type); 3802 + if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) 3803 + return -EINVAL; 3804 + return 0; 3722 3805 } 3723 3806 } 3724 3807 ··· 4881 4852 return err; 4882 4853 } 4883 4854 4884 - #define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.cookies 4855 + #define BPF_LINK_CREATE_LAST_FIELD link_create.uprobe_multi.pid 4885 4856 static int link_create(union bpf_attr *attr, bpfptr_t uattr) 4886 4857 { 4887 - enum bpf_prog_type ptype; 4888 4858 struct bpf_prog *prog; 4889 4859 int ret; 4890 4860 ··· 4901 4873 attr->link_create.attach_type); 4902 4874 if (ret) 4903 4875 goto out; 4904 - 4905 - switch (prog->type) { 4906 - case BPF_PROG_TYPE_EXT: 4907 - break; 4908 - case BPF_PROG_TYPE_NETFILTER: 4909 - if (attr->link_create.attach_type != BPF_NETFILTER) { 4910 - ret = -EINVAL; 4911 - goto out; 4912 - } 4913 - break; 4914 - case BPF_PROG_TYPE_PERF_EVENT: 4915 - case BPF_PROG_TYPE_TRACEPOINT: 4916 - if 
(attr->link_create.attach_type != BPF_PERF_EVENT) { 4917 - ret = -EINVAL; 4918 - goto out; 4919 - } 4920 - break; 4921 - case BPF_PROG_TYPE_KPROBE: 4922 - if (attr->link_create.attach_type != BPF_PERF_EVENT && 4923 - attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) { 4924 - ret = -EINVAL; 4925 - goto out; 4926 - } 4927 - break; 4928 - case BPF_PROG_TYPE_SCHED_CLS: 4929 - if (attr->link_create.attach_type != BPF_TCX_INGRESS && 4930 - attr->link_create.attach_type != BPF_TCX_EGRESS) { 4931 - ret = -EINVAL; 4932 - goto out; 4933 - } 4934 - break; 4935 - default: 4936 - ptype = attach_type_to_prog_type(attr->link_create.attach_type); 4937 - if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) { 4938 - ret = -EINVAL; 4939 - goto out; 4940 - } 4941 - break; 4942 - } 4943 4876 4944 4877 switch (prog->type) { 4945 4878 case BPF_PROG_TYPE_CGROUP_SKB: ··· 4958 4969 case BPF_PROG_TYPE_KPROBE: 4959 4970 if (attr->link_create.attach_type == BPF_PERF_EVENT) 4960 4971 ret = bpf_perf_link_attach(attr, prog); 4961 - else 4972 + else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI) 4962 4973 ret = bpf_kprobe_multi_link_attach(attr, prog); 4974 + else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI) 4975 + ret = bpf_uprobe_multi_link_attach(attr, prog); 4963 4976 break; 4964 4977 default: 4965 4978 ret = -EINVAL;
+54 -40
kernel/bpf/verifier.c
··· 4990 4990 struct bpf_reg_state *reg, u32 regno) 4991 4991 { 4992 4992 const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id); 4993 - int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU; 4993 + int perm_flags; 4994 4994 const char *reg_name = ""; 4995 4995 4996 - /* Only unreferenced case accepts untrusted pointers */ 4997 - if (kptr_field->type == BPF_KPTR_UNREF) 4998 - perm_flags |= PTR_UNTRUSTED; 4996 + if (btf_is_kernel(reg->btf)) { 4997 + perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU; 4998 + 4999 + /* Only unreferenced case accepts untrusted pointers */ 5000 + if (kptr_field->type == BPF_KPTR_UNREF) 5001 + perm_flags |= PTR_UNTRUSTED; 5002 + } else { 5003 + perm_flags = PTR_MAYBE_NULL | MEM_ALLOC; 5004 + } 4999 5005 5000 5006 if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags)) 5001 5007 goto bad_type; 5002 5008 5003 - if (!btf_is_kernel(reg->btf)) { 5004 - verbose(env, "R%d must point to kernel BTF\n", regno); 5005 - return -EINVAL; 5006 - } 5007 5009 /* We need to verify reg->type and reg->btf, before accessing reg->btf */ 5008 5010 reg_name = btf_type_name(reg->btf, reg->btf_id); 5009 5011 ··· 5018 5016 if (__check_ptr_off_reg(env, reg, regno, true)) 5019 5017 return -EACCES; 5020 5018 5021 - /* A full type match is needed, as BTF can be vmlinux or module BTF, and 5019 + /* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and 5022 5020 * we also need to take into account the reg->off. 
5023 5021 * 5024 5022 * We want to support cases like: ··· 5064 5062 */ 5065 5063 static bool in_rcu_cs(struct bpf_verifier_env *env) 5066 5064 { 5067 - return env->cur_state->active_rcu_lock || !env->prog->aux->sleepable; 5065 + return env->cur_state->active_rcu_lock || 5066 + env->cur_state->active_lock.ptr || 5067 + !env->prog->aux->sleepable; 5068 5068 } 5069 5069 5070 5070 /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */ ··· 7920 7916 verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n"); 7921 7917 return -EFAULT; 7922 7918 } 7923 - /* Handled by helper specific checks */ 7919 + if (meta->func_id == BPF_FUNC_kptr_xchg) { 7920 + if (map_kptr_match_type(env, meta->kptr_field, reg, regno)) 7921 + return -EACCES; 7922 + } 7924 7923 break; 7925 7924 case PTR_TO_BTF_ID | MEM_PERCPU: 7926 7925 case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED: ··· 7975 7968 if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK) 7976 7969 return 0; 7977 7970 7978 - if ((type_is_ptr_alloc_obj(type) || type_is_non_owning_ref(type)) && reg->off) { 7979 - if (reg_find_field_offset(reg, reg->off, BPF_GRAPH_NODE_OR_ROOT)) 7980 - return __check_ptr_off_reg(env, reg, regno, true); 7981 - 7982 - verbose(env, "R%d must have zero offset when passed to release func\n", 7983 - regno); 7984 - verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno, 7985 - btf_type_name(reg->btf, reg->btf_id), reg->off); 7986 - return -EINVAL; 7987 - } 7988 - 7989 7971 /* Doing check_ptr_off_reg check for the offset will catch this 7990 7972 * because fixed_off_ok is false, but checking here allows us 7991 7973 * to give the user a better error message. 
··· 8009 8013 case PTR_TO_BTF_ID | PTR_TRUSTED: 8010 8014 case PTR_TO_BTF_ID | MEM_RCU: 8011 8015 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF: 8016 + case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU: 8012 8017 /* When referenced PTR_TO_BTF_ID is passed to release function, 8013 8018 * its fixed offset must be 0. In the other cases, fixed offset 8014 8019 * can be non-zero. This was already checked above. So pass ··· 10476 10479 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 10477 10480 { 10478 10481 struct bpf_verifier_state *state = env->cur_state; 10482 + struct btf_record *rec = reg_btf_record(reg); 10479 10483 10480 10484 if (!state->active_lock.ptr) { 10481 10485 verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n"); ··· 10489 10491 } 10490 10492 10491 10493 reg->type |= NON_OWN_REF; 10494 + if (rec->refcount_off >= 0) 10495 + reg->type |= MEM_RCU; 10496 + 10492 10497 return 0; 10493 10498 } 10494 10499 ··· 11224 11223 verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i); 11225 11224 return -EINVAL; 11226 11225 } 11227 - if (rec->refcount_off >= 0) { 11228 - verbose(env, "bpf_refcount_acquire calls are disabled for now\n"); 11229 - return -EINVAL; 11230 - } 11226 + 11231 11227 meta->arg_btf = reg->btf; 11232 11228 meta->arg_btf_id = reg->btf_id; 11233 11229 break; ··· 11328 11330 if (env->cur_state->active_rcu_lock) { 11329 11331 struct bpf_func_state *state; 11330 11332 struct bpf_reg_state *reg; 11333 + 11334 + if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) { 11335 + verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n"); 11336 + return -EACCES; 11337 + } 11331 11338 11332 11339 if (rcu_lock) { 11333 11340 verbose(env, "nested rcu read lock (kernel function %s)\n", func_name); ··· 14050 14047 return -EINVAL; 14051 14048 } 14052 14049 14050 + /* check src2 operand */ 14051 + err = check_reg_arg(env, insn->dst_reg, 
SRC_OP); 14052 + if (err) 14053 + return err; 14054 + 14055 + dst_reg = &regs[insn->dst_reg]; 14053 14056 if (BPF_SRC(insn->code) == BPF_X) { 14054 14057 if (insn->imm != 0) { 14055 14058 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); ··· 14067 14058 if (err) 14068 14059 return err; 14069 14060 14070 - if (is_pointer_value(env, insn->src_reg)) { 14061 + src_reg = &regs[insn->src_reg]; 14062 + if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) && 14063 + is_pointer_value(env, insn->src_reg)) { 14071 14064 verbose(env, "R%d pointer comparison prohibited\n", 14072 14065 insn->src_reg); 14073 14066 return -EACCES; 14074 14067 } 14075 - src_reg = &regs[insn->src_reg]; 14076 14068 } else { 14077 14069 if (insn->src_reg != BPF_REG_0) { 14078 14070 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); ··· 14081 14071 } 14082 14072 } 14083 14073 14084 - /* check src2 operand */ 14085 - err = check_reg_arg(env, insn->dst_reg, SRC_OP); 14086 - if (err) 14087 - return err; 14088 - 14089 - dst_reg = &regs[insn->dst_reg]; 14090 14074 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; 14091 14075 14092 14076 if (BPF_SRC(insn->code) == BPF_K) { ··· 16696 16692 return -EINVAL; 16697 16693 } 16698 16694 16699 - if (env->cur_state->active_rcu_lock) { 16695 + if (env->cur_state->active_rcu_lock && 16696 + !in_rbtree_lock_required_cb(env)) { 16700 16697 verbose(env, "bpf_rcu_read_unlock is missing\n"); 16701 16698 return -EINVAL; 16702 16699 } ··· 16975 16970 16976 16971 if (is_tracing_prog_type(prog_type)) { 16977 16972 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); 16978 - return -EINVAL; 16979 - } 16980 - 16981 - if (prog->aux->sleepable) { 16982 - verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n"); 16983 16973 return -EINVAL; 16984 16974 } 16985 16975 } ··· 18281 18281 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; 18282 18282 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, 
(long)kptr_struct_meta) }; 18283 18283 18284 + if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] && 18285 + !kptr_struct_meta) { 18286 + verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n", 18287 + insn_idx); 18288 + return -EFAULT; 18289 + } 18290 + 18284 18291 insn_buf[0] = addr[0]; 18285 18292 insn_buf[1] = addr[1]; 18286 18293 insn_buf[2] = *insn; ··· 18295 18288 } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] || 18296 18289 desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] || 18297 18290 desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { 18291 + struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; 18298 18292 int struct_meta_reg = BPF_REG_3; 18299 18293 int node_offset_reg = BPF_REG_4; 18300 18294 ··· 18303 18295 if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { 18304 18296 struct_meta_reg = BPF_REG_4; 18305 18297 node_offset_reg = BPF_REG_5; 18298 + } 18299 + 18300 + if (!kptr_struct_meta) { 18301 + verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n", 18302 + insn_idx); 18303 + return -EFAULT; 18306 18304 } 18307 18305 18308 18306 __fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
+336 -6
kernel/trace/bpf_trace.c
··· 23 23 #include <linux/sort.h> 24 24 #include <linux/key.h> 25 25 #include <linux/verification.h> 26 + #include <linux/namei.h> 26 27 27 28 #include <net/bpf_sk_storage.h> 28 29 ··· 86 85 s32 *btf_id); 87 86 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx); 88 87 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx); 88 + 89 + static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx); 90 + static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx); 89 91 90 92 /** 91 93 * trace_call_bpf - invoke BPF program ··· 1107 1103 .arg1_type = ARG_PTR_TO_CTX, 1108 1104 }; 1109 1105 1106 + BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs) 1107 + { 1108 + return bpf_uprobe_multi_entry_ip(current->bpf_ctx); 1109 + } 1110 + 1111 + static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = { 1112 + .func = bpf_get_func_ip_uprobe_multi, 1113 + .gpl_only = false, 1114 + .ret_type = RET_INTEGER, 1115 + .arg1_type = ARG_PTR_TO_CTX, 1116 + }; 1117 + 1118 + BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs) 1119 + { 1120 + return bpf_uprobe_multi_cookie(current->bpf_ctx); 1121 + } 1122 + 1123 + static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = { 1124 + .func = bpf_get_attach_cookie_uprobe_multi, 1125 + .gpl_only = false, 1126 + .ret_type = RET_INTEGER, 1127 + .arg1_type = ARG_PTR_TO_CTX, 1128 + }; 1129 + 1110 1130 BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx) 1111 1131 { 1112 1132 struct bpf_trace_run_ctx *run_ctx; ··· 1573 1545 return &bpf_override_return_proto; 1574 1546 #endif 1575 1547 case BPF_FUNC_get_func_ip: 1576 - return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ? 
1577 - &bpf_get_func_ip_proto_kprobe_multi : 1578 - &bpf_get_func_ip_proto_kprobe; 1548 + if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI) 1549 + return &bpf_get_func_ip_proto_kprobe_multi; 1550 + if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI) 1551 + return &bpf_get_func_ip_proto_uprobe_multi; 1552 + return &bpf_get_func_ip_proto_kprobe; 1579 1553 case BPF_FUNC_get_attach_cookie: 1580 - return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ? 1581 - &bpf_get_attach_cookie_proto_kmulti : 1582 - &bpf_get_attach_cookie_proto_trace; 1554 + if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI) 1555 + return &bpf_get_attach_cookie_proto_kmulti; 1556 + if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI) 1557 + return &bpf_get_attach_cookie_proto_umulti; 1558 + return &bpf_get_attach_cookie_proto_trace; 1583 1559 default: 1584 1560 return bpf_tracing_func_proto(func_id, prog); 1585 1561 } ··· 3002 2970 return 0; 3003 2971 } 3004 2972 #endif 2973 + 2974 + #ifdef CONFIG_UPROBES 2975 + struct bpf_uprobe_multi_link; 2976 + 2977 + struct bpf_uprobe { 2978 + struct bpf_uprobe_multi_link *link; 2979 + loff_t offset; 2980 + u64 cookie; 2981 + struct uprobe_consumer consumer; 2982 + }; 2983 + 2984 + struct bpf_uprobe_multi_link { 2985 + struct path path; 2986 + struct bpf_link link; 2987 + u32 cnt; 2988 + struct bpf_uprobe *uprobes; 2989 + struct task_struct *task; 2990 + }; 2991 + 2992 + struct bpf_uprobe_multi_run_ctx { 2993 + struct bpf_run_ctx run_ctx; 2994 + unsigned long entry_ip; 2995 + struct bpf_uprobe *uprobe; 2996 + }; 2997 + 2998 + static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, 2999 + u32 cnt) 3000 + { 3001 + u32 i; 3002 + 3003 + for (i = 0; i < cnt; i++) { 3004 + uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, 3005 + &uprobes[i].consumer); 3006 + } 3007 + } 3008 + 3009 + static void bpf_uprobe_multi_link_release(struct bpf_link *link) 3010 + { 3011 + struct bpf_uprobe_multi_link 
*umulti_link; 3012 + 3013 + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3014 + bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt); 3015 + } 3016 + 3017 + static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) 3018 + { 3019 + struct bpf_uprobe_multi_link *umulti_link; 3020 + 3021 + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3022 + if (umulti_link->task) 3023 + put_task_struct(umulti_link->task); 3024 + path_put(&umulti_link->path); 3025 + kvfree(umulti_link->uprobes); 3026 + kfree(umulti_link); 3027 + } 3028 + 3029 + static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { 3030 + .release = bpf_uprobe_multi_link_release, 3031 + .dealloc = bpf_uprobe_multi_link_dealloc, 3032 + }; 3033 + 3034 + static int uprobe_prog_run(struct bpf_uprobe *uprobe, 3035 + unsigned long entry_ip, 3036 + struct pt_regs *regs) 3037 + { 3038 + struct bpf_uprobe_multi_link *link = uprobe->link; 3039 + struct bpf_uprobe_multi_run_ctx run_ctx = { 3040 + .entry_ip = entry_ip, 3041 + .uprobe = uprobe, 3042 + }; 3043 + struct bpf_prog *prog = link->link.prog; 3044 + bool sleepable = prog->aux->sleepable; 3045 + struct bpf_run_ctx *old_run_ctx; 3046 + int err = 0; 3047 + 3048 + if (link->task && current != link->task) 3049 + return 0; 3050 + 3051 + if (sleepable) 3052 + rcu_read_lock_trace(); 3053 + else 3054 + rcu_read_lock(); 3055 + 3056 + migrate_disable(); 3057 + 3058 + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 3059 + err = bpf_prog_run(link->link.prog, regs); 3060 + bpf_reset_run_ctx(old_run_ctx); 3061 + 3062 + migrate_enable(); 3063 + 3064 + if (sleepable) 3065 + rcu_read_unlock_trace(); 3066 + else 3067 + rcu_read_unlock(); 3068 + return err; 3069 + } 3070 + 3071 + static bool 3072 + uprobe_multi_link_filter(struct uprobe_consumer *con, enum uprobe_filter_ctx ctx, 3073 + struct mm_struct *mm) 3074 + { 3075 + struct bpf_uprobe *uprobe; 3076 + 3077 + uprobe = container_of(con, struct 
bpf_uprobe, consumer); 3078 + return uprobe->link->task->mm == mm; 3079 + } 3080 + 3081 + static int 3082 + uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) 3083 + { 3084 + struct bpf_uprobe *uprobe; 3085 + 3086 + uprobe = container_of(con, struct bpf_uprobe, consumer); 3087 + return uprobe_prog_run(uprobe, instruction_pointer(regs), regs); 3088 + } 3089 + 3090 + static int 3091 + uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) 3092 + { 3093 + struct bpf_uprobe *uprobe; 3094 + 3095 + uprobe = container_of(con, struct bpf_uprobe, consumer); 3096 + return uprobe_prog_run(uprobe, func, regs); 3097 + } 3098 + 3099 + static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 3100 + { 3101 + struct bpf_uprobe_multi_run_ctx *run_ctx; 3102 + 3103 + run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx); 3104 + return run_ctx->entry_ip; 3105 + } 3106 + 3107 + static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) 3108 + { 3109 + struct bpf_uprobe_multi_run_ctx *run_ctx; 3110 + 3111 + run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx); 3112 + return run_ctx->uprobe->cookie; 3113 + } 3114 + 3115 + int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3116 + { 3117 + struct bpf_uprobe_multi_link *link = NULL; 3118 + unsigned long __user *uref_ctr_offsets; 3119 + unsigned long *ref_ctr_offsets = NULL; 3120 + struct bpf_link_primer link_primer; 3121 + struct bpf_uprobe *uprobes = NULL; 3122 + struct task_struct *task = NULL; 3123 + unsigned long __user *uoffsets; 3124 + u64 __user *ucookies; 3125 + void __user *upath; 3126 + u32 flags, cnt, i; 3127 + struct path path; 3128 + char *name; 3129 + pid_t pid; 3130 + int err; 3131 + 3132 + /* no support for 32bit archs yet */ 3133 + if (sizeof(u64) != sizeof(void *)) 3134 + return -EOPNOTSUPP; 3135 + 3136 + if (prog->expected_attach_type != 
BPF_TRACE_UPROBE_MULTI) 3137 + return -EINVAL; 3138 + 3139 + flags = attr->link_create.uprobe_multi.flags; 3140 + if (flags & ~BPF_F_UPROBE_MULTI_RETURN) 3141 + return -EINVAL; 3142 + 3143 + /* 3144 + * path, offsets and cnt are mandatory, 3145 + * ref_ctr_offsets and cookies are optional 3146 + */ 3147 + upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); 3148 + uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); 3149 + cnt = attr->link_create.uprobe_multi.cnt; 3150 + 3151 + if (!upath || !uoffsets || !cnt) 3152 + return -EINVAL; 3153 + 3154 + uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets); 3155 + ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies); 3156 + 3157 + name = strndup_user(upath, PATH_MAX); 3158 + if (IS_ERR(name)) { 3159 + err = PTR_ERR(name); 3160 + return err; 3161 + } 3162 + 3163 + err = kern_path(name, LOOKUP_FOLLOW, &path); 3164 + kfree(name); 3165 + if (err) 3166 + return err; 3167 + 3168 + if (!d_is_reg(path.dentry)) { 3169 + err = -EBADF; 3170 + goto error_path_put; 3171 + } 3172 + 3173 + pid = attr->link_create.uprobe_multi.pid; 3174 + if (pid) { 3175 + rcu_read_lock(); 3176 + task = get_pid_task(find_vpid(pid), PIDTYPE_PID); 3177 + rcu_read_unlock(); 3178 + if (!task) 3179 + goto error_path_put; 3180 + } 3181 + 3182 + err = -ENOMEM; 3183 + 3184 + link = kzalloc(sizeof(*link), GFP_KERNEL); 3185 + uprobes = kvcalloc(cnt, sizeof(*uprobes), GFP_KERNEL); 3186 + 3187 + if (!uprobes || !link) 3188 + goto error_free; 3189 + 3190 + if (uref_ctr_offsets) { 3191 + ref_ctr_offsets = kvcalloc(cnt, sizeof(*ref_ctr_offsets), GFP_KERNEL); 3192 + if (!ref_ctr_offsets) 3193 + goto error_free; 3194 + } 3195 + 3196 + for (i = 0; i < cnt; i++) { 3197 + if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) { 3198 + err = -EFAULT; 3199 + goto error_free; 3200 + } 3201 + if (uref_ctr_offsets && __get_user(ref_ctr_offsets[i], uref_ctr_offsets + i)) { 3202 + err = -EFAULT; 3203 + goto 
error_free; 3204 + } 3205 + if (__get_user(uprobes[i].offset, uoffsets + i)) { 3206 + err = -EFAULT; 3207 + goto error_free; 3208 + } 3209 + 3210 + uprobes[i].link = link; 3211 + 3212 + if (flags & BPF_F_UPROBE_MULTI_RETURN) 3213 + uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler; 3214 + else 3215 + uprobes[i].consumer.handler = uprobe_multi_link_handler; 3216 + 3217 + if (pid) 3218 + uprobes[i].consumer.filter = uprobe_multi_link_filter; 3219 + } 3220 + 3221 + link->cnt = cnt; 3222 + link->uprobes = uprobes; 3223 + link->path = path; 3224 + link->task = task; 3225 + 3226 + bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI, 3227 + &bpf_uprobe_multi_link_lops, prog); 3228 + 3229 + for (i = 0; i < cnt; i++) { 3230 + err = uprobe_register_refctr(d_real_inode(link->path.dentry), 3231 + uprobes[i].offset, 3232 + ref_ctr_offsets ? ref_ctr_offsets[i] : 0, 3233 + &uprobes[i].consumer); 3234 + if (err) { 3235 + bpf_uprobe_unregister(&path, uprobes, i); 3236 + goto error_free; 3237 + } 3238 + } 3239 + 3240 + err = bpf_link_prime(&link->link, &link_primer); 3241 + if (err) 3242 + goto error_free; 3243 + 3244 + kvfree(ref_ctr_offsets); 3245 + return bpf_link_settle(&link_primer); 3246 + 3247 + error_free: 3248 + kvfree(ref_ctr_offsets); 3249 + kvfree(uprobes); 3250 + kfree(link); 3251 + if (task) 3252 + put_task_struct(task); 3253 + error_path_put: 3254 + path_put(&path); 3255 + return err; 3256 + } 3257 + #else /* !CONFIG_UPROBES */ 3258 + int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3259 + { 3260 + return -EOPNOTSUPP; 3261 + } 3262 + static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) 3263 + { 3264 + return 0; 3265 + } 3266 + static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 3267 + { 3268 + return 0; 3269 + } 3270 + #endif /* CONFIG_UPROBES */
+7 -5
lib/test_bpf.c
··· 596 596 { 597 597 static const s64 regs[] = { 598 598 0x0123456789abcdefLL, /* dword > 0, word < 0 */ 599 - 0xfedcba9876543210LL, /* dowrd < 0, word > 0 */ 600 - 0xfedcba0198765432LL, /* dowrd < 0, word < 0 */ 599 + 0xfedcba9876543210LL, /* dword < 0, word > 0 */ 600 + 0xfedcba0198765432LL, /* dword < 0, word < 0 */ 601 601 0x0123458967abcdefLL, /* dword > 0, word > 0 */ 602 602 }; 603 603 int bits = alu32 ? 32 : 64; ··· 14567 14567 if (ret == test->test[i].result) { 14568 14568 pr_cont("%lld ", duration); 14569 14569 } else { 14570 - pr_cont("ret %d != %d ", ret, 14571 - test->test[i].result); 14570 + s32 res = test->test[i].result; 14571 + 14572 + pr_cont("ret %d != %d (%#x != %#x)", 14573 + ret, res, ret, res); 14572 14574 err_cnt++; 14573 14575 } 14574 14576 } ··· 15047 15045 struct bpf_array *progs; 15048 15046 int which, err; 15049 15047 15050 - /* Allocate the table of programs to be used for tall calls */ 15048 + /* Allocate the table of programs to be used for tail calls */ 15051 15049 progs = kzalloc(struct_size(progs, ptrs, ntests + 1), GFP_KERNEL); 15052 15050 if (!progs) 15053 15051 goto out_nomem;
+3 -4
net/core/lwt_bpf.c
··· 60 60 ret = BPF_OK; 61 61 } else { 62 62 skb_reset_mac_header(skb); 63 - ret = skb_do_redirect(skb); 64 - if (ret == 0) 65 - ret = BPF_REDIRECT; 63 + skb_do_redirect(skb); 64 + ret = BPF_REDIRECT; 66 65 } 67 66 break; 68 67 ··· 254 255 255 256 err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb); 256 257 if (unlikely(err)) 257 - return err; 258 + return net_xmit_errno(err); 258 259 259 260 /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */ 260 261 return LWTUNNEL_XMIT_DONE;
+1 -1
net/ipv4/ip_output.c
··· 216 216 if (lwtunnel_xmit_redirect(dst->lwtstate)) { 217 217 int res = lwtunnel_xmit(skb); 218 218 219 - if (res < 0 || res == LWTUNNEL_XMIT_DONE) 219 + if (res != LWTUNNEL_XMIT_CONTINUE) 220 220 return res; 221 221 } 222 222
+1 -1
net/ipv6/ip6_output.c
··· 113 113 if (lwtunnel_xmit_redirect(dst->lwtstate)) { 114 114 int res = lwtunnel_xmit(skb); 115 115 116 - if (res < 0 || res == LWTUNNEL_XMIT_DONE) 116 + if (res != LWTUNNEL_XMIT_CONTINUE) 117 117 return res; 118 118 } 119 119
-12
samples/bpf/.gitignore
··· 37 37 tracex5 38 38 tracex6 39 39 tracex7 40 - xdp1 41 - xdp2 42 40 xdp_adjust_tail 43 41 xdp_fwd 44 - xdp_monitor 45 - xdp_redirect 46 - xdp_redirect_cpu 47 - xdp_redirect_map 48 - xdp_redirect_map_multi 49 42 xdp_router_ipv4 50 - xdp_rxq_info 51 - xdp_sample_pkts 52 43 xdp_tx_iptunnel 53 - xdpsock 54 - xdpsock_ctrl_proc 55 - xsk_fwd 56 44 testfile.img 57 45 hbm_out.log 58 46 iperf.*
+11 -57
samples/bpf/Makefile
··· 30 30 tprogs-y += test_cgrp2_attach 31 31 tprogs-y += test_cgrp2_sock 32 32 tprogs-y += test_cgrp2_sock2 33 - tprogs-y += xdp1 34 - tprogs-y += xdp2 35 33 tprogs-y += xdp_router_ipv4 36 34 tprogs-y += test_current_task_under_cgroup 37 35 tprogs-y += trace_event ··· 39 41 tprogs-y += xdp_tx_iptunnel 40 42 tprogs-y += test_map_in_map 41 43 tprogs-y += per_socket_stats_example 42 - tprogs-y += xdp_rxq_info 43 44 tprogs-y += syscall_tp 44 45 tprogs-y += cpustat 45 46 tprogs-y += xdp_adjust_tail 46 47 tprogs-y += xdp_fwd 47 48 tprogs-y += task_fd_query 48 - tprogs-y += xdp_sample_pkts 49 49 tprogs-y += ibumad 50 50 tprogs-y += hbm 51 - 52 - tprogs-y += xdp_redirect_cpu 53 - tprogs-y += xdp_redirect_map_multi 54 - tprogs-y += xdp_redirect_map 55 - tprogs-y += xdp_redirect 56 - tprogs-y += xdp_monitor 57 51 58 52 # Libbpf dependencies 59 53 LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf ··· 80 90 test_cgrp2_attach-objs := test_cgrp2_attach.o 81 91 test_cgrp2_sock-objs := test_cgrp2_sock.o 82 92 test_cgrp2_sock2-objs := test_cgrp2_sock2.o 83 - xdp1-objs := xdp1_user.o 84 - # reuse xdp1 source intentionally 85 - xdp2-objs := xdp1_user.o 86 93 test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \ 87 94 test_current_task_under_cgroup_user.o 88 95 trace_event-objs := trace_event_user.o $(TRACE_HELPERS) ··· 89 102 xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o 90 103 test_map_in_map-objs := test_map_in_map_user.o 91 104 per_socket_stats_example-objs := cookie_uid_helper_example.o 92 - xdp_rxq_info-objs := xdp_rxq_info_user.o 93 105 syscall_tp-objs := syscall_tp_user.o 94 106 cpustat-objs := cpustat_user.o 95 107 xdp_adjust_tail-objs := xdp_adjust_tail_user.o 96 108 xdp_fwd-objs := xdp_fwd_user.o 97 109 task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) 98 - xdp_sample_pkts-objs := xdp_sample_pkts_user.o 99 110 ibumad-objs := ibumad_user.o 100 111 hbm-objs := hbm.o $(CGROUP_HELPERS) 101 112 102 - xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o $(XDP_SAMPLE) 
103 - xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE) 104 - xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE) 105 - xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE) 106 - xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE) 107 113 xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE) 108 114 109 115 # Tell kbuild to always build the programs ··· 104 124 always-y += sockex1_kern.o 105 125 always-y += sockex2_kern.o 106 126 always-y += sockex3_kern.o 107 - always-y += tracex1_kern.o 127 + always-y += tracex1.bpf.o 108 128 always-y += tracex2.bpf.o 109 - always-y += tracex3_kern.o 110 - always-y += tracex4_kern.o 111 - always-y += tracex5_kern.o 112 - always-y += tracex6_kern.o 113 - always-y += tracex7_kern.o 129 + always-y += tracex3.bpf.o 130 + always-y += tracex4.bpf.o 131 + always-y += tracex5.bpf.o 132 + always-y += tracex6.bpf.o 133 + always-y += tracex7.bpf.o 114 134 always-y += sock_flags.bpf.o 115 135 always-y += test_probe_write_user.bpf.o 116 136 always-y += trace_output.bpf.o 117 137 always-y += tcbpf1_kern.o 118 138 always-y += tc_l2_redirect_kern.o 119 139 always-y += lathist_kern.o 120 - always-y += offwaketime_kern.o 121 - always-y += spintest_kern.o 140 + always-y += offwaketime.bpf.o 141 + always-y += spintest.bpf.o 122 142 always-y += map_perf_test.bpf.o 123 143 always-y += test_overhead_tp.bpf.o 124 144 always-y += test_overhead_raw_tp.bpf.o 125 145 always-y += test_overhead_kprobe.bpf.o 126 146 always-y += parse_varlen.o parse_simple.o parse_ldabs.o 127 147 always-y += test_cgrp2_tc.bpf.o 128 - always-y += xdp1_kern.o 129 - always-y += xdp2_kern.o 130 148 always-y += test_current_task_under_cgroup.bpf.o 131 149 always-y += trace_event_kern.o 132 150 always-y += sampleip_kern.o ··· 140 162 always-y += tcp_basertt_kern.o 141 163 always-y += tcp_tos_reflect_kern.o 142 164 always-y += tcp_dumpstats_kern.o 143 - always-y += xdp_rxq_info_kern.o 144 165 always-y += xdp2skb_meta_kern.o 145 166 always-y += 
syscall_tp_kern.o 146 167 always-y += cpustat_kern.o 147 168 always-y += xdp_adjust_tail_kern.o 148 169 always-y += xdp_fwd_kern.o 149 170 always-y += task_fd_query_kern.o 150 - always-y += xdp_sample_pkts_kern.o 151 171 always-y += ibumad_kern.o 152 172 always-y += hbm_out_kern.o 153 173 always-y += hbm_edt_kern.o ··· 183 207 endif 184 208 185 209 TPROGS_LDLIBS += $(LIBBPF) -lelf -lz 186 - TPROGLDLIBS_xdp_monitor += -lm 187 - TPROGLDLIBS_xdp_redirect += -lm 188 - TPROGLDLIBS_xdp_redirect_cpu += -lm 189 - TPROGLDLIBS_xdp_redirect_map += -lm 190 - TPROGLDLIBS_xdp_redirect_map_multi += -lm 191 210 TPROGLDLIBS_xdp_router_ipv4 += -lm -pthread 192 211 TPROGLDLIBS_tracex4 += -lrt 193 212 TPROGLDLIBS_trace_output += -lrt ··· 297 326 298 327 .PHONY: libbpf_hdrs 299 328 300 - $(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h 301 - $(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h 302 - $(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h 303 - $(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h 304 - $(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h 305 329 $(obj)/xdp_router_ipv4_user.o: $(obj)/xdp_router_ipv4.skel.h 306 330 307 - $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h 331 + $(obj)/tracex5.bpf.o: $(obj)/syscall_nrs.h 308 332 $(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h 309 333 $(obj)/hbm.o: $(src)/hbm.h 310 334 $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h ··· 349 383 350 384 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) 351 385 352 - $(obj)/xdp_redirect_cpu.bpf.o: $(obj)/xdp_sample.bpf.o 353 - $(obj)/xdp_redirect_map_multi.bpf.o: $(obj)/xdp_sample.bpf.o 354 - $(obj)/xdp_redirect_map.bpf.o: $(obj)/xdp_sample.bpf.o 355 - $(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o 356 - $(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o 357 386 $(obj)/xdp_router_ipv4.bpf.o: $(obj)/xdp_sample.bpf.o 358 387 359 388 $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h 
$(src)/xdp_sample_shared.h ··· 359 398 -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ 360 399 -c $(filter %.bpf.c,$^) -o $@ 361 400 362 - LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \ 363 - xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h \ 364 - xdp_router_ipv4.skel.h 401 + LINKED_SKELS := xdp_router_ipv4.skel.h 365 402 clean-files += $(LINKED_SKELS) 366 403 367 - xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o 368 - xdp_redirect_map_multi.skel.h-deps := xdp_redirect_map_multi.bpf.o xdp_sample.bpf.o 369 - xdp_redirect_map.skel.h-deps := xdp_redirect_map.bpf.o xdp_sample.bpf.o 370 - xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o 371 - xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o 372 404 xdp_router_ipv4.skel.h-deps := xdp_router_ipv4.bpf.o xdp_sample.bpf.o 373 405 374 406 LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps))) ··· 394 440 -Wno-gnu-variable-sized-type-not-at-end \ 395 441 -Wno-address-of-packed-member -Wno-tautological-compare \ 396 442 -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ 397 - -fno-asynchronous-unwind-tables \ 443 + -fno-asynchronous-unwind-tables -fcf-protection \ 398 444 -I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \ 399 445 -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \ 400 446 $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \
+6
samples/bpf/README.rst
··· 4 4 This directory contains a test stubs, verifier test-suite and examples 5 5 for using eBPF. The examples use libbpf from tools/lib/bpf. 6 6 7 + Note that the XDP-specific samples have been removed from this directory and 8 + moved to the xdp-tools repository: https://github.com/xdp-project/xdp-tools 9 + See the commit messages removing each tool from this directory for how to 10 + convert specific command invocations between the old samples and the utilities 11 + in xdp-tools. 12 + 7 13 Build dependencies 8 14 ================== 9 15
+2
samples/bpf/net_shared.h
··· 17 17 #define TC_ACT_OK 0 18 18 #define TC_ACT_SHOT 2 19 19 20 + #define IFNAMSIZ 16 21 + 20 22 #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ 21 23 __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 22 24 #define bpf_ntohs(x) __builtin_bswap16(x)
+11 -28
samples/bpf/offwaketime_kern.c samples/bpf/offwaketime.bpf.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <uapi/linux/bpf.h> 8 - #include <uapi/linux/ptrace.h> 9 - #include <uapi/linux/perf_event.h> 7 + #include "vmlinux.h" 10 8 #include <linux/version.h> 11 - #include <linux/sched.h> 12 9 #include <bpf/bpf_helpers.h> 13 10 #include <bpf/bpf_tracing.h> 11 + #include <bpf/bpf_core_read.h> 14 12 15 - #define _(P) \ 16 - ({ \ 17 - typeof(P) val; \ 18 - bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 19 - val; \ 20 - }) 13 + #ifndef PERF_MAX_STACK_DEPTH 14 + #define PERF_MAX_STACK_DEPTH 127 15 + #endif 21 16 22 17 #define MINBLOCK_US 1 23 18 #define MAX_ENTRIES 10000 ··· 62 67 SEC("kprobe/try_to_wake_up") 63 68 int waker(struct pt_regs *ctx) 64 69 { 65 - struct task_struct *p = (void *) PT_REGS_PARM1(ctx); 70 + struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx); 71 + u32 pid = BPF_CORE_READ(p, pid); 66 72 struct wokeby_t woke; 67 - u32 pid; 68 - 69 - pid = _(p->pid); 70 73 71 74 bpf_get_current_comm(&woke.name, sizeof(woke.name)); 72 75 woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS); ··· 104 111 105 112 #if 1 106 113 /* taken from /sys/kernel/tracing/events/sched/sched_switch/format */ 107 - struct sched_switch_args { 108 - unsigned long long pad; 109 - char prev_comm[TASK_COMM_LEN]; 110 - int prev_pid; 111 - int prev_prio; 112 - long long prev_state; 113 - char next_comm[TASK_COMM_LEN]; 114 - int next_pid; 115 - int next_prio; 116 - }; 117 114 SEC("tracepoint/sched/sched_switch") 118 - int oncpu(struct sched_switch_args *ctx) 115 + int oncpu(struct trace_event_raw_sched_switch *ctx) 119 116 { 120 117 /* record previous thread sleep time */ 121 118 u32 pid = ctx->prev_pid; 122 119 #else 123 - SEC("kprobe/finish_task_switch") 120 + SEC("kprobe.multi/finish_task_switch*") 124 121 int oncpu(struct pt_regs *ctx) 125 122 { 126 - struct task_struct *p = (void *) PT_REGS_PARM1(ctx); 123 + struct task_struct *p 
= (void *)PT_REGS_PARM1_CORE(ctx); 127 124 /* record previous thread sleep time */ 128 - u32 pid = _(p->pid); 125 + u32 pid = BPF_CORE_READ(p, pid); 129 126 #endif 130 127 u64 delta, ts, *tsp; 131 128
+1 -1
samples/bpf/offwaketime_user.c
··· 105 105 return 2; 106 106 } 107 107 108 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 108 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 109 109 obj = bpf_object__open_file(filename, NULL); 110 110 if (libbpf_get_error(obj)) { 111 111 fprintf(stderr, "ERROR: opening BPF object file failed\n");
+9 -18
samples/bpf/spintest_kern.c samples/bpf/spintest.bpf.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <linux/skbuff.h> 8 - #include <linux/netdevice.h> 7 + #include "vmlinux.h" 9 8 #include <linux/version.h> 10 - #include <uapi/linux/bpf.h> 11 - #include <uapi/linux/perf_event.h> 12 9 #include <bpf/bpf_helpers.h> 13 10 #include <bpf/bpf_tracing.h> 11 + 12 + #ifndef PERF_MAX_STACK_DEPTH 13 + #define PERF_MAX_STACK_DEPTH 127 14 + #endif 14 15 15 16 struct { 16 17 __uint(type, BPF_MAP_TYPE_HASH); ··· 47 46 } 48 47 49 48 /* add kprobes to all possible *spin* functions */ 50 - SEC("kprobe/spin_unlock")PROG(p1) 51 - SEC("kprobe/spin_lock")PROG(p2) 52 - SEC("kprobe/mutex_spin_on_owner")PROG(p3) 53 - SEC("kprobe/rwsem_spin_on_owner")PROG(p4) 54 - SEC("kprobe/spin_unlock_irqrestore")PROG(p5) 55 - SEC("kprobe/_raw_spin_unlock_irqrestore")PROG(p6) 56 - SEC("kprobe/_raw_spin_unlock_bh")PROG(p7) 57 - SEC("kprobe/_raw_spin_unlock")PROG(p8) 58 - SEC("kprobe/_raw_spin_lock_irqsave")PROG(p9) 59 - SEC("kprobe/_raw_spin_trylock_bh")PROG(p10) 60 - SEC("kprobe/_raw_spin_lock_irq")PROG(p11) 61 - SEC("kprobe/_raw_spin_trylock")PROG(p12) 62 - SEC("kprobe/_raw_spin_lock")PROG(p13) 63 - SEC("kprobe/_raw_spin_lock_bh")PROG(p14) 49 + SEC("kprobe.multi/spin_*lock*")PROG(spin_lock) 50 + SEC("kprobe.multi/*_spin_on_owner")PROG(spin_on_owner) 51 + SEC("kprobe.multi/_raw_spin_*lock*")PROG(raw_spin_lock) 52 + 64 53 /* and to inner bpf helpers */ 65 54 SEC("kprobe/htab_map_update_elem")PROG(p15) 66 55 SEC("kprobe/__htab_percpu_map_update_elem")PROG(p16)
+8 -16
samples/bpf/spintest_user.c
··· 9 9 10 10 int main(int ac, char **argv) 11 11 { 12 - char filename[256], symbol[256]; 13 12 struct bpf_object *obj = NULL; 14 13 struct bpf_link *links[20]; 15 14 long key, next_key, value; 16 15 struct bpf_program *prog; 17 16 int map_fd, i, j = 0; 18 - const char *section; 17 + char filename[256]; 19 18 struct ksym *sym; 20 19 21 20 if (load_kallsyms()) { ··· 22 23 return 2; 23 24 } 24 25 25 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 26 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 26 27 obj = bpf_object__open_file(filename, NULL); 27 28 if (libbpf_get_error(obj)) { 28 29 fprintf(stderr, "ERROR: opening BPF object file failed\n"); ··· 43 44 } 44 45 45 46 bpf_object__for_each_program(prog, obj) { 46 - section = bpf_program__section_name(prog); 47 - if (sscanf(section, "kprobe/%s", symbol) != 1) 48 - continue; 49 - 50 - /* Attach prog only when symbol exists */ 51 - if (ksym_get_addr(symbol)) { 52 - links[j] = bpf_program__attach(prog); 53 - if (libbpf_get_error(links[j])) { 54 - fprintf(stderr, "bpf_program__attach failed\n"); 55 - links[j] = NULL; 56 - goto cleanup; 57 - } 58 - j++; 47 + links[j] = bpf_program__attach(prog); 48 + if (libbpf_get_error(links[j])) { 49 + fprintf(stderr, "bpf_program__attach failed\n"); 50 + links[j] = NULL; 51 + goto cleanup; 59 52 } 53 + j++; 60 54 } 61 55 62 56 for (i = 0; i < 5; i++) {
+3 -7
samples/bpf/test_map_in_map.bpf.c
··· 103 103 return result ? *result : -ENOENT; 104 104 } 105 105 106 - SEC("kprobe/__sys_connect") 107 - int trace_sys_connect(struct pt_regs *ctx) 106 + SEC("ksyscall/connect") 107 + int BPF_KSYSCALL(trace_sys_connect, unsigned int fd, struct sockaddr_in6 *in6, int addrlen) 108 108 { 109 - struct sockaddr_in6 *in6; 110 109 u16 test_case, port, dst6[8]; 111 - int addrlen, ret, inline_ret, ret_key = 0; 110 + int ret, inline_ret, ret_key = 0; 112 111 u32 port_key; 113 112 void *outer_map, *inner_map; 114 113 bool inline_hash = false; 115 - 116 - in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(ctx); 117 - addrlen = (int)PT_REGS_PARM3_CORE(ctx); 118 114 119 115 if (addrlen != sizeof(*in6)) 120 116 return 0;
+7 -13
samples/bpf/test_overhead_kprobe.bpf.c
··· 8 8 #include <linux/version.h> 9 9 #include <bpf/bpf_helpers.h> 10 10 #include <bpf/bpf_tracing.h> 11 - 12 - #define _(P) \ 13 - ({ \ 14 - typeof(P) val = 0; \ 15 - bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 16 - val; \ 17 - }) 11 + #include <bpf/bpf_core_read.h> 18 12 19 13 SEC("kprobe/__set_task_comm") 20 14 int prog(struct pt_regs *ctx) ··· 20 26 u16 oom_score_adj; 21 27 u32 pid; 22 28 23 - tsk = (void *)PT_REGS_PARM1(ctx); 29 + tsk = (void *)PT_REGS_PARM1_CORE(ctx); 24 30 25 - pid = _(tsk->pid); 26 - bpf_probe_read_kernel_str(oldcomm, sizeof(oldcomm), &tsk->comm); 27 - bpf_probe_read_kernel_str(newcomm, sizeof(newcomm), 31 + pid = BPF_CORE_READ(tsk, pid); 32 + bpf_core_read_str(oldcomm, sizeof(oldcomm), &tsk->comm); 33 + bpf_core_read_str(newcomm, sizeof(newcomm), 28 34 (void *)PT_REGS_PARM2(ctx)); 29 - signal = _(tsk->signal); 30 - oom_score_adj = _(signal->oom_score_adj); 35 + signal = BPF_CORE_READ(tsk, signal); 36 + oom_score_adj = BPF_CORE_READ(signal, oom_score_adj); 31 37 return 0; 32 38 } 33 39
+2 -27
samples/bpf/test_overhead_tp.bpf.c
··· 8 8 #include <bpf/bpf_helpers.h> 9 9 10 10 /* from /sys/kernel/tracing/events/task/task_rename/format */ 11 - struct task_rename { 12 - __u64 pad; 13 - __u32 pid; 14 - char oldcomm[TASK_COMM_LEN]; 15 - char newcomm[TASK_COMM_LEN]; 16 - __u16 oom_score_adj; 17 - }; 18 11 SEC("tracepoint/task/task_rename") 19 - int prog(struct task_rename *ctx) 12 + int prog(struct trace_event_raw_task_rename *ctx) 20 13 { 21 14 return 0; 22 15 } 23 16 24 17 /* from /sys/kernel/tracing/events/fib/fib_table_lookup/format */ 25 - struct fib_table_lookup { 26 - __u64 pad; 27 - __u32 tb_id; 28 - int err; 29 - int oif; 30 - int iif; 31 - __u8 proto; 32 - __u8 tos; 33 - __u8 scope; 34 - __u8 flags; 35 - __u8 src[4]; 36 - __u8 dst[4]; 37 - __u8 gw4[4]; 38 - __u8 gw6[16]; 39 - __u16 sport; 40 - __u16 dport; 41 - char name[16]; 42 - }; 43 18 SEC("tracepoint/fib/fib_table_lookup") 44 - int prog2(struct fib_table_lookup *ctx) 19 + int prog2(struct trace_event_raw_fib_table_lookup *ctx) 45 20 { 46 21 return 0; 47 22 }
+9 -16
samples/bpf/tracex1_kern.c samples/bpf/tracex1.bpf.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <linux/skbuff.h> 8 - #include <linux/netdevice.h> 9 - #include <uapi/linux/bpf.h> 7 + #include "vmlinux.h" 8 + #include "net_shared.h" 10 9 #include <linux/version.h> 11 10 #include <bpf/bpf_helpers.h> 11 + #include <bpf/bpf_core_read.h> 12 12 #include <bpf/bpf_tracing.h> 13 - 14 - #define _(P) \ 15 - ({ \ 16 - typeof(P) val = 0; \ 17 - bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 18 - val; \ 19 - }) 20 13 21 14 /* kprobe is NOT a stable ABI 22 15 * kernel functions can be removed, renamed or completely change semantics. 23 16 * Number of arguments and their positions can change, etc. 24 17 * In such case this bpf+kprobe example will no longer be meaningful 25 18 */ 26 - SEC("kprobe/__netif_receive_skb_core") 19 + SEC("kprobe.multi/__netif_receive_skb_core*") 27 20 int bpf_prog1(struct pt_regs *ctx) 28 21 { 29 22 /* attaches to kprobe __netif_receive_skb_core, 30 23 * looks for packets on loobpack device and prints them 24 + * (wildcard is used for avoiding symbol mismatch due to optimization) 31 25 */ 32 26 char devname[IFNAMSIZ]; 33 27 struct net_device *dev; 34 28 struct sk_buff *skb; 35 29 int len; 36 30 37 - /* non-portable! works for the given kernel only */ 38 - bpf_probe_read_kernel(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx)); 39 - dev = _(skb->dev); 40 - len = _(skb->len); 31 + bpf_core_read(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx)); 32 + dev = BPF_CORE_READ(skb, dev); 33 + len = BPF_CORE_READ(skb, len); 41 34 42 - bpf_probe_read_kernel(devname, sizeof(devname), dev->name); 35 + BPF_CORE_READ_STR_INTO(&devname, dev, name); 43 36 44 37 if (devname[0] == 'l' && devname[1] == 'o') { 45 38 char fmt[] = "skb %p len %d\n";
+1 -1
samples/bpf/tracex1_user.c
··· 12 12 char filename[256]; 13 13 FILE *f; 14 14 15 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 15 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 16 16 obj = bpf_object__open_file(filename, NULL); 17 17 if (libbpf_get_error(obj)) { 18 18 fprintf(stderr, "ERROR: opening BPF object file failed\n");
+25 -15
samples/bpf/tracex3_kern.c samples/bpf/tracex3.bpf.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <linux/skbuff.h> 8 - #include <linux/netdevice.h> 7 + #include "vmlinux.h" 9 8 #include <linux/version.h> 10 - #include <uapi/linux/bpf.h> 11 9 #include <bpf/bpf_helpers.h> 12 10 #include <bpf/bpf_tracing.h> 11 + 12 + struct start_key { 13 + dev_t dev; 14 + u32 _pad; 15 + sector_t sector; 16 + }; 13 17 14 18 struct { 15 19 __uint(type, BPF_MAP_TYPE_HASH); ··· 22 18 __uint(max_entries, 4096); 23 19 } my_map SEC(".maps"); 24 20 25 - /* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe 26 - * example will no longer be meaningful 27 - */ 28 - SEC("kprobe/blk_mq_start_request") 29 - int bpf_prog1(struct pt_regs *ctx) 21 + /* from /sys/kernel/tracing/events/block/block_io_start/format */ 22 + SEC("tracepoint/block/block_io_start") 23 + int bpf_prog1(struct trace_event_raw_block_rq *ctx) 30 24 { 31 - long rq = PT_REGS_PARM1(ctx); 32 25 u64 val = bpf_ktime_get_ns(); 26 + struct start_key key = { 27 + .dev = ctx->dev, 28 + .sector = ctx->sector 29 + }; 33 30 34 - bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY); 31 + bpf_map_update_elem(&my_map, &key, &val, BPF_ANY); 35 32 return 0; 36 33 } 37 34 ··· 54 49 __uint(max_entries, SLOTS); 55 50 } lat_map SEC(".maps"); 56 51 57 - SEC("kprobe/__blk_account_io_done") 58 - int bpf_prog2(struct pt_regs *ctx) 52 + /* from /sys/kernel/tracing/events/block/block_io_done/format */ 53 + SEC("tracepoint/block/block_io_done") 54 + int bpf_prog2(struct trace_event_raw_block_rq *ctx) 59 55 { 60 - long rq = PT_REGS_PARM1(ctx); 56 + struct start_key key = { 57 + .dev = ctx->dev, 58 + .sector = ctx->sector 59 + }; 60 + 61 61 u64 *value, l, base; 62 62 u32 index; 63 63 64 - value = bpf_map_lookup_elem(&my_map, &rq); 64 + value = bpf_map_lookup_elem(&my_map, &key); 65 65 if (!value) 66 66 return 0; 67 67 68 68 u64 cur_time = bpf_ktime_get_ns(); 69 69 u64 delta = cur_time - 
*value; 70 70 71 - bpf_map_delete_elem(&my_map, &rq); 71 + bpf_map_delete_elem(&my_map, &key); 72 72 73 73 /* the lines below are computing index = log10(delta)*10 74 74 * using integer arithmetic
+1 -1
samples/bpf/tracex3_user.c
··· 125 125 } 126 126 } 127 127 128 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 128 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 129 129 obj = bpf_object__open_file(filename, NULL); 130 130 if (libbpf_get_error(obj)) { 131 131 fprintf(stderr, "ERROR: opening BPF object file failed\n");
+1 -2
samples/bpf/tracex4_kern.c samples/bpf/tracex4.bpf.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <linux/ptrace.h> 7 + #include "vmlinux.h" 8 8 #include <linux/version.h> 9 - #include <uapi/linux/bpf.h> 10 9 #include <bpf/bpf_helpers.h> 11 10 #include <bpf/bpf_tracing.h> 12 11
+1 -1
samples/bpf/tracex4_user.c
··· 53 53 char filename[256]; 54 54 int map_fd, j = 0; 55 55 56 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 56 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 57 57 obj = bpf_object__open_file(filename, NULL); 58 58 if (libbpf_get_error(obj)) { 59 59 fprintf(stderr, "ERROR: opening BPF object file failed\n");
+7 -7
samples/bpf/tracex5_kern.c samples/bpf/tracex5.bpf.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <linux/ptrace.h> 8 - #include <linux/version.h> 9 - #include <uapi/linux/bpf.h> 10 - #include <uapi/linux/seccomp.h> 11 - #include <uapi/linux/unistd.h> 7 + #include "vmlinux.h" 12 8 #include "syscall_nrs.h" 9 + #include <linux/version.h> 10 + #include <uapi/linux/unistd.h> 13 11 #include <bpf/bpf_helpers.h> 14 12 #include <bpf/bpf_tracing.h> 13 + #include <bpf/bpf_core_read.h> 15 14 15 + #define __stringify(x) #x 16 16 #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F 17 17 18 18 struct { ··· 47 47 { 48 48 struct seccomp_data sd; 49 49 50 - bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 50 + bpf_core_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 51 51 if (sd.args[2] == 512) { 52 52 char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; 53 53 bpf_trace_printk(fmt, sizeof(fmt), ··· 60 60 { 61 61 struct seccomp_data sd; 62 62 63 - bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 63 + bpf_core_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 64 64 if (sd.args[2] > 128 && sd.args[2] <= 1024) { 65 65 char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; 66 66 bpf_trace_printk(fmt, sizeof(fmt),
+1 -1
samples/bpf/tracex5_user.c
··· 42 42 char filename[256]; 43 43 FILE *f; 44 44 45 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 45 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 46 46 obj = bpf_object__open_file(filename, NULL); 47 47 if (libbpf_get_error(obj)) { 48 48 fprintf(stderr, "ERROR: opening BPF object file failed\n");
+16 -4
samples/bpf/tracex6_kern.c samples/bpf/tracex6.bpf.c
··· 1 - #include <linux/ptrace.h> 1 + #include "vmlinux.h" 2 2 #include <linux/version.h> 3 - #include <uapi/linux/bpf.h> 4 3 #include <bpf/bpf_helpers.h> 4 + #include <bpf/bpf_tracing.h> 5 + #include <bpf/bpf_core_read.h> 5 6 6 7 struct { 7 8 __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); ··· 46 45 return 0; 47 46 } 48 47 49 - SEC("kprobe/htab_map_lookup_elem") 50 - int bpf_prog2(struct pt_regs *ctx) 48 + /* 49 + * Since *_map_lookup_elem can't be expected to trigger bpf programs 50 + * due to potential deadlocks (bpf_disable_instrumentation), this bpf 51 + * program will be attached to bpf_map_copy_value (which is called 52 + * from map_lookup_elem) and will only filter the hashtable type. 53 + */ 54 + SEC("kprobe/bpf_map_copy_value") 55 + int BPF_KPROBE(bpf_prog2, struct bpf_map *map) 51 56 { 52 57 u32 key = bpf_get_smp_processor_id(); 53 58 struct bpf_perf_event_value *val, buf; 59 + enum bpf_map_type type; 54 60 int error; 61 + 62 + type = BPF_CORE_READ(map, map_type); 63 + if (type != BPF_MAP_TYPE_HASH) 64 + return 0; 55 65 56 66 error = bpf_perf_event_read_value(&counters, key, &buf, sizeof(buf)); 57 67 if (error)
+1 -1
samples/bpf/tracex6_user.c
··· 180 180 char filename[256]; 181 181 int i = 0; 182 182 183 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 183 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 184 184 obj = bpf_object__open_file(filename, NULL); 185 185 if (libbpf_get_error(obj)) { 186 186 fprintf(stderr, "ERROR: opening BPF object file failed\n");
+1 -2
samples/bpf/tracex7_kern.c samples/bpf/tracex7.bpf.c
··· 1 - #include <uapi/linux/ptrace.h> 2 - #include <uapi/linux/bpf.h> 1 + #include "vmlinux.h" 3 2 #include <linux/version.h> 4 3 #include <bpf/bpf_helpers.h> 5 4
+1 -1
samples/bpf/tracex7_user.c
··· 19 19 return 0; 20 20 } 21 21 22 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 22 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 23 23 obj = bpf_object__open_file(filename, NULL); 24 24 if (libbpf_get_error(obj)) { 25 25 fprintf(stderr, "ERROR: opening BPF object file failed\n");
-100
samples/bpf/xdp1_kern.c
··· 1 - /* Copyright (c) 2016 PLUMgrid 2 - * 3 - * This program is free software; you can redistribute it and/or 4 - * modify it under the terms of version 2 of the GNU General Public 5 - * License as published by the Free Software Foundation. 6 - */ 7 - #define KBUILD_MODNAME "foo" 8 - #include <uapi/linux/bpf.h> 9 - #include <linux/in.h> 10 - #include <linux/if_ether.h> 11 - #include <linux/if_packet.h> 12 - #include <linux/if_vlan.h> 13 - #include <linux/ip.h> 14 - #include <linux/ipv6.h> 15 - #include <bpf/bpf_helpers.h> 16 - 17 - struct { 18 - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 19 - __type(key, u32); 20 - __type(value, long); 21 - __uint(max_entries, 256); 22 - } rxcnt SEC(".maps"); 23 - 24 - static int parse_ipv4(void *data, u64 nh_off, void *data_end) 25 - { 26 - struct iphdr *iph = data + nh_off; 27 - 28 - if (iph + 1 > data_end) 29 - return 0; 30 - return iph->protocol; 31 - } 32 - 33 - static int parse_ipv6(void *data, u64 nh_off, void *data_end) 34 - { 35 - struct ipv6hdr *ip6h = data + nh_off; 36 - 37 - if (ip6h + 1 > data_end) 38 - return 0; 39 - return ip6h->nexthdr; 40 - } 41 - 42 - #define XDPBUFSIZE 60 43 - SEC("xdp.frags") 44 - int xdp_prog1(struct xdp_md *ctx) 45 - { 46 - __u8 pkt[XDPBUFSIZE] = {}; 47 - void *data_end = &pkt[XDPBUFSIZE-1]; 48 - void *data = pkt; 49 - struct ethhdr *eth = data; 50 - int rc = XDP_DROP; 51 - long *value; 52 - u16 h_proto; 53 - u64 nh_off; 54 - u32 ipproto; 55 - 56 - if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt))) 57 - return rc; 58 - 59 - nh_off = sizeof(*eth); 60 - if (data + nh_off > data_end) 61 - return rc; 62 - 63 - h_proto = eth->h_proto; 64 - 65 - /* Handle VLAN tagged packet */ 66 - if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { 67 - struct vlan_hdr *vhdr; 68 - 69 - vhdr = data + nh_off; 70 - nh_off += sizeof(struct vlan_hdr); 71 - if (data + nh_off > data_end) 72 - return rc; 73 - h_proto = vhdr->h_vlan_encapsulated_proto; 74 - } 75 - /* Handle double VLAN tagged packet */ 76 
- if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { 77 - struct vlan_hdr *vhdr; 78 - 79 - vhdr = data + nh_off; 80 - nh_off += sizeof(struct vlan_hdr); 81 - if (data + nh_off > data_end) 82 - return rc; 83 - h_proto = vhdr->h_vlan_encapsulated_proto; 84 - } 85 - 86 - if (h_proto == htons(ETH_P_IP)) 87 - ipproto = parse_ipv4(data, nh_off, data_end); 88 - else if (h_proto == htons(ETH_P_IPV6)) 89 - ipproto = parse_ipv6(data, nh_off, data_end); 90 - else 91 - ipproto = 0; 92 - 93 - value = bpf_map_lookup_elem(&rxcnt, &ipproto); 94 - if (value) 95 - *value += 1; 96 - 97 - return rc; 98 - } 99 - 100 - char _license[] SEC("license") = "GPL";
-166
samples/bpf/xdp1_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* Copyright (c) 2016 PLUMgrid 3 - */ 4 - #include <linux/bpf.h> 5 - #include <linux/if_link.h> 6 - #include <assert.h> 7 - #include <errno.h> 8 - #include <signal.h> 9 - #include <stdio.h> 10 - #include <stdlib.h> 11 - #include <string.h> 12 - #include <unistd.h> 13 - #include <libgen.h> 14 - #include <net/if.h> 15 - 16 - #include "bpf_util.h" 17 - #include <bpf/bpf.h> 18 - #include <bpf/libbpf.h> 19 - 20 - static int ifindex; 21 - static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; 22 - static __u32 prog_id; 23 - 24 - static void int_exit(int sig) 25 - { 26 - __u32 curr_prog_id = 0; 27 - 28 - if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { 29 - printf("bpf_xdp_query_id failed\n"); 30 - exit(1); 31 - } 32 - if (prog_id == curr_prog_id) 33 - bpf_xdp_detach(ifindex, xdp_flags, NULL); 34 - else if (!curr_prog_id) 35 - printf("couldn't find a prog id on a given interface\n"); 36 - else 37 - printf("program on interface changed, not removing\n"); 38 - exit(0); 39 - } 40 - 41 - /* simple per-protocol drop counter 42 - */ 43 - static void poll_stats(int map_fd, int interval) 44 - { 45 - unsigned int nr_cpus = bpf_num_possible_cpus(); 46 - __u64 values[nr_cpus], prev[UINT8_MAX] = { 0 }; 47 - int i; 48 - 49 - while (1) { 50 - __u32 key = UINT32_MAX; 51 - 52 - sleep(interval); 53 - 54 - while (bpf_map_get_next_key(map_fd, &key, &key) == 0) { 55 - __u64 sum = 0; 56 - 57 - assert(bpf_map_lookup_elem(map_fd, &key, values) == 0); 58 - for (i = 0; i < nr_cpus; i++) 59 - sum += values[i]; 60 - if (sum > prev[key]) 61 - printf("proto %u: %10llu pkt/s\n", 62 - key, (sum - prev[key]) / interval); 63 - prev[key] = sum; 64 - } 65 - } 66 - } 67 - 68 - static void usage(const char *prog) 69 - { 70 - fprintf(stderr, 71 - "usage: %s [OPTS] IFACE\n\n" 72 - "OPTS:\n" 73 - " -S use skb-mode\n" 74 - " -N enforce native mode\n" 75 - " -F force loading prog\n", 76 - prog); 77 - } 78 - 79 - int main(int argc, char **argv) 80 - { 
81 - struct bpf_prog_info info = {}; 82 - __u32 info_len = sizeof(info); 83 - const char *optstr = "FSN"; 84 - int prog_fd, map_fd, opt; 85 - struct bpf_program *prog; 86 - struct bpf_object *obj; 87 - struct bpf_map *map; 88 - char filename[256]; 89 - int err; 90 - 91 - while ((opt = getopt(argc, argv, optstr)) != -1) { 92 - switch (opt) { 93 - case 'S': 94 - xdp_flags |= XDP_FLAGS_SKB_MODE; 95 - break; 96 - case 'N': 97 - /* default, set below */ 98 - break; 99 - case 'F': 100 - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; 101 - break; 102 - default: 103 - usage(basename(argv[0])); 104 - return 1; 105 - } 106 - } 107 - 108 - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) 109 - xdp_flags |= XDP_FLAGS_DRV_MODE; 110 - 111 - if (optind == argc) { 112 - usage(basename(argv[0])); 113 - return 1; 114 - } 115 - 116 - ifindex = if_nametoindex(argv[optind]); 117 - if (!ifindex) { 118 - perror("if_nametoindex"); 119 - return 1; 120 - } 121 - 122 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 123 - obj = bpf_object__open_file(filename, NULL); 124 - if (libbpf_get_error(obj)) 125 - return 1; 126 - 127 - prog = bpf_object__next_program(obj, NULL); 128 - bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); 129 - 130 - err = bpf_object__load(obj); 131 - if (err) 132 - return 1; 133 - 134 - prog_fd = bpf_program__fd(prog); 135 - 136 - map = bpf_object__next_map(obj, NULL); 137 - if (!map) { 138 - printf("finding a map in obj file failed\n"); 139 - return 1; 140 - } 141 - map_fd = bpf_map__fd(map); 142 - 143 - if (!prog_fd) { 144 - printf("bpf_prog_load_xattr: %s\n", strerror(errno)); 145 - return 1; 146 - } 147 - 148 - signal(SIGINT, int_exit); 149 - signal(SIGTERM, int_exit); 150 - 151 - if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { 152 - printf("link set xdp fd failed\n"); 153 - return 1; 154 - } 155 - 156 - err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); 157 - if (err) { 158 - printf("can't get prog info - %s\n", strerror(errno)); 159 - return err; 160 
- } 161 - prog_id = info.id; 162 - 163 - poll_stats(map_fd, 1); 164 - 165 - return 0; 166 - }
-125
samples/bpf/xdp2_kern.c
··· 1 - /* Copyright (c) 2016 PLUMgrid 2 - * 3 - * This program is free software; you can redistribute it and/or 4 - * modify it under the terms of version 2 of the GNU General Public 5 - * License as published by the Free Software Foundation. 6 - */ 7 - #define KBUILD_MODNAME "foo" 8 - #include <uapi/linux/bpf.h> 9 - #include <linux/in.h> 10 - #include <linux/if_ether.h> 11 - #include <linux/if_packet.h> 12 - #include <linux/if_vlan.h> 13 - #include <linux/ip.h> 14 - #include <linux/ipv6.h> 15 - #include <bpf/bpf_helpers.h> 16 - 17 - struct { 18 - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 19 - __type(key, u32); 20 - __type(value, long); 21 - __uint(max_entries, 256); 22 - } rxcnt SEC(".maps"); 23 - 24 - static void swap_src_dst_mac(void *data) 25 - { 26 - unsigned short *p = data; 27 - unsigned short dst[3]; 28 - 29 - dst[0] = p[0]; 30 - dst[1] = p[1]; 31 - dst[2] = p[2]; 32 - p[0] = p[3]; 33 - p[1] = p[4]; 34 - p[2] = p[5]; 35 - p[3] = dst[0]; 36 - p[4] = dst[1]; 37 - p[5] = dst[2]; 38 - } 39 - 40 - static int parse_ipv4(void *data, u64 nh_off, void *data_end) 41 - { 42 - struct iphdr *iph = data + nh_off; 43 - 44 - if (iph + 1 > data_end) 45 - return 0; 46 - return iph->protocol; 47 - } 48 - 49 - static int parse_ipv6(void *data, u64 nh_off, void *data_end) 50 - { 51 - struct ipv6hdr *ip6h = data + nh_off; 52 - 53 - if (ip6h + 1 > data_end) 54 - return 0; 55 - return ip6h->nexthdr; 56 - } 57 - 58 - #define XDPBUFSIZE 60 59 - SEC("xdp.frags") 60 - int xdp_prog1(struct xdp_md *ctx) 61 - { 62 - __u8 pkt[XDPBUFSIZE] = {}; 63 - void *data_end = &pkt[XDPBUFSIZE-1]; 64 - void *data = pkt; 65 - struct ethhdr *eth = data; 66 - int rc = XDP_DROP; 67 - long *value; 68 - u16 h_proto; 69 - u64 nh_off; 70 - u32 ipproto; 71 - 72 - if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt))) 73 - return rc; 74 - 75 - nh_off = sizeof(*eth); 76 - if (data + nh_off > data_end) 77 - return rc; 78 - 79 - h_proto = eth->h_proto; 80 - 81 - /* Handle VLAN tagged packet */ 82 - if (h_proto == 
htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { 83 - struct vlan_hdr *vhdr; 84 - 85 - vhdr = data + nh_off; 86 - nh_off += sizeof(struct vlan_hdr); 87 - if (data + nh_off > data_end) 88 - return rc; 89 - h_proto = vhdr->h_vlan_encapsulated_proto; 90 - } 91 - /* Handle double VLAN tagged packet */ 92 - if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { 93 - struct vlan_hdr *vhdr; 94 - 95 - vhdr = data + nh_off; 96 - nh_off += sizeof(struct vlan_hdr); 97 - if (data + nh_off > data_end) 98 - return rc; 99 - h_proto = vhdr->h_vlan_encapsulated_proto; 100 - } 101 - 102 - if (h_proto == htons(ETH_P_IP)) 103 - ipproto = parse_ipv4(data, nh_off, data_end); 104 - else if (h_proto == htons(ETH_P_IPV6)) 105 - ipproto = parse_ipv6(data, nh_off, data_end); 106 - else 107 - ipproto = 0; 108 - 109 - value = bpf_map_lookup_elem(&rxcnt, &ipproto); 110 - if (value) 111 - *value += 1; 112 - 113 - if (ipproto == IPPROTO_UDP) { 114 - swap_src_dst_mac(data); 115 - 116 - if (bpf_xdp_store_bytes(ctx, 0, pkt, sizeof(pkt))) 117 - return rc; 118 - 119 - rc = XDP_TX; 120 - } 121 - 122 - return rc; 123 - } 124 - 125 - char _license[] SEC("license") = "GPL";
-8
samples/bpf/xdp_monitor.bpf.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc. 3 - * 4 - * XDP monitor tool, based on tracepoints 5 - */ 6 - #include "xdp_sample.bpf.h" 7 - 8 - char _license[] SEC("license") = "GPL";
-118
samples/bpf/xdp_monitor_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */ 3 - static const char *__doc__= 4 - "XDP monitor tool, based on tracepoints\n"; 5 - 6 - static const char *__doc_err_only__= 7 - " NOTICE: Only tracking XDP redirect errors\n" 8 - " Enable redirect success stats via '-s/--stats'\n" 9 - " (which comes with a per packet processing overhead)\n"; 10 - 11 - #include <errno.h> 12 - #include <stdio.h> 13 - #include <stdlib.h> 14 - #include <stdbool.h> 15 - #include <stdint.h> 16 - #include <string.h> 17 - #include <ctype.h> 18 - #include <unistd.h> 19 - #include <locale.h> 20 - #include <getopt.h> 21 - #include <net/if.h> 22 - #include <time.h> 23 - #include <signal.h> 24 - #include <bpf/bpf.h> 25 - #include <bpf/libbpf.h> 26 - #include "bpf_util.h" 27 - #include "xdp_sample_user.h" 28 - #include "xdp_monitor.skel.h" 29 - 30 - static int mask = SAMPLE_REDIRECT_ERR_CNT | SAMPLE_CPUMAP_ENQUEUE_CNT | 31 - SAMPLE_CPUMAP_KTHREAD_CNT | SAMPLE_EXCEPTION_CNT | 32 - SAMPLE_DEVMAP_XMIT_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; 33 - 34 - DEFINE_SAMPLE_INIT(xdp_monitor); 35 - 36 - static const struct option long_options[] = { 37 - { "help", no_argument, NULL, 'h' }, 38 - { "stats", no_argument, NULL, 's' }, 39 - { "interval", required_argument, NULL, 'i' }, 40 - { "verbose", no_argument, NULL, 'v' }, 41 - {} 42 - }; 43 - 44 - int main(int argc, char **argv) 45 - { 46 - unsigned long interval = 2; 47 - int ret = EXIT_FAIL_OPTION; 48 - struct xdp_monitor *skel; 49 - bool errors_only = true; 50 - int longindex = 0, opt; 51 - bool error = true; 52 - 53 - /* Parse commands line args */ 54 - while ((opt = getopt_long(argc, argv, "si:vh", 55 - long_options, &longindex)) != -1) { 56 - switch (opt) { 57 - case 's': 58 - errors_only = false; 59 - mask |= SAMPLE_REDIRECT_CNT; 60 - break; 61 - case 'i': 62 - interval = strtoul(optarg, NULL, 0); 63 - break; 64 - case 'v': 65 - sample_switch_mode(); 66 - break; 67 - case 'h': 68 - error = false; 
69 - default: 70 - sample_usage(argv, long_options, __doc__, mask, error); 71 - return ret; 72 - } 73 - } 74 - 75 - skel = xdp_monitor__open(); 76 - if (!skel) { 77 - fprintf(stderr, "Failed to xdp_monitor__open: %s\n", 78 - strerror(errno)); 79 - ret = EXIT_FAIL_BPF; 80 - goto end; 81 - } 82 - 83 - ret = sample_init_pre_load(skel); 84 - if (ret < 0) { 85 - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); 86 - ret = EXIT_FAIL_BPF; 87 - goto end_destroy; 88 - } 89 - 90 - ret = xdp_monitor__load(skel); 91 - if (ret < 0) { 92 - fprintf(stderr, "Failed to xdp_monitor__load: %s\n", strerror(errno)); 93 - ret = EXIT_FAIL_BPF; 94 - goto end_destroy; 95 - } 96 - 97 - ret = sample_init(skel, mask); 98 - if (ret < 0) { 99 - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); 100 - ret = EXIT_FAIL_BPF; 101 - goto end_destroy; 102 - } 103 - 104 - if (errors_only) 105 - printf("%s", __doc_err_only__); 106 - 107 - ret = sample_run(interval, NULL, NULL); 108 - if (ret < 0) { 109 - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); 110 - ret = EXIT_FAIL; 111 - goto end_destroy; 112 - } 113 - ret = EXIT_OK; 114 - end_destroy: 115 - xdp_monitor__destroy(skel); 116 - end: 117 - sample_exit(ret); 118 - }
-49
samples/bpf/xdp_redirect.bpf.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> 3 - * 4 - * This program is free software; you can redistribute it and/or 5 - * modify it under the terms of version 2 of the GNU General Public 6 - * License as published by the Free Software Foundation. 7 - * 8 - * This program is distributed in the hope that it will be useful, but 9 - * WITHOUT ANY WARRANTY; without even the implied warranty of 10 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 - * General Public License for more details. 12 - */ 13 - #include "vmlinux.h" 14 - #include "xdp_sample.bpf.h" 15 - #include "xdp_sample_shared.h" 16 - 17 - const volatile int ifindex_out; 18 - 19 - SEC("xdp") 20 - int xdp_redirect_prog(struct xdp_md *ctx) 21 - { 22 - void *data_end = (void *)(long)ctx->data_end; 23 - void *data = (void *)(long)ctx->data; 24 - u32 key = bpf_get_smp_processor_id(); 25 - struct ethhdr *eth = data; 26 - struct datarec *rec; 27 - u64 nh_off; 28 - 29 - nh_off = sizeof(*eth); 30 - if (data + nh_off > data_end) 31 - return XDP_DROP; 32 - 33 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 34 - if (!rec) 35 - return XDP_PASS; 36 - NO_TEAR_INC(rec->processed); 37 - 38 - swap_src_dst_mac(data); 39 - return bpf_redirect(ifindex_out, 0); 40 - } 41 - 42 - /* Redirect require an XDP bpf_prog loaded on the TX device */ 43 - SEC("xdp") 44 - int xdp_redirect_dummy_prog(struct xdp_md *ctx) 45 - { 46 - return XDP_PASS; 47 - } 48 - 49 - char _license[] SEC("license") = "GPL";
-539
samples/bpf/xdp_redirect_cpu.bpf.c
··· 1 - /* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP) 2 - * 3 - * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. 4 - */ 5 - #include "vmlinux.h" 6 - #include "xdp_sample.bpf.h" 7 - #include "xdp_sample_shared.h" 8 - #include "hash_func01.h" 9 - 10 - /* Special map type that can XDP_REDIRECT frames to another CPU */ 11 - struct { 12 - __uint(type, BPF_MAP_TYPE_CPUMAP); 13 - __uint(key_size, sizeof(u32)); 14 - __uint(value_size, sizeof(struct bpf_cpumap_val)); 15 - } cpu_map SEC(".maps"); 16 - 17 - /* Set of maps controlling available CPU, and for iterating through 18 - * selectable redirect CPUs. 19 - */ 20 - struct { 21 - __uint(type, BPF_MAP_TYPE_ARRAY); 22 - __type(key, u32); 23 - __type(value, u32); 24 - } cpus_available SEC(".maps"); 25 - 26 - struct { 27 - __uint(type, BPF_MAP_TYPE_ARRAY); 28 - __type(key, u32); 29 - __type(value, u32); 30 - __uint(max_entries, 1); 31 - } cpus_count SEC(".maps"); 32 - 33 - struct { 34 - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 35 - __type(key, u32); 36 - __type(value, u32); 37 - __uint(max_entries, 1); 38 - } cpus_iterator SEC(".maps"); 39 - 40 - struct { 41 - __uint(type, BPF_MAP_TYPE_DEVMAP); 42 - __uint(key_size, sizeof(int)); 43 - __uint(value_size, sizeof(struct bpf_devmap_val)); 44 - __uint(max_entries, 1); 45 - } tx_port SEC(".maps"); 46 - 47 - char tx_mac_addr[ETH_ALEN]; 48 - 49 - /* Helper parse functions */ 50 - 51 - static __always_inline 52 - bool parse_eth(struct ethhdr *eth, void *data_end, 53 - u16 *eth_proto, u64 *l3_offset) 54 - { 55 - u16 eth_type; 56 - u64 offset; 57 - 58 - offset = sizeof(*eth); 59 - if ((void *)eth + offset > data_end) 60 - return false; 61 - 62 - eth_type = eth->h_proto; 63 - 64 - /* Skip non 802.3 Ethertypes */ 65 - if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0)) 66 - return false; 67 - 68 - /* Handle VLAN tagged packet */ 69 - if (eth_type == bpf_htons(ETH_P_8021Q) || 70 - eth_type == bpf_htons(ETH_P_8021AD)) { 71 - struct vlan_hdr *vlan_hdr; 
72 - 73 - vlan_hdr = (void *)eth + offset; 74 - offset += sizeof(*vlan_hdr); 75 - if ((void *)eth + offset > data_end) 76 - return false; 77 - eth_type = vlan_hdr->h_vlan_encapsulated_proto; 78 - } 79 - /* Handle double VLAN tagged packet */ 80 - if (eth_type == bpf_htons(ETH_P_8021Q) || 81 - eth_type == bpf_htons(ETH_P_8021AD)) { 82 - struct vlan_hdr *vlan_hdr; 83 - 84 - vlan_hdr = (void *)eth + offset; 85 - offset += sizeof(*vlan_hdr); 86 - if ((void *)eth + offset > data_end) 87 - return false; 88 - eth_type = vlan_hdr->h_vlan_encapsulated_proto; 89 - } 90 - 91 - *eth_proto = bpf_ntohs(eth_type); 92 - *l3_offset = offset; 93 - return true; 94 - } 95 - 96 - static __always_inline 97 - u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off) 98 - { 99 - void *data_end = (void *)(long)ctx->data_end; 100 - void *data = (void *)(long)ctx->data; 101 - struct iphdr *iph = data + nh_off; 102 - struct udphdr *udph; 103 - 104 - if (iph + 1 > data_end) 105 - return 0; 106 - if (!(iph->protocol == IPPROTO_UDP)) 107 - return 0; 108 - 109 - udph = (void *)(iph + 1); 110 - if (udph + 1 > data_end) 111 - return 0; 112 - 113 - return bpf_ntohs(udph->dest); 114 - } 115 - 116 - static __always_inline 117 - int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off) 118 - { 119 - void *data_end = (void *)(long)ctx->data_end; 120 - void *data = (void *)(long)ctx->data; 121 - struct iphdr *iph = data + nh_off; 122 - 123 - if (iph + 1 > data_end) 124 - return 0; 125 - return iph->protocol; 126 - } 127 - 128 - static __always_inline 129 - int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off) 130 - { 131 - void *data_end = (void *)(long)ctx->data_end; 132 - void *data = (void *)(long)ctx->data; 133 - struct ipv6hdr *ip6h = data + nh_off; 134 - 135 - if (ip6h + 1 > data_end) 136 - return 0; 137 - return ip6h->nexthdr; 138 - } 139 - 140 - SEC("xdp") 141 - int xdp_prognum0_no_touch(struct xdp_md *ctx) 142 - { 143 - u32 key = bpf_get_smp_processor_id(); 144 - struct datarec *rec; 145 - u32 
*cpu_selected; 146 - u32 cpu_dest = 0; 147 - u32 key0 = 0; 148 - 149 - /* Only use first entry in cpus_available */ 150 - cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0); 151 - if (!cpu_selected) 152 - return XDP_ABORTED; 153 - cpu_dest = *cpu_selected; 154 - 155 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 156 - if (!rec) 157 - return XDP_PASS; 158 - NO_TEAR_INC(rec->processed); 159 - 160 - if (cpu_dest >= nr_cpus) { 161 - NO_TEAR_INC(rec->issue); 162 - return XDP_ABORTED; 163 - } 164 - return bpf_redirect_map(&cpu_map, cpu_dest, 0); 165 - } 166 - 167 - SEC("xdp") 168 - int xdp_prognum1_touch_data(struct xdp_md *ctx) 169 - { 170 - void *data_end = (void *)(long)ctx->data_end; 171 - void *data = (void *)(long)ctx->data; 172 - u32 key = bpf_get_smp_processor_id(); 173 - struct ethhdr *eth = data; 174 - struct datarec *rec; 175 - u32 *cpu_selected; 176 - u32 cpu_dest = 0; 177 - u32 key0 = 0; 178 - u16 eth_type; 179 - 180 - /* Only use first entry in cpus_available */ 181 - cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0); 182 - if (!cpu_selected) 183 - return XDP_ABORTED; 184 - cpu_dest = *cpu_selected; 185 - 186 - /* Validate packet length is minimum Eth header size */ 187 - if (eth + 1 > data_end) 188 - return XDP_ABORTED; 189 - 190 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 191 - if (!rec) 192 - return XDP_PASS; 193 - NO_TEAR_INC(rec->processed); 194 - 195 - /* Read packet data, and use it (drop non 802.3 Ethertypes) */ 196 - eth_type = eth->h_proto; 197 - if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) { 198 - NO_TEAR_INC(rec->dropped); 199 - return XDP_DROP; 200 - } 201 - 202 - if (cpu_dest >= nr_cpus) { 203 - NO_TEAR_INC(rec->issue); 204 - return XDP_ABORTED; 205 - } 206 - return bpf_redirect_map(&cpu_map, cpu_dest, 0); 207 - } 208 - 209 - SEC("xdp") 210 - int xdp_prognum2_round_robin(struct xdp_md *ctx) 211 - { 212 - void *data_end = (void *)(long)ctx->data_end; 213 - void *data = (void *)(long)ctx->data; 214 - u32 key = 
bpf_get_smp_processor_id(); 215 - struct datarec *rec; 216 - u32 cpu_dest = 0; 217 - u32 key0 = 0; 218 - 219 - u32 *cpu_selected; 220 - u32 *cpu_iterator; 221 - u32 *cpu_max; 222 - u32 cpu_idx; 223 - 224 - cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); 225 - if (!cpu_max) 226 - return XDP_ABORTED; 227 - 228 - cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0); 229 - if (!cpu_iterator) 230 - return XDP_ABORTED; 231 - cpu_idx = *cpu_iterator; 232 - 233 - *cpu_iterator += 1; 234 - if (*cpu_iterator == *cpu_max) 235 - *cpu_iterator = 0; 236 - 237 - cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx); 238 - if (!cpu_selected) 239 - return XDP_ABORTED; 240 - cpu_dest = *cpu_selected; 241 - 242 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 243 - if (!rec) 244 - return XDP_PASS; 245 - NO_TEAR_INC(rec->processed); 246 - 247 - if (cpu_dest >= nr_cpus) { 248 - NO_TEAR_INC(rec->issue); 249 - return XDP_ABORTED; 250 - } 251 - return bpf_redirect_map(&cpu_map, cpu_dest, 0); 252 - } 253 - 254 - SEC("xdp") 255 - int xdp_prognum3_proto_separate(struct xdp_md *ctx) 256 - { 257 - void *data_end = (void *)(long)ctx->data_end; 258 - void *data = (void *)(long)ctx->data; 259 - u32 key = bpf_get_smp_processor_id(); 260 - struct ethhdr *eth = data; 261 - u8 ip_proto = IPPROTO_UDP; 262 - struct datarec *rec; 263 - u16 eth_proto = 0; 264 - u64 l3_offset = 0; 265 - u32 cpu_dest = 0; 266 - u32 *cpu_lookup; 267 - u32 cpu_idx = 0; 268 - 269 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 270 - if (!rec) 271 - return XDP_PASS; 272 - NO_TEAR_INC(rec->processed); 273 - 274 - if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset))) 275 - return XDP_PASS; /* Just skip */ 276 - 277 - /* Extract L4 protocol */ 278 - switch (eth_proto) { 279 - case ETH_P_IP: 280 - ip_proto = get_proto_ipv4(ctx, l3_offset); 281 - break; 282 - case ETH_P_IPV6: 283 - ip_proto = get_proto_ipv6(ctx, l3_offset); 284 - break; 285 - case ETH_P_ARP: 286 - cpu_idx = 0; /* ARP packet handled on separate CPU */ 287 
- break; 288 - default: 289 - cpu_idx = 0; 290 - } 291 - 292 - /* Choose CPU based on L4 protocol */ 293 - switch (ip_proto) { 294 - case IPPROTO_ICMP: 295 - case IPPROTO_ICMPV6: 296 - cpu_idx = 2; 297 - break; 298 - case IPPROTO_TCP: 299 - cpu_idx = 0; 300 - break; 301 - case IPPROTO_UDP: 302 - cpu_idx = 1; 303 - break; 304 - default: 305 - cpu_idx = 0; 306 - } 307 - 308 - cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); 309 - if (!cpu_lookup) 310 - return XDP_ABORTED; 311 - cpu_dest = *cpu_lookup; 312 - 313 - if (cpu_dest >= nr_cpus) { 314 - NO_TEAR_INC(rec->issue); 315 - return XDP_ABORTED; 316 - } 317 - return bpf_redirect_map(&cpu_map, cpu_dest, 0); 318 - } 319 - 320 - SEC("xdp") 321 - int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx) 322 - { 323 - void *data_end = (void *)(long)ctx->data_end; 324 - void *data = (void *)(long)ctx->data; 325 - u32 key = bpf_get_smp_processor_id(); 326 - struct ethhdr *eth = data; 327 - u8 ip_proto = IPPROTO_UDP; 328 - struct datarec *rec; 329 - u16 eth_proto = 0; 330 - u64 l3_offset = 0; 331 - u32 cpu_dest = 0; 332 - u32 *cpu_lookup; 333 - u32 cpu_idx = 0; 334 - u16 dest_port; 335 - 336 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 337 - if (!rec) 338 - return XDP_PASS; 339 - NO_TEAR_INC(rec->processed); 340 - 341 - if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset))) 342 - return XDP_PASS; /* Just skip */ 343 - 344 - /* Extract L4 protocol */ 345 - switch (eth_proto) { 346 - case ETH_P_IP: 347 - ip_proto = get_proto_ipv4(ctx, l3_offset); 348 - break; 349 - case ETH_P_IPV6: 350 - ip_proto = get_proto_ipv6(ctx, l3_offset); 351 - break; 352 - case ETH_P_ARP: 353 - cpu_idx = 0; /* ARP packet handled on separate CPU */ 354 - break; 355 - default: 356 - cpu_idx = 0; 357 - } 358 - 359 - /* Choose CPU based on L4 protocol */ 360 - switch (ip_proto) { 361 - case IPPROTO_ICMP: 362 - case IPPROTO_ICMPV6: 363 - cpu_idx = 2; 364 - break; 365 - case IPPROTO_TCP: 366 - cpu_idx = 0; 367 - break; 368 - case IPPROTO_UDP: 369 
- cpu_idx = 1; 370 - /* DDoS filter UDP port 9 (pktgen) */ 371 - dest_port = get_dest_port_ipv4_udp(ctx, l3_offset); 372 - if (dest_port == 9) { 373 - NO_TEAR_INC(rec->dropped); 374 - return XDP_DROP; 375 - } 376 - break; 377 - default: 378 - cpu_idx = 0; 379 - } 380 - 381 - cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); 382 - if (!cpu_lookup) 383 - return XDP_ABORTED; 384 - cpu_dest = *cpu_lookup; 385 - 386 - if (cpu_dest >= nr_cpus) { 387 - NO_TEAR_INC(rec->issue); 388 - return XDP_ABORTED; 389 - } 390 - return bpf_redirect_map(&cpu_map, cpu_dest, 0); 391 - } 392 - 393 - /* Hashing initval */ 394 - #define INITVAL 15485863 395 - 396 - static __always_inline 397 - u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off) 398 - { 399 - void *data_end = (void *)(long)ctx->data_end; 400 - void *data = (void *)(long)ctx->data; 401 - struct iphdr *iph = data + nh_off; 402 - u32 cpu_hash; 403 - 404 - if (iph + 1 > data_end) 405 - return 0; 406 - 407 - cpu_hash = iph->saddr + iph->daddr; 408 - cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol); 409 - 410 - return cpu_hash; 411 - } 412 - 413 - static __always_inline 414 - u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off) 415 - { 416 - void *data_end = (void *)(long)ctx->data_end; 417 - void *data = (void *)(long)ctx->data; 418 - struct ipv6hdr *ip6h = data + nh_off; 419 - u32 cpu_hash; 420 - 421 - if (ip6h + 1 > data_end) 422 - return 0; 423 - 424 - cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0]; 425 - cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1]; 426 - cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2]; 427 - cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3]; 428 - cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr); 429 - 430 - return cpu_hash; 431 - } 432 - 433 - /* Load-Balance traffic based on hashing IP-addrs + L4-proto. 
The 434 - * hashing scheme is symmetric, meaning swapping IP src/dest still hit 435 - * same CPU. 436 - */ 437 - SEC("xdp") 438 - int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx) 439 - { 440 - void *data_end = (void *)(long)ctx->data_end; 441 - void *data = (void *)(long)ctx->data; 442 - u32 key = bpf_get_smp_processor_id(); 443 - struct ethhdr *eth = data; 444 - struct datarec *rec; 445 - u16 eth_proto = 0; 446 - u64 l3_offset = 0; 447 - u32 cpu_dest = 0; 448 - u32 cpu_idx = 0; 449 - u32 *cpu_lookup; 450 - u32 key0 = 0; 451 - u32 *cpu_max; 452 - u32 cpu_hash; 453 - 454 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 455 - if (!rec) 456 - return XDP_PASS; 457 - NO_TEAR_INC(rec->processed); 458 - 459 - cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); 460 - if (!cpu_max) 461 - return XDP_ABORTED; 462 - 463 - if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset))) 464 - return XDP_PASS; /* Just skip */ 465 - 466 - /* Hash for IPv4 and IPv6 */ 467 - switch (eth_proto) { 468 - case ETH_P_IP: 469 - cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset); 470 - break; 471 - case ETH_P_IPV6: 472 - cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset); 473 - break; 474 - case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */ 475 - default: 476 - cpu_hash = 0; 477 - } 478 - 479 - /* Choose CPU based on hash */ 480 - cpu_idx = cpu_hash % *cpu_max; 481 - 482 - cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); 483 - if (!cpu_lookup) 484 - return XDP_ABORTED; 485 - cpu_dest = *cpu_lookup; 486 - 487 - if (cpu_dest >= nr_cpus) { 488 - NO_TEAR_INC(rec->issue); 489 - return XDP_ABORTED; 490 - } 491 - return bpf_redirect_map(&cpu_map, cpu_dest, 0); 492 - } 493 - 494 - SEC("xdp/cpumap") 495 - int xdp_redirect_cpu_devmap(struct xdp_md *ctx) 496 - { 497 - void *data_end = (void *)(long)ctx->data_end; 498 - void *data = (void *)(long)ctx->data; 499 - struct ethhdr *eth = data; 500 - u64 nh_off; 501 - 502 - nh_off = sizeof(*eth); 503 - if (data + nh_off > data_end) 504 - return 
XDP_DROP; 505 - 506 - swap_src_dst_mac(data); 507 - return bpf_redirect_map(&tx_port, 0, 0); 508 - } 509 - 510 - SEC("xdp/cpumap") 511 - int xdp_redirect_cpu_pass(struct xdp_md *ctx) 512 - { 513 - return XDP_PASS; 514 - } 515 - 516 - SEC("xdp/cpumap") 517 - int xdp_redirect_cpu_drop(struct xdp_md *ctx) 518 - { 519 - return XDP_DROP; 520 - } 521 - 522 - SEC("xdp/devmap") 523 - int xdp_redirect_egress_prog(struct xdp_md *ctx) 524 - { 525 - void *data_end = (void *)(long)ctx->data_end; 526 - void *data = (void *)(long)ctx->data; 527 - struct ethhdr *eth = data; 528 - u64 nh_off; 529 - 530 - nh_off = sizeof(*eth); 531 - if (data + nh_off > data_end) 532 - return XDP_DROP; 533 - 534 - __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN); 535 - 536 - return XDP_PASS; 537 - } 538 - 539 - char _license[] SEC("license") = "GPL";
-559
samples/bpf/xdp_redirect_cpu_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. 3 - */ 4 - static const char *__doc__ = 5 - "XDP CPU redirect tool, using BPF_MAP_TYPE_CPUMAP\n" 6 - "Usage: xdp_redirect_cpu -d <IFINDEX|IFNAME> -c 0 ... -c N\n" 7 - "Valid specification for CPUMAP BPF program:\n" 8 - " --mprog-name/-e pass (use built-in XDP_PASS program)\n" 9 - " --mprog-name/-e drop (use built-in XDP_DROP program)\n" 10 - " --redirect-device/-r <ifindex|ifname> (use built-in DEVMAP redirect program)\n" 11 - " Custom CPUMAP BPF program:\n" 12 - " --mprog-filename/-f <filename> --mprog-name/-e <program>\n" 13 - " Optionally, also pass --redirect-map/-m and --redirect-device/-r together\n" 14 - " to configure DEVMAP in BPF object <filename>\n"; 15 - 16 - #include <errno.h> 17 - #include <signal.h> 18 - #include <stdio.h> 19 - #include <stdlib.h> 20 - #include <stdbool.h> 21 - #include <string.h> 22 - #include <unistd.h> 23 - #include <locale.h> 24 - #include <sys/sysinfo.h> 25 - #include <getopt.h> 26 - #include <net/if.h> 27 - #include <time.h> 28 - #include <linux/limits.h> 29 - #include <arpa/inet.h> 30 - #include <linux/if_link.h> 31 - #include <bpf/bpf.h> 32 - #include <bpf/libbpf.h> 33 - #include "bpf_util.h" 34 - #include "xdp_sample_user.h" 35 - #include "xdp_redirect_cpu.skel.h" 36 - 37 - static int map_fd; 38 - static int avail_fd; 39 - static int count_fd; 40 - 41 - static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | 42 - SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT | 43 - SAMPLE_EXCEPTION_CNT; 44 - 45 - DEFINE_SAMPLE_INIT(xdp_redirect_cpu); 46 - 47 - static const struct option long_options[] = { 48 - { "help", no_argument, NULL, 'h' }, 49 - { "dev", required_argument, NULL, 'd' }, 50 - { "skb-mode", no_argument, NULL, 'S' }, 51 - { "progname", required_argument, NULL, 'p' }, 52 - { "qsize", required_argument, NULL, 'q' }, 53 - { "cpu", required_argument, NULL, 'c' }, 54 - { "stress-mode", no_argument, 
NULL, 'x' }, 55 - { "force", no_argument, NULL, 'F' }, 56 - { "interval", required_argument, NULL, 'i' }, 57 - { "verbose", no_argument, NULL, 'v' }, 58 - { "stats", no_argument, NULL, 's' }, 59 - { "mprog-name", required_argument, NULL, 'e' }, 60 - { "mprog-filename", required_argument, NULL, 'f' }, 61 - { "redirect-device", required_argument, NULL, 'r' }, 62 - { "redirect-map", required_argument, NULL, 'm' }, 63 - {} 64 - }; 65 - 66 - static void print_avail_progs(struct bpf_object *obj) 67 - { 68 - struct bpf_program *pos; 69 - 70 - printf(" Programs to be used for -p/--progname:\n"); 71 - bpf_object__for_each_program(pos, obj) { 72 - if (bpf_program__type(pos) == BPF_PROG_TYPE_XDP) { 73 - if (!strncmp(bpf_program__name(pos), "xdp_prognum", 74 - sizeof("xdp_prognum") - 1)) 75 - printf(" %s\n", bpf_program__name(pos)); 76 - } 77 - } 78 - } 79 - 80 - static void usage(char *argv[], const struct option *long_options, 81 - const char *doc, int mask, bool error, struct bpf_object *obj) 82 - { 83 - sample_usage(argv, long_options, doc, mask, error); 84 - print_avail_progs(obj); 85 - } 86 - 87 - static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, 88 - __u32 avail_idx, bool new) 89 - { 90 - __u32 curr_cpus_count = 0; 91 - __u32 key = 0; 92 - int ret; 93 - 94 - /* Add a CPU entry to cpumap, as this allocate a cpu entry in 95 - * the kernel for the cpu. 96 - */ 97 - ret = bpf_map_update_elem(map_fd, &cpu, value, 0); 98 - if (ret < 0) { 99 - fprintf(stderr, "Create CPU entry failed: %s\n", strerror(errno)); 100 - return ret; 101 - } 102 - 103 - /* Inform bpf_prog's that a new CPU is available to select 104 - * from via some control maps. 
105 - */ 106 - ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0); 107 - if (ret < 0) { 108 - fprintf(stderr, "Add to avail CPUs failed: %s\n", strerror(errno)); 109 - return ret; 110 - } 111 - 112 - /* When not replacing/updating existing entry, bump the count */ 113 - ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count); 114 - if (ret < 0) { 115 - fprintf(stderr, "Failed reading curr cpus_count: %s\n", 116 - strerror(errno)); 117 - return ret; 118 - } 119 - if (new) { 120 - curr_cpus_count++; 121 - ret = bpf_map_update_elem(count_fd, &key, 122 - &curr_cpus_count, 0); 123 - if (ret < 0) { 124 - fprintf(stderr, "Failed write curr cpus_count: %s\n", 125 - strerror(errno)); 126 - return ret; 127 - } 128 - } 129 - 130 - printf("%s CPU: %u as idx: %u qsize: %d cpumap_prog_fd: %d (cpus_count: %u)\n", 131 - new ? "Add new" : "Replace", cpu, avail_idx, 132 - value->qsize, value->bpf_prog.fd, curr_cpus_count); 133 - 134 - return 0; 135 - } 136 - 137 - /* CPUs are zero-indexed. Thus, add a special sentinel default value 138 - * in map cpus_available to mark CPU index'es not configured 139 - */ 140 - static int mark_cpus_unavailable(void) 141 - { 142 - int ret, i, n_cpus = libbpf_num_possible_cpus(); 143 - __u32 invalid_cpu = n_cpus; 144 - 145 - for (i = 0; i < n_cpus; i++) { 146 - ret = bpf_map_update_elem(avail_fd, &i, 147 - &invalid_cpu, 0); 148 - if (ret < 0) { 149 - fprintf(stderr, "Failed marking CPU unavailable: %s\n", 150 - strerror(errno)); 151 - return ret; 152 - } 153 - } 154 - 155 - return 0; 156 - } 157 - 158 - /* Stress cpumap management code by concurrently changing underlying cpumap */ 159 - static void stress_cpumap(void *ctx) 160 - { 161 - struct bpf_cpumap_val *value = ctx; 162 - 163 - /* Changing qsize will cause kernel to free and alloc a new 164 - * bpf_cpu_map_entry, with an associated/complicated tear-down 165 - * procedure. 
166 - */ 167 - value->qsize = 1024; 168 - create_cpu_entry(1, value, 0, false); 169 - value->qsize = 8; 170 - create_cpu_entry(1, value, 0, false); 171 - value->qsize = 16000; 172 - create_cpu_entry(1, value, 0, false); 173 - } 174 - 175 - static int set_cpumap_prog(struct xdp_redirect_cpu *skel, 176 - const char *redir_interface, const char *redir_map, 177 - const char *mprog_filename, const char *mprog_name) 178 - { 179 - if (mprog_filename) { 180 - struct bpf_program *prog; 181 - struct bpf_object *obj; 182 - int ret; 183 - 184 - if (!mprog_name) { 185 - fprintf(stderr, "BPF program not specified for file %s\n", 186 - mprog_filename); 187 - goto end; 188 - } 189 - if ((redir_interface && !redir_map) || (!redir_interface && redir_map)) { 190 - fprintf(stderr, "--redirect-%s specified but --redirect-%s not specified\n", 191 - redir_interface ? "device" : "map", redir_interface ? "map" : "device"); 192 - goto end; 193 - } 194 - 195 - /* Custom BPF program */ 196 - obj = bpf_object__open_file(mprog_filename, NULL); 197 - if (!obj) { 198 - ret = -errno; 199 - fprintf(stderr, "Failed to bpf_prog_load_xattr: %s\n", 200 - strerror(errno)); 201 - return ret; 202 - } 203 - 204 - ret = bpf_object__load(obj); 205 - if (ret < 0) { 206 - ret = -errno; 207 - fprintf(stderr, "Failed to bpf_object__load: %s\n", 208 - strerror(errno)); 209 - return ret; 210 - } 211 - 212 - if (redir_map) { 213 - int err, redir_map_fd, ifindex_out, key = 0; 214 - 215 - redir_map_fd = bpf_object__find_map_fd_by_name(obj, redir_map); 216 - if (redir_map_fd < 0) { 217 - fprintf(stderr, "Failed to bpf_object__find_map_fd_by_name: %s\n", 218 - strerror(errno)); 219 - return redir_map_fd; 220 - } 221 - 222 - ifindex_out = if_nametoindex(redir_interface); 223 - if (!ifindex_out) 224 - ifindex_out = strtoul(redir_interface, NULL, 0); 225 - if (!ifindex_out) { 226 - fprintf(stderr, "Bad interface name or index\n"); 227 - return -EINVAL; 228 - } 229 - 230 - err = bpf_map_update_elem(redir_map_fd, &key, 
&ifindex_out, 0); 231 - if (err < 0) 232 - return err; 233 - } 234 - 235 - prog = bpf_object__find_program_by_name(obj, mprog_name); 236 - if (!prog) { 237 - ret = -errno; 238 - fprintf(stderr, "Failed to bpf_object__find_program_by_name: %s\n", 239 - strerror(errno)); 240 - return ret; 241 - } 242 - 243 - return bpf_program__fd(prog); 244 - } else { 245 - if (mprog_name) { 246 - if (redir_interface || redir_map) { 247 - fprintf(stderr, "Need to specify --mprog-filename/-f\n"); 248 - goto end; 249 - } 250 - if (!strcmp(mprog_name, "pass") || !strcmp(mprog_name, "drop")) { 251 - /* Use built-in pass/drop programs */ 252 - return *mprog_name == 'p' ? bpf_program__fd(skel->progs.xdp_redirect_cpu_pass) 253 - : bpf_program__fd(skel->progs.xdp_redirect_cpu_drop); 254 - } else { 255 - fprintf(stderr, "Unknown name \"%s\" for built-in BPF program\n", 256 - mprog_name); 257 - goto end; 258 - } 259 - } else { 260 - if (redir_map) { 261 - fprintf(stderr, "Need to specify --mprog-filename, --mprog-name and" 262 - " --redirect-device with --redirect-map\n"); 263 - goto end; 264 - } 265 - if (redir_interface) { 266 - /* Use built-in devmap redirect */ 267 - struct bpf_devmap_val val = {}; 268 - int ifindex_out, err; 269 - __u32 key = 0; 270 - 271 - if (!redir_interface) 272 - return 0; 273 - 274 - ifindex_out = if_nametoindex(redir_interface); 275 - if (!ifindex_out) 276 - ifindex_out = strtoul(redir_interface, NULL, 0); 277 - if (!ifindex_out) { 278 - fprintf(stderr, "Bad interface name or index\n"); 279 - return -EINVAL; 280 - } 281 - 282 - if (get_mac_addr(ifindex_out, skel->bss->tx_mac_addr) < 0) { 283 - printf("Get interface %d mac failed\n", ifindex_out); 284 - return -EINVAL; 285 - } 286 - 287 - val.ifindex = ifindex_out; 288 - val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_egress_prog); 289 - err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0); 290 - if (err < 0) 291 - return -errno; 292 - 293 - return 
bpf_program__fd(skel->progs.xdp_redirect_cpu_devmap); 294 - } 295 - } 296 - } 297 - 298 - /* Disabled */ 299 - return 0; 300 - end: 301 - fprintf(stderr, "Invalid options for CPUMAP BPF program\n"); 302 - return -EINVAL; 303 - } 304 - 305 - int main(int argc, char **argv) 306 - { 307 - const char *redir_interface = NULL, *redir_map = NULL; 308 - const char *mprog_filename = NULL, *mprog_name = NULL; 309 - struct xdp_redirect_cpu *skel; 310 - struct bpf_map_info info = {}; 311 - struct bpf_cpumap_val value; 312 - __u32 infosz = sizeof(info); 313 - int ret = EXIT_FAIL_OPTION; 314 - unsigned long interval = 2; 315 - bool stress_mode = false; 316 - struct bpf_program *prog; 317 - const char *prog_name; 318 - bool generic = false; 319 - bool force = false; 320 - int added_cpus = 0; 321 - bool error = true; 322 - int longindex = 0; 323 - int add_cpu = -1; 324 - int ifindex = -1; 325 - int *cpu, i, opt; 326 - __u32 qsize; 327 - int n_cpus; 328 - 329 - n_cpus = libbpf_num_possible_cpus(); 330 - 331 - /* Notice: Choosing the queue size is very important when CPU is 332 - * configured with power-saving states. 333 - * 334 - * If deepest state take 133 usec to wakeup from (133/10^6). When link 335 - * speed is 10Gbit/s ((10*10^9/8) in bytes/sec). How many bytes can 336 - * arrive with in 133 usec at this speed: (10*10^9/8)*(133/10^6) = 337 - * 166250 bytes. With MTU size packets this is 110 packets, and with 338 - * minimum Ethernet (MAC-preamble + intergap) 84 bytes is 1979 packets. 339 - * 340 - * Setting default cpumap queue to 2048 as worst-case (small packet) 341 - * should be +64 packet due kthread wakeup call (due to xdp_do_flush) 342 - * worst-case is 2043 packets. 
343 - * 344 - * Sysadm can configured system to avoid deep-sleep via: 345 - * tuned-adm profile network-latency 346 - */ 347 - qsize = 2048; 348 - 349 - skel = xdp_redirect_cpu__open(); 350 - if (!skel) { 351 - fprintf(stderr, "Failed to xdp_redirect_cpu__open: %s\n", 352 - strerror(errno)); 353 - ret = EXIT_FAIL_BPF; 354 - goto end; 355 - } 356 - 357 - ret = sample_init_pre_load(skel); 358 - if (ret < 0) { 359 - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); 360 - ret = EXIT_FAIL_BPF; 361 - goto end_destroy; 362 - } 363 - 364 - if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) { 365 - fprintf(stderr, "Failed to set max entries for cpu_map map: %s", 366 - strerror(errno)); 367 - ret = EXIT_FAIL_BPF; 368 - goto end_destroy; 369 - } 370 - 371 - if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) { 372 - fprintf(stderr, "Failed to set max entries for cpus_available map: %s", 373 - strerror(errno)); 374 - ret = EXIT_FAIL_BPF; 375 - goto end_destroy; 376 - } 377 - 378 - cpu = calloc(n_cpus, sizeof(int)); 379 - if (!cpu) { 380 - fprintf(stderr, "Failed to allocate cpu array\n"); 381 - goto end_destroy; 382 - } 383 - 384 - prog = skel->progs.xdp_prognum5_lb_hash_ip_pairs; 385 - while ((opt = getopt_long(argc, argv, "d:si:Sxp:f:e:r:m:c:q:Fvh", 386 - long_options, &longindex)) != -1) { 387 - switch (opt) { 388 - case 'd': 389 - if (strlen(optarg) >= IF_NAMESIZE) { 390 - fprintf(stderr, "-d/--dev name too long\n"); 391 - usage(argv, long_options, __doc__, mask, true, skel->obj); 392 - goto end_cpu; 393 - } 394 - ifindex = if_nametoindex(optarg); 395 - if (!ifindex) 396 - ifindex = strtoul(optarg, NULL, 0); 397 - if (!ifindex) { 398 - fprintf(stderr, "Bad interface index or name (%d): %s\n", 399 - errno, strerror(errno)); 400 - usage(argv, long_options, __doc__, mask, true, skel->obj); 401 - goto end_cpu; 402 - } 403 - break; 404 - case 's': 405 - mask |= SAMPLE_REDIRECT_MAP_CNT; 406 - break; 407 - case 'i': 408 - 
interval = strtoul(optarg, NULL, 0); 409 - break; 410 - case 'S': 411 - generic = true; 412 - break; 413 - case 'x': 414 - stress_mode = true; 415 - break; 416 - case 'p': 417 - /* Selecting eBPF prog to load */ 418 - prog_name = optarg; 419 - prog = bpf_object__find_program_by_name(skel->obj, 420 - prog_name); 421 - if (!prog) { 422 - fprintf(stderr, 423 - "Failed to find program %s specified by" 424 - " option -p/--progname\n", 425 - prog_name); 426 - print_avail_progs(skel->obj); 427 - goto end_cpu; 428 - } 429 - break; 430 - case 'f': 431 - mprog_filename = optarg; 432 - break; 433 - case 'e': 434 - mprog_name = optarg; 435 - break; 436 - case 'r': 437 - redir_interface = optarg; 438 - mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI; 439 - break; 440 - case 'm': 441 - redir_map = optarg; 442 - break; 443 - case 'c': 444 - /* Add multiple CPUs */ 445 - add_cpu = strtoul(optarg, NULL, 0); 446 - if (add_cpu >= n_cpus) { 447 - fprintf(stderr, 448 - "--cpu nr too large for cpumap err (%d):%s\n", 449 - errno, strerror(errno)); 450 - usage(argv, long_options, __doc__, mask, true, skel->obj); 451 - goto end_cpu; 452 - } 453 - cpu[added_cpus++] = add_cpu; 454 - break; 455 - case 'q': 456 - qsize = strtoul(optarg, NULL, 0); 457 - break; 458 - case 'F': 459 - force = true; 460 - break; 461 - case 'v': 462 - sample_switch_mode(); 463 - break; 464 - case 'h': 465 - error = false; 466 - default: 467 - usage(argv, long_options, __doc__, mask, error, skel->obj); 468 - goto end_cpu; 469 - } 470 - } 471 - 472 - ret = EXIT_FAIL_OPTION; 473 - if (ifindex == -1) { 474 - fprintf(stderr, "Required option --dev missing\n"); 475 - usage(argv, long_options, __doc__, mask, true, skel->obj); 476 - goto end_cpu; 477 - } 478 - 479 - if (add_cpu == -1) { 480 - fprintf(stderr, "Required option --cpu missing\n" 481 - "Specify multiple --cpu option to add more\n"); 482 - usage(argv, long_options, __doc__, mask, true, skel->obj); 483 - goto end_cpu; 484 - } 485 - 486 - skel->rodata->from_match[0] = 
ifindex; 487 - if (redir_interface) 488 - skel->rodata->to_match[0] = if_nametoindex(redir_interface); 489 - 490 - ret = xdp_redirect_cpu__load(skel); 491 - if (ret < 0) { 492 - fprintf(stderr, "Failed to xdp_redirect_cpu__load: %s\n", 493 - strerror(errno)); 494 - goto end_cpu; 495 - } 496 - 497 - ret = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz); 498 - if (ret < 0) { 499 - fprintf(stderr, "Failed bpf_map_get_info_by_fd for cpumap: %s\n", 500 - strerror(errno)); 501 - goto end_cpu; 502 - } 503 - 504 - skel->bss->cpumap_map_id = info.id; 505 - 506 - map_fd = bpf_map__fd(skel->maps.cpu_map); 507 - avail_fd = bpf_map__fd(skel->maps.cpus_available); 508 - count_fd = bpf_map__fd(skel->maps.cpus_count); 509 - 510 - ret = mark_cpus_unavailable(); 511 - if (ret < 0) { 512 - fprintf(stderr, "Unable to mark CPUs as unavailable\n"); 513 - goto end_cpu; 514 - } 515 - 516 - ret = sample_init(skel, mask); 517 - if (ret < 0) { 518 - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); 519 - ret = EXIT_FAIL; 520 - goto end_cpu; 521 - } 522 - 523 - value.bpf_prog.fd = set_cpumap_prog(skel, redir_interface, redir_map, 524 - mprog_filename, mprog_name); 525 - if (value.bpf_prog.fd < 0) { 526 - fprintf(stderr, "Failed to set CPUMAP BPF program: %s\n", 527 - strerror(-value.bpf_prog.fd)); 528 - usage(argv, long_options, __doc__, mask, true, skel->obj); 529 - ret = EXIT_FAIL_BPF; 530 - goto end_cpu; 531 - } 532 - value.qsize = qsize; 533 - 534 - for (i = 0; i < added_cpus; i++) { 535 - if (create_cpu_entry(cpu[i], &value, i, true) < 0) { 536 - fprintf(stderr, "Cannot proceed, exiting\n"); 537 - usage(argv, long_options, __doc__, mask, true, skel->obj); 538 - goto end_cpu; 539 - } 540 - } 541 - 542 - ret = EXIT_FAIL_XDP; 543 - if (sample_install_xdp(prog, ifindex, generic, force) < 0) 544 - goto end_cpu; 545 - 546 - ret = sample_run(interval, stress_mode ? 
stress_cpumap : NULL, &value); 547 - if (ret < 0) { 548 - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); 549 - ret = EXIT_FAIL; 550 - goto end_cpu; 551 - } 552 - ret = EXIT_OK; 553 - end_cpu: 554 - free(cpu); 555 - end_destroy: 556 - xdp_redirect_cpu__destroy(skel); 557 - end: 558 - sample_exit(ret); 559 - }
-97
samples/bpf/xdp_redirect_map.bpf.c
··· 1 - /* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io 2 - * 3 - * This program is free software; you can redistribute it and/or 4 - * modify it under the terms of version 2 of the GNU General Public 5 - * License as published by the Free Software Foundation. 6 - * 7 - * This program is distributed in the hope that it will be useful, but 8 - * WITHOUT ANY WARRANTY; without even the implied warranty of 9 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 - * General Public License for more details. 11 - */ 12 - #define KBUILD_MODNAME "foo" 13 - 14 - #include "vmlinux.h" 15 - #include "xdp_sample.bpf.h" 16 - #include "xdp_sample_shared.h" 17 - 18 - /* The 2nd xdp prog on egress does not support skb mode, so we define two 19 - * maps, tx_port_general and tx_port_native. 20 - */ 21 - struct { 22 - __uint(type, BPF_MAP_TYPE_DEVMAP); 23 - __uint(key_size, sizeof(int)); 24 - __uint(value_size, sizeof(int)); 25 - __uint(max_entries, 1); 26 - } tx_port_general SEC(".maps"); 27 - 28 - struct { 29 - __uint(type, BPF_MAP_TYPE_DEVMAP); 30 - __uint(key_size, sizeof(int)); 31 - __uint(value_size, sizeof(struct bpf_devmap_val)); 32 - __uint(max_entries, 1); 33 - } tx_port_native SEC(".maps"); 34 - 35 - /* store egress interface mac address */ 36 - const volatile __u8 tx_mac_addr[ETH_ALEN]; 37 - 38 - static __always_inline int xdp_redirect_map(struct xdp_md *ctx, void *redirect_map) 39 - { 40 - void *data_end = (void *)(long)ctx->data_end; 41 - void *data = (void *)(long)ctx->data; 42 - u32 key = bpf_get_smp_processor_id(); 43 - struct ethhdr *eth = data; 44 - struct datarec *rec; 45 - u64 nh_off; 46 - 47 - nh_off = sizeof(*eth); 48 - if (data + nh_off > data_end) 49 - return XDP_DROP; 50 - 51 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 52 - if (!rec) 53 - return XDP_PASS; 54 - NO_TEAR_INC(rec->processed); 55 - swap_src_dst_mac(data); 56 - return bpf_redirect_map(redirect_map, 0, 0); 57 - } 58 - 59 - SEC("xdp") 60 - int 
xdp_redirect_map_general(struct xdp_md *ctx) 61 - { 62 - return xdp_redirect_map(ctx, &tx_port_general); 63 - } 64 - 65 - SEC("xdp") 66 - int xdp_redirect_map_native(struct xdp_md *ctx) 67 - { 68 - return xdp_redirect_map(ctx, &tx_port_native); 69 - } 70 - 71 - SEC("xdp/devmap") 72 - int xdp_redirect_map_egress(struct xdp_md *ctx) 73 - { 74 - void *data_end = (void *)(long)ctx->data_end; 75 - void *data = (void *)(long)ctx->data; 76 - u8 *mac_addr = (u8 *) tx_mac_addr; 77 - struct ethhdr *eth = data; 78 - u64 nh_off; 79 - 80 - nh_off = sizeof(*eth); 81 - if (data + nh_off > data_end) 82 - return XDP_DROP; 83 - 84 - barrier_var(mac_addr); /* prevent optimizing out memcpy */ 85 - __builtin_memcpy(eth->h_source, mac_addr, ETH_ALEN); 86 - 87 - return XDP_PASS; 88 - } 89 - 90 - /* Redirect require an XDP bpf_prog loaded on the TX device */ 91 - SEC("xdp") 92 - int xdp_redirect_dummy_prog(struct xdp_md *ctx) 93 - { 94 - return XDP_PASS; 95 - } 96 - 97 - char _license[] SEC("license") = "GPL";
-77
samples/bpf/xdp_redirect_map_multi.bpf.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - #define KBUILD_MODNAME "foo" 3 - 4 - #include "vmlinux.h" 5 - #include "xdp_sample.bpf.h" 6 - #include "xdp_sample_shared.h" 7 - 8 - struct { 9 - __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); 10 - __uint(key_size, sizeof(int)); 11 - __uint(value_size, sizeof(int)); 12 - __uint(max_entries, 32); 13 - } forward_map_general SEC(".maps"); 14 - 15 - struct { 16 - __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); 17 - __uint(key_size, sizeof(int)); 18 - __uint(value_size, sizeof(struct bpf_devmap_val)); 19 - __uint(max_entries, 32); 20 - } forward_map_native SEC(".maps"); 21 - 22 - /* map to store egress interfaces mac addresses */ 23 - struct { 24 - __uint(type, BPF_MAP_TYPE_HASH); 25 - __type(key, u32); 26 - __type(value, __be64); 27 - __uint(max_entries, 32); 28 - } mac_map SEC(".maps"); 29 - 30 - static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map) 31 - { 32 - u32 key = bpf_get_smp_processor_id(); 33 - struct datarec *rec; 34 - 35 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 36 - if (!rec) 37 - return XDP_PASS; 38 - NO_TEAR_INC(rec->processed); 39 - 40 - return bpf_redirect_map(forward_map, 0, 41 - BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); 42 - } 43 - 44 - SEC("xdp") 45 - int xdp_redirect_map_general(struct xdp_md *ctx) 46 - { 47 - return xdp_redirect_map(ctx, &forward_map_general); 48 - } 49 - 50 - SEC("xdp") 51 - int xdp_redirect_map_native(struct xdp_md *ctx) 52 - { 53 - return xdp_redirect_map(ctx, &forward_map_native); 54 - } 55 - 56 - SEC("xdp/devmap") 57 - int xdp_devmap_prog(struct xdp_md *ctx) 58 - { 59 - void *data_end = (void *)(long)ctx->data_end; 60 - void *data = (void *)(long)ctx->data; 61 - u32 key = ctx->egress_ifindex; 62 - struct ethhdr *eth = data; 63 - __be64 *mac; 64 - u64 nh_off; 65 - 66 - nh_off = sizeof(*eth); 67 - if (data + nh_off > data_end) 68 - return XDP_DROP; 69 - 70 - mac = bpf_map_lookup_elem(&mac_map, &key); 71 - if (mac) 72 - __builtin_memcpy(eth->h_source, mac, ETH_ALEN); 73 - 74 - 
return XDP_PASS; 75 - } 76 - 77 - char _license[] SEC("license") = "GPL";
-232
samples/bpf/xdp_redirect_map_multi_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - static const char *__doc__ = 3 - "XDP multi redirect tool, using BPF_MAP_TYPE_DEVMAP and BPF_F_BROADCAST flag for bpf_redirect_map\n" 4 - "Usage: xdp_redirect_map_multi <IFINDEX|IFNAME> <IFINDEX|IFNAME> ... <IFINDEX|IFNAME>\n"; 5 - 6 - #include <linux/bpf.h> 7 - #include <linux/if_link.h> 8 - #include <assert.h> 9 - #include <getopt.h> 10 - #include <errno.h> 11 - #include <signal.h> 12 - #include <stdio.h> 13 - #include <stdlib.h> 14 - #include <string.h> 15 - #include <net/if.h> 16 - #include <unistd.h> 17 - #include <libgen.h> 18 - #include <sys/ioctl.h> 19 - #include <sys/types.h> 20 - #include <sys/socket.h> 21 - #include <netinet/in.h> 22 - #include <linux/if_ether.h> 23 - #include <bpf/bpf.h> 24 - #include <bpf/libbpf.h> 25 - #include "bpf_util.h" 26 - #include "xdp_sample_user.h" 27 - #include "xdp_redirect_map_multi.skel.h" 28 - 29 - #define MAX_IFACE_NUM 32 30 - static int ifaces[MAX_IFACE_NUM] = {}; 31 - 32 - static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | 33 - SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT | 34 - SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_SKIP_HEADING; 35 - 36 - DEFINE_SAMPLE_INIT(xdp_redirect_map_multi); 37 - 38 - static const struct option long_options[] = { 39 - { "help", no_argument, NULL, 'h' }, 40 - { "skb-mode", no_argument, NULL, 'S' }, 41 - { "force", no_argument, NULL, 'F' }, 42 - { "load-egress", no_argument, NULL, 'X' }, 43 - { "stats", no_argument, NULL, 's' }, 44 - { "interval", required_argument, NULL, 'i' }, 45 - { "verbose", no_argument, NULL, 'v' }, 46 - {} 47 - }; 48 - 49 - static int update_mac_map(struct bpf_map *map) 50 - { 51 - int mac_map_fd = bpf_map__fd(map); 52 - unsigned char mac_addr[6]; 53 - unsigned int ifindex; 54 - int i, ret = -1; 55 - 56 - for (i = 0; ifaces[i] > 0; i++) { 57 - ifindex = ifaces[i]; 58 - 59 - ret = get_mac_addr(ifindex, mac_addr); 60 - if (ret < 0) { 61 - fprintf(stderr, "get interface %d mac failed\n", 62 - ifindex); 63 - return 
ret; 64 - } 65 - 66 - ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0); 67 - if (ret < 0) { 68 - fprintf(stderr, "Failed to update mac address for ifindex %d\n", 69 - ifindex); 70 - return ret; 71 - } 72 - } 73 - 74 - return 0; 75 - } 76 - 77 - int main(int argc, char **argv) 78 - { 79 - struct bpf_devmap_val devmap_val = {}; 80 - struct xdp_redirect_map_multi *skel; 81 - struct bpf_program *ingress_prog; 82 - bool xdp_devmap_attached = false; 83 - struct bpf_map *forward_map; 84 - int ret = EXIT_FAIL_OPTION; 85 - unsigned long interval = 2; 86 - char ifname[IF_NAMESIZE]; 87 - unsigned int ifindex; 88 - bool generic = false; 89 - bool force = false; 90 - bool tried = false; 91 - bool error = true; 92 - int i, opt; 93 - 94 - while ((opt = getopt_long(argc, argv, "hSFXi:vs", 95 - long_options, NULL)) != -1) { 96 - switch (opt) { 97 - case 'S': 98 - generic = true; 99 - /* devmap_xmit tracepoint not available */ 100 - mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | 101 - SAMPLE_DEVMAP_XMIT_CNT_MULTI); 102 - break; 103 - case 'F': 104 - force = true; 105 - break; 106 - case 'X': 107 - xdp_devmap_attached = true; 108 - break; 109 - case 'i': 110 - interval = strtoul(optarg, NULL, 0); 111 - break; 112 - case 'v': 113 - sample_switch_mode(); 114 - break; 115 - case 's': 116 - mask |= SAMPLE_REDIRECT_MAP_CNT; 117 - break; 118 - case 'h': 119 - error = false; 120 - default: 121 - sample_usage(argv, long_options, __doc__, mask, error); 122 - return ret; 123 - } 124 - } 125 - 126 - if (argc <= optind + 1) { 127 - sample_usage(argv, long_options, __doc__, mask, error); 128 - return ret; 129 - } 130 - 131 - skel = xdp_redirect_map_multi__open(); 132 - if (!skel) { 133 - fprintf(stderr, "Failed to xdp_redirect_map_multi__open: %s\n", 134 - strerror(errno)); 135 - ret = EXIT_FAIL_BPF; 136 - goto end; 137 - } 138 - 139 - ret = sample_init_pre_load(skel); 140 - if (ret < 0) { 141 - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); 142 - ret = EXIT_FAIL_BPF; 
143 - goto end_destroy; 144 - } 145 - 146 - ret = EXIT_FAIL_OPTION; 147 - for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) { 148 - ifaces[i] = if_nametoindex(argv[optind + i]); 149 - if (!ifaces[i]) 150 - ifaces[i] = strtoul(argv[optind + i], NULL, 0); 151 - if (!if_indextoname(ifaces[i], ifname)) { 152 - fprintf(stderr, "Bad interface index or name\n"); 153 - sample_usage(argv, long_options, __doc__, mask, true); 154 - goto end_destroy; 155 - } 156 - 157 - skel->rodata->from_match[i] = ifaces[i]; 158 - skel->rodata->to_match[i] = ifaces[i]; 159 - } 160 - 161 - ret = xdp_redirect_map_multi__load(skel); 162 - if (ret < 0) { 163 - fprintf(stderr, "Failed to xdp_redirect_map_multi__load: %s\n", 164 - strerror(errno)); 165 - ret = EXIT_FAIL_BPF; 166 - goto end_destroy; 167 - } 168 - 169 - if (xdp_devmap_attached) { 170 - /* Update mac_map with all egress interfaces' mac addr */ 171 - if (update_mac_map(skel->maps.mac_map) < 0) { 172 - fprintf(stderr, "Updating mac address failed\n"); 173 - ret = EXIT_FAIL; 174 - goto end_destroy; 175 - } 176 - } 177 - 178 - ret = sample_init(skel, mask); 179 - if (ret < 0) { 180 - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); 181 - ret = EXIT_FAIL; 182 - goto end_destroy; 183 - } 184 - 185 - ingress_prog = skel->progs.xdp_redirect_map_native; 186 - forward_map = skel->maps.forward_map_native; 187 - 188 - for (i = 0; ifaces[i] > 0; i++) { 189 - ifindex = ifaces[i]; 190 - 191 - ret = EXIT_FAIL_XDP; 192 - restart: 193 - /* bind prog_fd to each interface */ 194 - if (sample_install_xdp(ingress_prog, ifindex, generic, force) < 0) { 195 - if (generic && !tried) { 196 - fprintf(stderr, 197 - "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n"); 198 - ingress_prog = skel->progs.xdp_redirect_map_general; 199 - forward_map = skel->maps.forward_map_general; 200 - tried = true; 201 - goto restart; 202 - } 203 - goto end_destroy; 204 - } 205 - 206 - /* Add all the interfaces to forward group 
and attach 207 - * egress devmap program if exist 208 - */ 209 - devmap_val.ifindex = ifindex; 210 - if (xdp_devmap_attached) 211 - devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_devmap_prog); 212 - ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0); 213 - if (ret < 0) { 214 - fprintf(stderr, "Failed to update devmap value: %s\n", 215 - strerror(errno)); 216 - ret = EXIT_FAIL_BPF; 217 - goto end_destroy; 218 - } 219 - } 220 - 221 - ret = sample_run(interval, NULL, NULL); 222 - if (ret < 0) { 223 - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); 224 - ret = EXIT_FAIL; 225 - goto end_destroy; 226 - } 227 - ret = EXIT_OK; 228 - end_destroy: 229 - xdp_redirect_map_multi__destroy(skel); 230 - end: 231 - sample_exit(ret); 232 - }
-228
samples/bpf/xdp_redirect_map_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io 3 - */ 4 - static const char *__doc__ = 5 - "XDP redirect tool, using BPF_MAP_TYPE_DEVMAP\n" 6 - "Usage: xdp_redirect_map <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n"; 7 - 8 - #include <linux/bpf.h> 9 - #include <linux/if_link.h> 10 - #include <assert.h> 11 - #include <errno.h> 12 - #include <signal.h> 13 - #include <stdio.h> 14 - #include <stdlib.h> 15 - #include <stdbool.h> 16 - #include <string.h> 17 - #include <net/if.h> 18 - #include <unistd.h> 19 - #include <libgen.h> 20 - #include <getopt.h> 21 - #include <bpf/bpf.h> 22 - #include <bpf/libbpf.h> 23 - #include "bpf_util.h" 24 - #include "xdp_sample_user.h" 25 - #include "xdp_redirect_map.skel.h" 26 - 27 - static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | 28 - SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; 29 - 30 - DEFINE_SAMPLE_INIT(xdp_redirect_map); 31 - 32 - static const struct option long_options[] = { 33 - { "help", no_argument, NULL, 'h' }, 34 - { "skb-mode", no_argument, NULL, 'S' }, 35 - { "force", no_argument, NULL, 'F' }, 36 - { "load-egress", no_argument, NULL, 'X' }, 37 - { "stats", no_argument, NULL, 's' }, 38 - { "interval", required_argument, NULL, 'i' }, 39 - { "verbose", no_argument, NULL, 'v' }, 40 - {} 41 - }; 42 - 43 - static int verbose = 0; 44 - 45 - int main(int argc, char **argv) 46 - { 47 - struct bpf_devmap_val devmap_val = {}; 48 - bool xdp_devmap_attached = false; 49 - struct xdp_redirect_map *skel; 50 - char str[2 * IF_NAMESIZE + 1]; 51 - char ifname_out[IF_NAMESIZE]; 52 - struct bpf_map *tx_port_map; 53 - char ifname_in[IF_NAMESIZE]; 54 - int ifindex_in, ifindex_out; 55 - unsigned long interval = 2; 56 - int ret = EXIT_FAIL_OPTION; 57 - struct bpf_program *prog; 58 - bool generic = false; 59 - bool force = false; 60 - bool tried = false; 61 - bool error = true; 62 - int opt, key = 0; 63 - 64 - while ((opt = getopt_long(argc, argv, "hSFXi:vs", 65 - 
long_options, NULL)) != -1) { 66 - switch (opt) { 67 - case 'S': 68 - generic = true; 69 - /* devmap_xmit tracepoint not available */ 70 - mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | 71 - SAMPLE_DEVMAP_XMIT_CNT_MULTI); 72 - break; 73 - case 'F': 74 - force = true; 75 - break; 76 - case 'X': 77 - xdp_devmap_attached = true; 78 - break; 79 - case 'i': 80 - interval = strtoul(optarg, NULL, 0); 81 - break; 82 - case 'v': 83 - sample_switch_mode(); 84 - verbose = 1; 85 - break; 86 - case 's': 87 - mask |= SAMPLE_REDIRECT_MAP_CNT; 88 - break; 89 - case 'h': 90 - error = false; 91 - default: 92 - sample_usage(argv, long_options, __doc__, mask, error); 93 - return ret; 94 - } 95 - } 96 - 97 - if (argc <= optind + 1) { 98 - sample_usage(argv, long_options, __doc__, mask, true); 99 - goto end; 100 - } 101 - 102 - ifindex_in = if_nametoindex(argv[optind]); 103 - if (!ifindex_in) 104 - ifindex_in = strtoul(argv[optind], NULL, 0); 105 - 106 - ifindex_out = if_nametoindex(argv[optind + 1]); 107 - if (!ifindex_out) 108 - ifindex_out = strtoul(argv[optind + 1], NULL, 0); 109 - 110 - if (!ifindex_in || !ifindex_out) { 111 - fprintf(stderr, "Bad interface index or name\n"); 112 - sample_usage(argv, long_options, __doc__, mask, true); 113 - goto end; 114 - } 115 - 116 - skel = xdp_redirect_map__open(); 117 - if (!skel) { 118 - fprintf(stderr, "Failed to xdp_redirect_map__open: %s\n", 119 - strerror(errno)); 120 - ret = EXIT_FAIL_BPF; 121 - goto end; 122 - } 123 - 124 - ret = sample_init_pre_load(skel); 125 - if (ret < 0) { 126 - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); 127 - ret = EXIT_FAIL_BPF; 128 - goto end_destroy; 129 - } 130 - 131 - /* Load 2nd xdp prog on egress. 
*/ 132 - if (xdp_devmap_attached) { 133 - ret = get_mac_addr(ifindex_out, skel->rodata->tx_mac_addr); 134 - if (ret < 0) { 135 - fprintf(stderr, "Failed to get interface %d mac address: %s\n", 136 - ifindex_out, strerror(-ret)); 137 - ret = EXIT_FAIL; 138 - goto end_destroy; 139 - } 140 - if (verbose) 141 - printf("Egress ifindex:%d using src MAC %02x:%02x:%02x:%02x:%02x:%02x\n", 142 - ifindex_out, 143 - skel->rodata->tx_mac_addr[0], skel->rodata->tx_mac_addr[1], 144 - skel->rodata->tx_mac_addr[2], skel->rodata->tx_mac_addr[3], 145 - skel->rodata->tx_mac_addr[4], skel->rodata->tx_mac_addr[5]); 146 - } 147 - 148 - skel->rodata->from_match[0] = ifindex_in; 149 - skel->rodata->to_match[0] = ifindex_out; 150 - 151 - ret = xdp_redirect_map__load(skel); 152 - if (ret < 0) { 153 - fprintf(stderr, "Failed to xdp_redirect_map__load: %s\n", 154 - strerror(errno)); 155 - ret = EXIT_FAIL_BPF; 156 - goto end_destroy; 157 - } 158 - 159 - ret = sample_init(skel, mask); 160 - if (ret < 0) { 161 - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); 162 - ret = EXIT_FAIL; 163 - goto end_destroy; 164 - } 165 - 166 - prog = skel->progs.xdp_redirect_map_native; 167 - tx_port_map = skel->maps.tx_port_native; 168 - restart: 169 - if (sample_install_xdp(prog, ifindex_in, generic, force) < 0) { 170 - /* First try with struct bpf_devmap_val as value for generic 171 - * mode, then fallback to sizeof(int) for older kernels. 
172 - */ 173 - fprintf(stderr, 174 - "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n"); 175 - if (generic && !tried) { 176 - prog = skel->progs.xdp_redirect_map_general; 177 - tx_port_map = skel->maps.tx_port_general; 178 - tried = true; 179 - goto restart; 180 - } 181 - ret = EXIT_FAIL_XDP; 182 - goto end_destroy; 183 - } 184 - 185 - /* Loading dummy XDP prog on out-device */ 186 - sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, generic, force); 187 - 188 - devmap_val.ifindex = ifindex_out; 189 - if (xdp_devmap_attached) 190 - devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_map_egress); 191 - ret = bpf_map_update_elem(bpf_map__fd(tx_port_map), &key, &devmap_val, 0); 192 - if (ret < 0) { 193 - fprintf(stderr, "Failed to update devmap value: %s\n", 194 - strerror(errno)); 195 - ret = EXIT_FAIL_BPF; 196 - goto end_destroy; 197 - } 198 - 199 - ret = EXIT_FAIL; 200 - if (!if_indextoname(ifindex_in, ifname_in)) { 201 - fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in, 202 - strerror(errno)); 203 - goto end_destroy; 204 - } 205 - 206 - if (!if_indextoname(ifindex_out, ifname_out)) { 207 - fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out, 208 - strerror(errno)); 209 - goto end_destroy; 210 - } 211 - 212 - safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str)); 213 - printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n", 214 - ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out)); 215 - snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out); 216 - 217 - ret = sample_run(interval, NULL, NULL); 218 - if (ret < 0) { 219 - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); 220 - ret = EXIT_FAIL; 221 - goto end_destroy; 222 - } 223 - ret = EXIT_OK; 224 - end_destroy: 225 - xdp_redirect_map__destroy(skel); 226 - end: 227 - sample_exit(ret); 228 - }
-172
samples/bpf/xdp_redirect_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> 3 - */ 4 - static const char *__doc__ = 5 - "XDP redirect tool, using bpf_redirect helper\n" 6 - "Usage: xdp_redirect <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n"; 7 - 8 - #include <linux/bpf.h> 9 - #include <linux/if_link.h> 10 - #include <assert.h> 11 - #include <errno.h> 12 - #include <signal.h> 13 - #include <stdio.h> 14 - #include <stdlib.h> 15 - #include <stdbool.h> 16 - #include <string.h> 17 - #include <net/if.h> 18 - #include <unistd.h> 19 - #include <libgen.h> 20 - #include <getopt.h> 21 - #include <bpf/bpf.h> 22 - #include <bpf/libbpf.h> 23 - #include "bpf_util.h" 24 - #include "xdp_sample_user.h" 25 - #include "xdp_redirect.skel.h" 26 - 27 - static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_CNT | 28 - SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; 29 - 30 - DEFINE_SAMPLE_INIT(xdp_redirect); 31 - 32 - static const struct option long_options[] = { 33 - {"help", no_argument, NULL, 'h' }, 34 - {"skb-mode", no_argument, NULL, 'S' }, 35 - {"force", no_argument, NULL, 'F' }, 36 - {"stats", no_argument, NULL, 's' }, 37 - {"interval", required_argument, NULL, 'i' }, 38 - {"verbose", no_argument, NULL, 'v' }, 39 - {} 40 - }; 41 - 42 - int main(int argc, char **argv) 43 - { 44 - int ifindex_in, ifindex_out, opt; 45 - char str[2 * IF_NAMESIZE + 1]; 46 - char ifname_out[IF_NAMESIZE]; 47 - char ifname_in[IF_NAMESIZE]; 48 - int ret = EXIT_FAIL_OPTION; 49 - unsigned long interval = 2; 50 - struct xdp_redirect *skel; 51 - bool generic = false; 52 - bool force = false; 53 - bool error = true; 54 - 55 - while ((opt = getopt_long(argc, argv, "hSFi:vs", 56 - long_options, NULL)) != -1) { 57 - switch (opt) { 58 - case 'S': 59 - generic = true; 60 - mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | 61 - SAMPLE_DEVMAP_XMIT_CNT_MULTI); 62 - break; 63 - case 'F': 64 - force = true; 65 - break; 66 - case 'i': 67 - interval = strtoul(optarg, NULL, 0); 68 - break; 69 - 
case 'v': 70 - sample_switch_mode(); 71 - break; 72 - case 's': 73 - mask |= SAMPLE_REDIRECT_CNT; 74 - break; 75 - case 'h': 76 - error = false; 77 - default: 78 - sample_usage(argv, long_options, __doc__, mask, error); 79 - return ret; 80 - } 81 - } 82 - 83 - if (argc <= optind + 1) { 84 - sample_usage(argv, long_options, __doc__, mask, true); 85 - return ret; 86 - } 87 - 88 - ifindex_in = if_nametoindex(argv[optind]); 89 - if (!ifindex_in) 90 - ifindex_in = strtoul(argv[optind], NULL, 0); 91 - 92 - ifindex_out = if_nametoindex(argv[optind + 1]); 93 - if (!ifindex_out) 94 - ifindex_out = strtoul(argv[optind + 1], NULL, 0); 95 - 96 - if (!ifindex_in || !ifindex_out) { 97 - fprintf(stderr, "Bad interface index or name\n"); 98 - sample_usage(argv, long_options, __doc__, mask, true); 99 - goto end; 100 - } 101 - 102 - skel = xdp_redirect__open(); 103 - if (!skel) { 104 - fprintf(stderr, "Failed to xdp_redirect__open: %s\n", strerror(errno)); 105 - ret = EXIT_FAIL_BPF; 106 - goto end; 107 - } 108 - 109 - ret = sample_init_pre_load(skel); 110 - if (ret < 0) { 111 - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); 112 - ret = EXIT_FAIL_BPF; 113 - goto end_destroy; 114 - } 115 - 116 - skel->rodata->from_match[0] = ifindex_in; 117 - skel->rodata->to_match[0] = ifindex_out; 118 - skel->rodata->ifindex_out = ifindex_out; 119 - 120 - ret = xdp_redirect__load(skel); 121 - if (ret < 0) { 122 - fprintf(stderr, "Failed to xdp_redirect__load: %s\n", strerror(errno)); 123 - ret = EXIT_FAIL_BPF; 124 - goto end_destroy; 125 - } 126 - 127 - ret = sample_init(skel, mask); 128 - if (ret < 0) { 129 - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); 130 - ret = EXIT_FAIL; 131 - goto end_destroy; 132 - } 133 - 134 - ret = EXIT_FAIL_XDP; 135 - if (sample_install_xdp(skel->progs.xdp_redirect_prog, ifindex_in, 136 - generic, force) < 0) 137 - goto end_destroy; 138 - 139 - /* Loading dummy XDP prog on out-device */ 140 - 
sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, 141 - generic, force); 142 - 143 - ret = EXIT_FAIL; 144 - if (!if_indextoname(ifindex_in, ifname_in)) { 145 - fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in, 146 - strerror(errno)); 147 - goto end_destroy; 148 - } 149 - 150 - if (!if_indextoname(ifindex_out, ifname_out)) { 151 - fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out, 152 - strerror(errno)); 153 - goto end_destroy; 154 - } 155 - 156 - safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str)); 157 - printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n", 158 - ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out)); 159 - snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out); 160 - 161 - ret = sample_run(interval, NULL, NULL); 162 - if (ret < 0) { 163 - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); 164 - ret = EXIT_FAIL; 165 - goto end_destroy; 166 - } 167 - ret = EXIT_OK; 168 - end_destroy: 169 - xdp_redirect__destroy(skel); 170 - end: 171 - sample_exit(ret); 172 - }
-140
samples/bpf/xdp_rxq_info_kern.c
··· 1 - /* SPDX-License-Identifier: GPL-2.0 2 - * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. 3 - * 4 - * Example howto extract XDP RX-queue info 5 - */ 6 - #include <uapi/linux/bpf.h> 7 - #include <uapi/linux/if_ether.h> 8 - #include <uapi/linux/in.h> 9 - #include <bpf/bpf_helpers.h> 10 - 11 - /* Config setup from with userspace 12 - * 13 - * User-side setup ifindex in config_map, to verify that 14 - * ctx->ingress_ifindex is correct (against configured ifindex) 15 - */ 16 - struct config { 17 - __u32 action; 18 - int ifindex; 19 - __u32 options; 20 - }; 21 - enum cfg_options_flags { 22 - NO_TOUCH = 0x0U, 23 - READ_MEM = 0x1U, 24 - SWAP_MAC = 0x2U, 25 - }; 26 - 27 - struct { 28 - __uint(type, BPF_MAP_TYPE_ARRAY); 29 - __type(key, int); 30 - __type(value, struct config); 31 - __uint(max_entries, 1); 32 - } config_map SEC(".maps"); 33 - 34 - /* Common stats data record (shared with userspace) */ 35 - struct datarec { 36 - __u64 processed; 37 - __u64 issue; 38 - }; 39 - 40 - struct { 41 - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 42 - __type(key, u32); 43 - __type(value, struct datarec); 44 - __uint(max_entries, 1); 45 - } stats_global_map SEC(".maps"); 46 - 47 - #define MAX_RXQs 64 48 - 49 - /* Stats per rx_queue_index (per CPU) */ 50 - struct { 51 - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 52 - __type(key, u32); 53 - __type(value, struct datarec); 54 - __uint(max_entries, MAX_RXQs + 1); 55 - } rx_queue_index_map SEC(".maps"); 56 - 57 - static __always_inline 58 - void swap_src_dst_mac(void *data) 59 - { 60 - unsigned short *p = data; 61 - unsigned short dst[3]; 62 - 63 - dst[0] = p[0]; 64 - dst[1] = p[1]; 65 - dst[2] = p[2]; 66 - p[0] = p[3]; 67 - p[1] = p[4]; 68 - p[2] = p[5]; 69 - p[3] = dst[0]; 70 - p[4] = dst[1]; 71 - p[5] = dst[2]; 72 - } 73 - 74 - SEC("xdp_prog0") 75 - int xdp_prognum0(struct xdp_md *ctx) 76 - { 77 - void *data_end = (void *)(long)ctx->data_end; 78 - void *data = (void *)(long)ctx->data; 79 - struct datarec *rec, *rxq_rec; 80 - int 
ingress_ifindex; 81 - struct config *config; 82 - u32 key = 0; 83 - 84 - /* Global stats record */ 85 - rec = bpf_map_lookup_elem(&stats_global_map, &key); 86 - if (!rec) 87 - return XDP_ABORTED; 88 - rec->processed++; 89 - 90 - /* Accessing ctx->ingress_ifindex, cause BPF to rewrite BPF 91 - * instructions inside kernel to access xdp_rxq->dev->ifindex 92 - */ 93 - ingress_ifindex = ctx->ingress_ifindex; 94 - 95 - config = bpf_map_lookup_elem(&config_map, &key); 96 - if (!config) 97 - return XDP_ABORTED; 98 - 99 - /* Simple test: check ctx provided ifindex is as expected */ 100 - if (ingress_ifindex != config->ifindex) { 101 - /* count this error case */ 102 - rec->issue++; 103 - return XDP_ABORTED; 104 - } 105 - 106 - /* Update stats per rx_queue_index. Handle if rx_queue_index 107 - * is larger than stats map can contain info for. 108 - */ 109 - key = ctx->rx_queue_index; 110 - if (key >= MAX_RXQs) 111 - key = MAX_RXQs; 112 - rxq_rec = bpf_map_lookup_elem(&rx_queue_index_map, &key); 113 - if (!rxq_rec) 114 - return XDP_ABORTED; 115 - rxq_rec->processed++; 116 - if (key == MAX_RXQs) 117 - rxq_rec->issue++; 118 - 119 - /* Default: Don't touch packet data, only count packets */ 120 - if (unlikely(config->options & (READ_MEM|SWAP_MAC))) { 121 - struct ethhdr *eth = data; 122 - 123 - if (eth + 1 > data_end) 124 - return XDP_ABORTED; 125 - 126 - /* Avoid compiler removing this: Drop non 802.3 Ethertypes */ 127 - if (ntohs(eth->h_proto) < ETH_P_802_3_MIN) 128 - return XDP_ABORTED; 129 - 130 - /* XDP_TX requires changing MAC-addrs, else HW may drop. 131 - * Can also be enabled with --swapmac (for test purposes) 132 - */ 133 - if (unlikely(config->options & SWAP_MAC)) 134 - swap_src_dst_mac(data); 135 - } 136 - 137 - return config->action; 138 - } 139 - 140 - char _license[] SEC("license") = "GPL";
-614
samples/bpf/xdp_rxq_info_user.c
··· 1 - /* SPDX-License-Identifier: GPL-2.0 2 - * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. 3 - */ 4 - static const char *__doc__ = " XDP RX-queue info extract example\n\n" 5 - "Monitor how many packets per sec (pps) are received\n" 6 - "per NIC RX queue index and which CPU processed the packet\n" 7 - ; 8 - 9 - #include <errno.h> 10 - #include <signal.h> 11 - #include <stdio.h> 12 - #include <stdlib.h> 13 - #include <stdbool.h> 14 - #include <string.h> 15 - #include <unistd.h> 16 - #include <locale.h> 17 - #include <getopt.h> 18 - #include <net/if.h> 19 - #include <time.h> 20 - #include <limits.h> 21 - #include <arpa/inet.h> 22 - #include <linux/if_link.h> 23 - 24 - #include <bpf/bpf.h> 25 - #include <bpf/libbpf.h> 26 - #include "bpf_util.h" 27 - 28 - static int ifindex = -1; 29 - static char ifname_buf[IF_NAMESIZE]; 30 - static char *ifname; 31 - static __u32 prog_id; 32 - 33 - static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; 34 - 35 - static struct bpf_map *stats_global_map; 36 - static struct bpf_map *rx_queue_index_map; 37 - 38 - /* Exit return codes */ 39 - #define EXIT_OK 0 40 - #define EXIT_FAIL 1 41 - #define EXIT_FAIL_OPTION 2 42 - #define EXIT_FAIL_XDP 3 43 - #define EXIT_FAIL_BPF 4 44 - #define EXIT_FAIL_MEM 5 45 - 46 - #define FAIL_MEM_SIG INT_MAX 47 - #define FAIL_STAT_SIG (INT_MAX - 1) 48 - 49 - static const struct option long_options[] = { 50 - {"help", no_argument, NULL, 'h' }, 51 - {"dev", required_argument, NULL, 'd' }, 52 - {"skb-mode", no_argument, NULL, 'S' }, 53 - {"sec", required_argument, NULL, 's' }, 54 - {"no-separators", no_argument, NULL, 'z' }, 55 - {"action", required_argument, NULL, 'a' }, 56 - {"readmem", no_argument, NULL, 'r' }, 57 - {"swapmac", no_argument, NULL, 'm' }, 58 - {"force", no_argument, NULL, 'F' }, 59 - {0, 0, NULL, 0 } 60 - }; 61 - 62 - static void int_exit(int sig) 63 - { 64 - __u32 curr_prog_id = 0; 65 - 66 - if (ifindex > -1) { 67 - if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { 68 - 
printf("bpf_xdp_query_id failed\n"); 69 - exit(EXIT_FAIL); 70 - } 71 - if (prog_id == curr_prog_id) { 72 - fprintf(stderr, 73 - "Interrupted: Removing XDP program on ifindex:%d device:%s\n", 74 - ifindex, ifname); 75 - bpf_xdp_detach(ifindex, xdp_flags, NULL); 76 - } else if (!curr_prog_id) { 77 - printf("couldn't find a prog id on a given iface\n"); 78 - } else { 79 - printf("program on interface changed, not removing\n"); 80 - } 81 - } 82 - 83 - if (sig == FAIL_MEM_SIG) 84 - exit(EXIT_FAIL_MEM); 85 - else if (sig == FAIL_STAT_SIG) 86 - exit(EXIT_FAIL); 87 - 88 - exit(EXIT_OK); 89 - } 90 - 91 - struct config { 92 - __u32 action; 93 - int ifindex; 94 - __u32 options; 95 - }; 96 - enum cfg_options_flags { 97 - NO_TOUCH = 0x0U, 98 - READ_MEM = 0x1U, 99 - SWAP_MAC = 0x2U, 100 - }; 101 - #define XDP_ACTION_MAX (XDP_TX + 1) 102 - #define XDP_ACTION_MAX_STRLEN 11 103 - static const char *xdp_action_names[XDP_ACTION_MAX] = { 104 - [XDP_ABORTED] = "XDP_ABORTED", 105 - [XDP_DROP] = "XDP_DROP", 106 - [XDP_PASS] = "XDP_PASS", 107 - [XDP_TX] = "XDP_TX", 108 - }; 109 - 110 - static const char *action2str(int action) 111 - { 112 - if (action < XDP_ACTION_MAX) 113 - return xdp_action_names[action]; 114 - return NULL; 115 - } 116 - 117 - static int parse_xdp_action(char *action_str) 118 - { 119 - size_t maxlen; 120 - __u64 action = -1; 121 - int i; 122 - 123 - for (i = 0; i < XDP_ACTION_MAX; i++) { 124 - maxlen = XDP_ACTION_MAX_STRLEN; 125 - if (strncmp(xdp_action_names[i], action_str, maxlen) == 0) { 126 - action = i; 127 - break; 128 - } 129 - } 130 - return action; 131 - } 132 - 133 - static void list_xdp_actions(void) 134 - { 135 - int i; 136 - 137 - printf("Available XDP --action <options>\n"); 138 - for (i = 0; i < XDP_ACTION_MAX; i++) 139 - printf("\t%s\n", xdp_action_names[i]); 140 - printf("\n"); 141 - } 142 - 143 - static char* options2str(enum cfg_options_flags flag) 144 - { 145 - if (flag == NO_TOUCH) 146 - return "no_touch"; 147 - if (flag & SWAP_MAC) 148 - return 
"swapmac"; 149 - if (flag & READ_MEM) 150 - return "read"; 151 - fprintf(stderr, "ERR: Unknown config option flags"); 152 - int_exit(FAIL_STAT_SIG); 153 - return "unknown"; 154 - } 155 - 156 - static void usage(char *argv[]) 157 - { 158 - int i; 159 - 160 - printf("\nDOCUMENTATION:\n%s\n", __doc__); 161 - printf(" Usage: %s (options-see-below)\n", argv[0]); 162 - printf(" Listing options:\n"); 163 - for (i = 0; long_options[i].name != 0; i++) { 164 - printf(" --%-12s", long_options[i].name); 165 - if (long_options[i].flag != NULL) 166 - printf(" flag (internal value:%d)", 167 - *long_options[i].flag); 168 - else 169 - printf(" short-option: -%c", 170 - long_options[i].val); 171 - printf("\n"); 172 - } 173 - printf("\n"); 174 - list_xdp_actions(); 175 - } 176 - 177 - #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ 178 - static __u64 gettime(void) 179 - { 180 - struct timespec t; 181 - int res; 182 - 183 - res = clock_gettime(CLOCK_MONOTONIC, &t); 184 - if (res < 0) { 185 - fprintf(stderr, "Error with gettimeofday! 
(%i)\n", res); 186 - int_exit(FAIL_STAT_SIG); 187 - } 188 - return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; 189 - } 190 - 191 - /* Common stats data record shared with _kern.c */ 192 - struct datarec { 193 - __u64 processed; 194 - __u64 issue; 195 - }; 196 - struct record { 197 - __u64 timestamp; 198 - struct datarec total; 199 - struct datarec *cpu; 200 - }; 201 - struct stats_record { 202 - struct record stats; 203 - struct record *rxq; 204 - }; 205 - 206 - static struct datarec *alloc_record_per_cpu(void) 207 - { 208 - unsigned int nr_cpus = bpf_num_possible_cpus(); 209 - struct datarec *array; 210 - 211 - array = calloc(nr_cpus, sizeof(struct datarec)); 212 - if (!array) { 213 - fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); 214 - int_exit(FAIL_MEM_SIG); 215 - } 216 - return array; 217 - } 218 - 219 - static struct record *alloc_record_per_rxq(void) 220 - { 221 - unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); 222 - struct record *array; 223 - 224 - array = calloc(nr_rxqs, sizeof(struct record)); 225 - if (!array) { 226 - fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs); 227 - int_exit(FAIL_MEM_SIG); 228 - } 229 - return array; 230 - } 231 - 232 - static struct stats_record *alloc_stats_record(void) 233 - { 234 - unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); 235 - struct stats_record *rec; 236 - int i; 237 - 238 - rec = calloc(1, sizeof(struct stats_record)); 239 - if (!rec) { 240 - fprintf(stderr, "Mem alloc error\n"); 241 - int_exit(FAIL_MEM_SIG); 242 - } 243 - rec->rxq = alloc_record_per_rxq(); 244 - for (i = 0; i < nr_rxqs; i++) 245 - rec->rxq[i].cpu = alloc_record_per_cpu(); 246 - 247 - rec->stats.cpu = alloc_record_per_cpu(); 248 - return rec; 249 - } 250 - 251 - static void free_stats_record(struct stats_record *r) 252 - { 253 - unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); 254 - int i; 255 - 256 - for (i = 0; i < nr_rxqs; i++) 257 - free(r->rxq[i].cpu); 258 - 259 - 
free(r->rxq); 260 - free(r->stats.cpu); 261 - free(r); 262 - } 263 - 264 - static bool map_collect_percpu(int fd, __u32 key, struct record *rec) 265 - { 266 - /* For percpu maps, userspace gets a value per possible CPU */ 267 - unsigned int nr_cpus = bpf_num_possible_cpus(); 268 - struct datarec values[nr_cpus]; 269 - __u64 sum_processed = 0; 270 - __u64 sum_issue = 0; 271 - int i; 272 - 273 - if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { 274 - fprintf(stderr, 275 - "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); 276 - return false; 277 - } 278 - /* Get time as close as possible to reading map contents */ 279 - rec->timestamp = gettime(); 280 - 281 - /* Record and sum values from each CPU */ 282 - for (i = 0; i < nr_cpus; i++) { 283 - rec->cpu[i].processed = values[i].processed; 284 - sum_processed += values[i].processed; 285 - rec->cpu[i].issue = values[i].issue; 286 - sum_issue += values[i].issue; 287 - } 288 - rec->total.processed = sum_processed; 289 - rec->total.issue = sum_issue; 290 - return true; 291 - } 292 - 293 - static void stats_collect(struct stats_record *rec) 294 - { 295 - int fd, i, max_rxqs; 296 - 297 - fd = bpf_map__fd(stats_global_map); 298 - map_collect_percpu(fd, 0, &rec->stats); 299 - 300 - fd = bpf_map__fd(rx_queue_index_map); 301 - max_rxqs = bpf_map__max_entries(rx_queue_index_map); 302 - for (i = 0; i < max_rxqs; i++) 303 - map_collect_percpu(fd, i, &rec->rxq[i]); 304 - } 305 - 306 - static double calc_period(struct record *r, struct record *p) 307 - { 308 - double period_ = 0; 309 - __u64 period = 0; 310 - 311 - period = r->timestamp - p->timestamp; 312 - if (period > 0) 313 - period_ = ((double) period / NANOSEC_PER_SEC); 314 - 315 - return period_; 316 - } 317 - 318 - static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_) 319 - { 320 - __u64 packets = 0; 321 - __u64 pps = 0; 322 - 323 - if (period_ > 0) { 324 - packets = r->processed - p->processed; 325 - pps = packets / period_; 326 - } 327 - return pps; 
328 - } 329 - 330 - static __u64 calc_errs_pps(struct datarec *r, 331 - struct datarec *p, double period_) 332 - { 333 - __u64 packets = 0; 334 - __u64 pps = 0; 335 - 336 - if (period_ > 0) { 337 - packets = r->issue - p->issue; 338 - pps = packets / period_; 339 - } 340 - return pps; 341 - } 342 - 343 - static void stats_print(struct stats_record *stats_rec, 344 - struct stats_record *stats_prev, 345 - int action, __u32 cfg_opt) 346 - { 347 - unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); 348 - unsigned int nr_cpus = bpf_num_possible_cpus(); 349 - double pps = 0, err = 0; 350 - struct record *rec, *prev; 351 - double t; 352 - int rxq; 353 - int i; 354 - 355 - /* Header */ 356 - printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s options:%s\n", 357 - ifname, ifindex, action2str(action), options2str(cfg_opt)); 358 - 359 - /* stats_global_map */ 360 - { 361 - char *fmt_rx = "%-15s %-7d %'-11.0f %'-10.0f %s\n"; 362 - char *fm2_rx = "%-15s %-7s %'-11.0f\n"; 363 - char *errstr = ""; 364 - 365 - printf("%-15s %-7s %-11s %-11s\n", 366 - "XDP stats", "CPU", "pps", "issue-pps"); 367 - 368 - rec = &stats_rec->stats; 369 - prev = &stats_prev->stats; 370 - t = calc_period(rec, prev); 371 - for (i = 0; i < nr_cpus; i++) { 372 - struct datarec *r = &rec->cpu[i]; 373 - struct datarec *p = &prev->cpu[i]; 374 - 375 - pps = calc_pps (r, p, t); 376 - err = calc_errs_pps(r, p, t); 377 - if (err > 0) 378 - errstr = "invalid-ifindex"; 379 - if (pps > 0) 380 - printf(fmt_rx, "XDP-RX CPU", 381 - i, pps, err, errstr); 382 - } 383 - pps = calc_pps (&rec->total, &prev->total, t); 384 - err = calc_errs_pps(&rec->total, &prev->total, t); 385 - printf(fm2_rx, "XDP-RX CPU", "total", pps, err); 386 - } 387 - 388 - /* rx_queue_index_map */ 389 - printf("\n%-15s %-7s %-11s %-11s\n", 390 - "RXQ stats", "RXQ:CPU", "pps", "issue-pps"); 391 - 392 - for (rxq = 0; rxq < nr_rxqs; rxq++) { 393 - char *fmt_rx = "%-15s %3d:%-3d %'-11.0f %'-10.0f %s\n"; 394 - char *fm2_rx = "%-15s 
%3d:%-3s %'-11.0f\n"; 395 - char *errstr = ""; 396 - int rxq_ = rxq; 397 - 398 - /* Last RXQ in map catch overflows */ 399 - if (rxq_ == nr_rxqs - 1) 400 - rxq_ = -1; 401 - 402 - rec = &stats_rec->rxq[rxq]; 403 - prev = &stats_prev->rxq[rxq]; 404 - t = calc_period(rec, prev); 405 - for (i = 0; i < nr_cpus; i++) { 406 - struct datarec *r = &rec->cpu[i]; 407 - struct datarec *p = &prev->cpu[i]; 408 - 409 - pps = calc_pps (r, p, t); 410 - err = calc_errs_pps(r, p, t); 411 - if (err > 0) { 412 - if (rxq_ == -1) 413 - errstr = "map-overflow-RXQ"; 414 - else 415 - errstr = "err"; 416 - } 417 - if (pps > 0) 418 - printf(fmt_rx, "rx_queue_index", 419 - rxq_, i, pps, err, errstr); 420 - } 421 - pps = calc_pps (&rec->total, &prev->total, t); 422 - err = calc_errs_pps(&rec->total, &prev->total, t); 423 - if (pps || err) 424 - printf(fm2_rx, "rx_queue_index", rxq_, "sum", pps, err); 425 - } 426 - } 427 - 428 - 429 - /* Pointer swap trick */ 430 - static inline void swap(struct stats_record **a, struct stats_record **b) 431 - { 432 - struct stats_record *tmp; 433 - 434 - tmp = *a; 435 - *a = *b; 436 - *b = tmp; 437 - } 438 - 439 - static void stats_poll(int interval, int action, __u32 cfg_opt) 440 - { 441 - struct stats_record *record, *prev; 442 - 443 - record = alloc_stats_record(); 444 - prev = alloc_stats_record(); 445 - stats_collect(record); 446 - 447 - while (1) { 448 - swap(&prev, &record); 449 - stats_collect(record); 450 - stats_print(record, prev, action, cfg_opt); 451 - sleep(interval); 452 - } 453 - 454 - free_stats_record(record); 455 - free_stats_record(prev); 456 - } 457 - 458 - 459 - int main(int argc, char **argv) 460 - { 461 - __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */ 462 - struct bpf_prog_info info = {}; 463 - __u32 info_len = sizeof(info); 464 - int prog_fd, map_fd, opt, err; 465 - bool use_separators = true; 466 - struct config cfg = { 0 }; 467 - struct bpf_program *prog; 468 - struct bpf_object *obj; 469 - struct bpf_map *map; 
470 - char filename[256]; 471 - int longindex = 0; 472 - int interval = 2; 473 - __u32 key = 0; 474 - 475 - 476 - char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 }; 477 - int action = XDP_PASS; /* Default action */ 478 - char *action_str = NULL; 479 - 480 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 481 - 482 - obj = bpf_object__open_file(filename, NULL); 483 - if (libbpf_get_error(obj)) 484 - return EXIT_FAIL; 485 - 486 - prog = bpf_object__next_program(obj, NULL); 487 - bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); 488 - 489 - err = bpf_object__load(obj); 490 - if (err) 491 - return EXIT_FAIL; 492 - prog_fd = bpf_program__fd(prog); 493 - 494 - map = bpf_object__find_map_by_name(obj, "config_map"); 495 - stats_global_map = bpf_object__find_map_by_name(obj, "stats_global_map"); 496 - rx_queue_index_map = bpf_object__find_map_by_name(obj, "rx_queue_index_map"); 497 - if (!map || !stats_global_map || !rx_queue_index_map) { 498 - printf("finding a map in obj file failed\n"); 499 - return EXIT_FAIL; 500 - } 501 - map_fd = bpf_map__fd(map); 502 - 503 - if (!prog_fd) { 504 - fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", strerror(errno)); 505 - return EXIT_FAIL; 506 - } 507 - 508 - /* Parse commands line args */ 509 - while ((opt = getopt_long(argc, argv, "FhSrmzd:s:a:", 510 - long_options, &longindex)) != -1) { 511 - switch (opt) { 512 - case 'd': 513 - if (strlen(optarg) >= IF_NAMESIZE) { 514 - fprintf(stderr, "ERR: --dev name too long\n"); 515 - goto error; 516 - } 517 - ifname = (char *)&ifname_buf; 518 - strncpy(ifname, optarg, IF_NAMESIZE); 519 - ifindex = if_nametoindex(ifname); 520 - if (ifindex == 0) { 521 - fprintf(stderr, 522 - "ERR: --dev name unknown err(%d):%s\n", 523 - errno, strerror(errno)); 524 - goto error; 525 - } 526 - break; 527 - case 's': 528 - interval = atoi(optarg); 529 - break; 530 - case 'S': 531 - xdp_flags |= XDP_FLAGS_SKB_MODE; 532 - break; 533 - case 'z': 534 - use_separators = false; 535 - break; 
536 - case 'a': 537 - action_str = (char *)&action_str_buf; 538 - strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN); 539 - break; 540 - case 'r': 541 - cfg_options |= READ_MEM; 542 - break; 543 - case 'm': 544 - cfg_options |= SWAP_MAC; 545 - break; 546 - case 'F': 547 - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; 548 - break; 549 - case 'h': 550 - error: 551 - default: 552 - usage(argv); 553 - return EXIT_FAIL_OPTION; 554 - } 555 - } 556 - 557 - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) 558 - xdp_flags |= XDP_FLAGS_DRV_MODE; 559 - 560 - /* Required option */ 561 - if (ifindex == -1) { 562 - fprintf(stderr, "ERR: required option --dev missing\n"); 563 - usage(argv); 564 - return EXIT_FAIL_OPTION; 565 - } 566 - cfg.ifindex = ifindex; 567 - 568 - /* Parse action string */ 569 - if (action_str) { 570 - action = parse_xdp_action(action_str); 571 - if (action < 0) { 572 - fprintf(stderr, "ERR: Invalid XDP --action: %s\n", 573 - action_str); 574 - list_xdp_actions(); 575 - return EXIT_FAIL_OPTION; 576 - } 577 - } 578 - cfg.action = action; 579 - 580 - /* XDP_TX requires changing MAC-addrs, else HW may drop */ 581 - if (action == XDP_TX) 582 - cfg_options |= SWAP_MAC; 583 - cfg.options = cfg_options; 584 - 585 - /* Trick to pretty printf with thousands separators use %' */ 586 - if (use_separators) 587 - setlocale(LC_NUMERIC, "en_US"); 588 - 589 - /* User-side setup ifindex in config_map */ 590 - err = bpf_map_update_elem(map_fd, &key, &cfg, 0); 591 - if (err) { 592 - fprintf(stderr, "Store config failed (err:%d)\n", err); 593 - exit(EXIT_FAIL_BPF); 594 - } 595 - 596 - /* Remove XDP program when program is interrupted or killed */ 597 - signal(SIGINT, int_exit); 598 - signal(SIGTERM, int_exit); 599 - 600 - if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { 601 - fprintf(stderr, "link set xdp fd failed\n"); 602 - return EXIT_FAIL_XDP; 603 - } 604 - 605 - err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); 606 - if (err) { 607 - printf("can't get prog info - 
%s\n", strerror(errno)); 608 - return err; 609 - } 610 - prog_id = info.id; 611 - 612 - stats_poll(interval, action, cfg_options); 613 - return EXIT_OK; 614 - }
-57
samples/bpf/xdp_sample_pkts_kern.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - #include <linux/ptrace.h> 3 - #include <linux/version.h> 4 - #include <uapi/linux/bpf.h> 5 - #include <bpf/bpf_helpers.h> 6 - 7 - #define SAMPLE_SIZE 64ul 8 - 9 - struct { 10 - __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 11 - __uint(key_size, sizeof(int)); 12 - __uint(value_size, sizeof(u32)); 13 - } my_map SEC(".maps"); 14 - 15 - SEC("xdp_sample") 16 - int xdp_sample_prog(struct xdp_md *ctx) 17 - { 18 - void *data_end = (void *)(long)ctx->data_end; 19 - void *data = (void *)(long)ctx->data; 20 - 21 - /* Metadata will be in the perf event before the packet data. */ 22 - struct S { 23 - u16 cookie; 24 - u16 pkt_len; 25 - } __packed metadata; 26 - 27 - if (data < data_end) { 28 - /* The XDP perf_event_output handler will use the upper 32 bits 29 - * of the flags argument as a number of bytes to include of the 30 - * packet payload in the event data. If the size is too big, the 31 - * call to bpf_perf_event_output will fail and return -EFAULT. 32 - * 33 - * See bpf_xdp_event_output in net/core/filter.c. 34 - * 35 - * The BPF_F_CURRENT_CPU flag means that the event output fd 36 - * will be indexed by the CPU number in the event map. 37 - */ 38 - u64 flags = BPF_F_CURRENT_CPU; 39 - u16 sample_size; 40 - int ret; 41 - 42 - metadata.cookie = 0xdead; 43 - metadata.pkt_len = (u16)(data_end - data); 44 - sample_size = min(metadata.pkt_len, SAMPLE_SIZE); 45 - flags |= (u64)sample_size << 32; 46 - 47 - ret = bpf_perf_event_output(ctx, &my_map, flags, 48 - &metadata, sizeof(metadata)); 49 - if (ret) 50 - bpf_printk("perf_event_output failed: %d\n", ret); 51 - } 52 - 53 - return XDP_PASS; 54 - } 55 - 56 - char _license[] SEC("license") = "GPL"; 57 - u32 _version SEC("version") = LINUX_VERSION_CODE;
-196
samples/bpf/xdp_sample_pkts_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - #include <stdio.h> 3 - #include <stdlib.h> 4 - #include <string.h> 5 - #include <linux/perf_event.h> 6 - #include <linux/bpf.h> 7 - #include <net/if.h> 8 - #include <errno.h> 9 - #include <assert.h> 10 - #include <sys/sysinfo.h> 11 - #include <sys/ioctl.h> 12 - #include <signal.h> 13 - #include <bpf/libbpf.h> 14 - #include <bpf/bpf.h> 15 - #include <libgen.h> 16 - #include <linux/if_link.h> 17 - 18 - #include "perf-sys.h" 19 - 20 - static int if_idx; 21 - static char *if_name; 22 - static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; 23 - static __u32 prog_id; 24 - static struct perf_buffer *pb = NULL; 25 - 26 - static int do_attach(int idx, int fd, const char *name) 27 - { 28 - struct bpf_prog_info info = {}; 29 - __u32 info_len = sizeof(info); 30 - int err; 31 - 32 - err = bpf_xdp_attach(idx, fd, xdp_flags, NULL); 33 - if (err < 0) { 34 - printf("ERROR: failed to attach program to %s\n", name); 35 - return err; 36 - } 37 - 38 - err = bpf_prog_get_info_by_fd(fd, &info, &info_len); 39 - if (err) { 40 - printf("can't get prog info - %s\n", strerror(errno)); 41 - return err; 42 - } 43 - prog_id = info.id; 44 - 45 - return err; 46 - } 47 - 48 - static int do_detach(int idx, const char *name) 49 - { 50 - __u32 curr_prog_id = 0; 51 - int err = 0; 52 - 53 - err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id); 54 - if (err) { 55 - printf("bpf_xdp_query_id failed\n"); 56 - return err; 57 - } 58 - if (prog_id == curr_prog_id) { 59 - err = bpf_xdp_detach(idx, xdp_flags, NULL); 60 - if (err < 0) 61 - printf("ERROR: failed to detach prog from %s\n", name); 62 - } else if (!curr_prog_id) { 63 - printf("couldn't find a prog id on a %s\n", name); 64 - } else { 65 - printf("program on interface changed, not removing\n"); 66 - } 67 - 68 - return err; 69 - } 70 - 71 - #define SAMPLE_SIZE 64 72 - 73 - static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size) 74 - { 75 - struct { 76 - __u16 cookie; 77 - __u16 pkt_len; 
78 - __u8 pkt_data[SAMPLE_SIZE]; 79 - } __packed *e = data; 80 - int i; 81 - 82 - if (e->cookie != 0xdead) { 83 - printf("BUG cookie %x sized %d\n", e->cookie, size); 84 - return; 85 - } 86 - 87 - printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len); 88 - for (i = 0; i < 14 && i < e->pkt_len; i++) 89 - printf("%02x ", e->pkt_data[i]); 90 - printf("\n"); 91 - } 92 - 93 - static void sig_handler(int signo) 94 - { 95 - do_detach(if_idx, if_name); 96 - perf_buffer__free(pb); 97 - exit(0); 98 - } 99 - 100 - static void usage(const char *prog) 101 - { 102 - fprintf(stderr, 103 - "%s: %s [OPTS] <ifname|ifindex>\n\n" 104 - "OPTS:\n" 105 - " -F force loading prog\n" 106 - " -S use skb-mode\n", 107 - __func__, prog); 108 - } 109 - 110 - int main(int argc, char **argv) 111 - { 112 - const char *optstr = "FS"; 113 - int prog_fd, map_fd, opt; 114 - struct bpf_program *prog; 115 - struct bpf_object *obj; 116 - struct bpf_map *map; 117 - char filename[256]; 118 - int ret, err; 119 - 120 - while ((opt = getopt(argc, argv, optstr)) != -1) { 121 - switch (opt) { 122 - case 'F': 123 - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; 124 - break; 125 - case 'S': 126 - xdp_flags |= XDP_FLAGS_SKB_MODE; 127 - break; 128 - default: 129 - usage(basename(argv[0])); 130 - return 1; 131 - } 132 - } 133 - 134 - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) 135 - xdp_flags |= XDP_FLAGS_DRV_MODE; 136 - 137 - if (optind == argc) { 138 - usage(basename(argv[0])); 139 - return 1; 140 - } 141 - 142 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 143 - 144 - obj = bpf_object__open_file(filename, NULL); 145 - if (libbpf_get_error(obj)) 146 - return 1; 147 - 148 - prog = bpf_object__next_program(obj, NULL); 149 - bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); 150 - 151 - err = bpf_object__load(obj); 152 - if (err) 153 - return 1; 154 - 155 - prog_fd = bpf_program__fd(prog); 156 - 157 - map = bpf_object__next_map(obj, NULL); 158 - if (!map) { 159 - printf("finding a map in obj file failed\n"); 
160 - return 1; 161 - } 162 - map_fd = bpf_map__fd(map); 163 - 164 - if_idx = if_nametoindex(argv[optind]); 165 - if (!if_idx) 166 - if_idx = strtoul(argv[optind], NULL, 0); 167 - 168 - if (!if_idx) { 169 - fprintf(stderr, "Invalid ifname\n"); 170 - return 1; 171 - } 172 - if_name = argv[optind]; 173 - err = do_attach(if_idx, prog_fd, if_name); 174 - if (err) 175 - return err; 176 - 177 - if (signal(SIGINT, sig_handler) || 178 - signal(SIGHUP, sig_handler) || 179 - signal(SIGTERM, sig_handler)) { 180 - perror("signal"); 181 - return 1; 182 - } 183 - 184 - pb = perf_buffer__new(map_fd, 8, print_bpf_output, NULL, NULL, NULL); 185 - err = libbpf_get_error(pb); 186 - if (err) { 187 - perror("perf_buffer setup failed"); 188 - return 1; 189 - } 190 - 191 - while ((ret = perf_buffer__poll(pb, 1000)) >= 0) { 192 - } 193 - 194 - kill(0, SIGINT); 195 - return ret; 196 - }
+21 -1
tools/include/uapi/linux/bpf.h
··· 1039 1039 BPF_NETFILTER, 1040 1040 BPF_TCX_INGRESS, 1041 1041 BPF_TCX_EGRESS, 1042 + BPF_TRACE_UPROBE_MULTI, 1042 1043 __MAX_BPF_ATTACH_TYPE 1043 1044 }; 1044 1045 ··· 1058 1057 BPF_LINK_TYPE_STRUCT_OPS = 9, 1059 1058 BPF_LINK_TYPE_NETFILTER = 10, 1060 1059 BPF_LINK_TYPE_TCX = 11, 1060 + BPF_LINK_TYPE_UPROBE_MULTI = 12, 1061 1061 MAX_BPF_LINK_TYPE, 1062 1062 }; 1063 1063 ··· 1188 1186 /* link_create.kprobe_multi.flags used in LINK_CREATE command for 1189 1187 * BPF_TRACE_KPROBE_MULTI attach type to create return probe. 1190 1188 */ 1191 - #define BPF_F_KPROBE_MULTI_RETURN (1U << 0) 1189 + enum { 1190 + BPF_F_KPROBE_MULTI_RETURN = (1U << 0) 1191 + }; 1192 + 1193 + /* link_create.uprobe_multi.flags used in LINK_CREATE command for 1194 + * BPF_TRACE_UPROBE_MULTI attach type to create return probe. 1195 + */ 1196 + enum { 1197 + BPF_F_UPROBE_MULTI_RETURN = (1U << 0) 1198 + }; 1192 1199 1193 1200 /* link_create.netfilter.flags used in LINK_CREATE command for 1194 1201 * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. ··· 1635 1624 }; 1636 1625 __u64 expected_revision; 1637 1626 } tcx; 1627 + struct { 1628 + __aligned_u64 path; 1629 + __aligned_u64 offsets; 1630 + __aligned_u64 ref_ctr_offsets; 1631 + __aligned_u64 cookies; 1632 + __u32 cnt; 1633 + __u32 flags; 1634 + __u32 pid; 1635 + } uprobe_multi; 1638 1636 }; 1639 1637 } link_create; 1640 1638
+1 -1
tools/lib/bpf/Build
··· 1 1 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ 2 2 netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ 3 3 btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ 4 - usdt.o zip.o 4 + usdt.o zip.o elf.o
+11
tools/lib/bpf/bpf.c
··· 767 767 if (!OPTS_ZEROED(opts, kprobe_multi)) 768 768 return libbpf_err(-EINVAL); 769 769 break; 770 + case BPF_TRACE_UPROBE_MULTI: 771 + attr.link_create.uprobe_multi.flags = OPTS_GET(opts, uprobe_multi.flags, 0); 772 + attr.link_create.uprobe_multi.cnt = OPTS_GET(opts, uprobe_multi.cnt, 0); 773 + attr.link_create.uprobe_multi.path = ptr_to_u64(OPTS_GET(opts, uprobe_multi.path, 0)); 774 + attr.link_create.uprobe_multi.offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.offsets, 0)); 775 + attr.link_create.uprobe_multi.ref_ctr_offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.ref_ctr_offsets, 0)); 776 + attr.link_create.uprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, uprobe_multi.cookies, 0)); 777 + attr.link_create.uprobe_multi.pid = OPTS_GET(opts, uprobe_multi.pid, 0); 778 + if (!OPTS_ZEROED(opts, uprobe_multi)) 779 + return libbpf_err(-EINVAL); 780 + break; 770 781 case BPF_TRACE_FENTRY: 771 782 case BPF_TRACE_FEXIT: 772 783 case BPF_MODIFY_RETURN:
+10 -1
tools/lib/bpf/bpf.h
··· 393 393 const __u64 *cookies; 394 394 } kprobe_multi; 395 395 struct { 396 + __u32 flags; 397 + __u32 cnt; 398 + const char *path; 399 + const unsigned long *offsets; 400 + const unsigned long *ref_ctr_offsets; 401 + const __u64 *cookies; 402 + __u32 pid; 403 + } uprobe_multi; 404 + struct { 396 405 __u64 cookie; 397 406 } tracing; 398 407 struct { ··· 418 409 }; 419 410 size_t :0; 420 411 }; 421 - #define bpf_link_create_opts__last_field kprobe_multi.cookies 412 + #define bpf_link_create_opts__last_field uprobe_multi.pid 422 413 423 414 LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, 424 415 enum bpf_attach_type attach_type,
+440
tools/lib/bpf/elf.c
··· 1 + // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 + 3 + #include <libelf.h> 4 + #include <gelf.h> 5 + #include <fcntl.h> 6 + #include <linux/kernel.h> 7 + 8 + #include "libbpf_internal.h" 9 + #include "str_error.h" 10 + 11 + #define STRERR_BUFSIZE 128 12 + 13 + int elf_open(const char *binary_path, struct elf_fd *elf_fd) 14 + { 15 + char errmsg[STRERR_BUFSIZE]; 16 + int fd, ret; 17 + Elf *elf; 18 + 19 + if (elf_version(EV_CURRENT) == EV_NONE) { 20 + pr_warn("elf: failed to init libelf for %s\n", binary_path); 21 + return -LIBBPF_ERRNO__LIBELF; 22 + } 23 + fd = open(binary_path, O_RDONLY | O_CLOEXEC); 24 + if (fd < 0) { 25 + ret = -errno; 26 + pr_warn("elf: failed to open %s: %s\n", binary_path, 27 + libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); 28 + return ret; 29 + } 30 + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); 31 + if (!elf) { 32 + pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); 33 + close(fd); 34 + return -LIBBPF_ERRNO__FORMAT; 35 + } 36 + elf_fd->fd = fd; 37 + elf_fd->elf = elf; 38 + return 0; 39 + } 40 + 41 + void elf_close(struct elf_fd *elf_fd) 42 + { 43 + if (!elf_fd) 44 + return; 45 + elf_end(elf_fd->elf); 46 + close(elf_fd->fd); 47 + } 48 + 49 + /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. 
 */
static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
{
	while ((scn = elf_nextscn(elf, scn)) != NULL) {
		GElf_Shdr sh;

		if (!gelf_getshdr(scn, &sh))
			continue;
		if (sh.sh_type == sh_type)
			return scn;
	}
	return NULL;
}

/* One resolved symbol, as produced by elf_sym_iter_next() below */
struct elf_sym {
	const char *name;	/* symbol name from the strtab */
	GElf_Sym sym;		/* raw ELF symbol entry */
	GElf_Shdr sh;		/* header of symbol's containing section */
};

/* Iterator state over one symbol table (SHT_SYMTAB or SHT_DYNSYM) section */
struct elf_sym_iter {
	Elf *elf;
	Elf_Data *syms;
	size_t nr_syms;
	size_t strtabidx;
	size_t next_sym_idx;
	struct elf_sym sym;	/* storage returned by elf_sym_iter_next() */
	int st_type;		/* only symbols of this STT_* type are yielded */
};

/* Initialize iterator over the first section of given sh_type in @elf.
 * Returns 0 on success, -ENOENT if no such section exists (callers treat
 * this as non-fatal), or another negative error code.
 */
static int elf_sym_iter_new(struct elf_sym_iter *iter,
			    Elf *elf, const char *binary_path,
			    int sh_type, int st_type)
{
	Elf_Scn *scn = NULL;
	GElf_Ehdr ehdr;
	GElf_Shdr sh;

	memset(iter, 0, sizeof(*iter));

	if (!gelf_getehdr(elf, &ehdr)) {
		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
		return -EINVAL;
	}

	scn = elf_find_next_scn_by_type(elf, sh_type, NULL);
	if (!scn) {
		pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
			 binary_path);
		return -ENOENT;
	}

	if (!gelf_getshdr(scn, &sh))
		return -EINVAL;

	iter->strtabidx = sh.sh_link;
	iter->syms = elf_getdata(scn, 0);
	if (!iter->syms) {
		pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
			binary_path, elf_errmsg(-1));
		return -EINVAL;
	}
	iter->nr_syms = iter->syms->d_size / sh.sh_entsize;
	iter->elf = elf;
	iter->st_type = st_type;
	return 0;
}

/* Advance iterator to the next symbol of the requested st_type that has a
 * resolvable name and containing section; returns NULL when exhausted.
 * The returned pointer aliases iter->sym and is invalidated by the next call.
 */
static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
{
	struct elf_sym *ret = &iter->sym;
	GElf_Sym *sym = &ret->sym;
	const char *name = NULL;
	Elf_Scn *sym_scn;
	size_t idx;

	for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) {
		if (!gelf_getsym(iter->syms, idx, sym))
			continue;
		if (GELF_ST_TYPE(sym->st_info) != iter->st_type)
			continue;
		name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name);
		if (!name)
			continue;
		sym_scn = elf_getscn(iter->elf, sym->st_shndx);
		if (!sym_scn)
			continue;
		if (!gelf_getshdr(sym_scn, &ret->sh))
			continue;

		iter->next_sym_idx = idx + 1;
		ret->name = name;
		return ret;
	}

	return NULL;
}


/* Transform symbol's virtual address (absolute for binaries and relative
 * for shared libs) into file offset, which is what kernel is expecting
 * for uprobe/uretprobe attachment.
 * See Documentation/trace/uprobetracer.rst for more details. This is done
 * by looking up symbol's containing section's header and using its virtual
 * address (sh_addr) and corresponding file offset (sh_offset) to transform
 * sym.st_value (virtual address) into desired final file offset.
 */
static unsigned long elf_sym_offset(struct elf_sym *sym)
{
	return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset;
}

/* Find offset of function name in the provided ELF object. "binary_path" is
 * the path to the ELF binary represented by "elf", and only used for error
 * reporting matters. "name" matches symbol name or name@@LIB for library
 * functions.
 */
long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
{
	int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
	bool is_shared_lib, is_name_qualified;
	long ret = -ENOENT;
	size_t name_len;
	GElf_Ehdr ehdr;

	if (!gelf_getehdr(elf, &ehdr)) {
		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
		ret = -LIBBPF_ERRNO__FORMAT;
		goto out;
	}
	/* for shared lib case, we do not need to calculate relative offset */
	is_shared_lib = ehdr.e_type == ET_DYN;

	name_len = strlen(name);
	/* Does name specify "@@LIB"? */
	is_name_qualified = strstr(name, "@@") != NULL;

	/* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
	 * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
	 * linked binary may not have SHT_DYNSYM, so absence of a section should not be
	 * reported as a warning/error.
	 */
	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
		struct elf_sym_iter iter;
		struct elf_sym *sym;
		int last_bind = -1;
		int cur_bind;

		ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);
		if (ret == -ENOENT)
			continue;
		if (ret)
			goto out;

		while ((sym = elf_sym_iter_next(&iter))) {
			/* User can specify func, func@@LIB or func@@LIB_VERSION. */
			if (strncmp(sym->name, name, name_len) != 0)
				continue;
			/* ...but we don't want a search for "foo" to match 'foo2" also, so any
			 * additional characters in sname should be of the form "@@LIB".
			 */
			if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@')
				continue;

			cur_bind = GELF_ST_BIND(sym->sym.st_info);

			if (ret > 0) {
				/* handle multiple matches */
				if (last_bind != STB_WEAK && cur_bind != STB_WEAK) {
					/* Only accept one non-weak bind. */
					pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
						sym->name, name, binary_path);
					ret = -LIBBPF_ERRNO__FORMAT;
					goto out;
				} else if (cur_bind == STB_WEAK) {
					/* already have a non-weak bind, and
					 * this is a weak bind, so ignore.
					 */
					continue;
				}
			}

			ret = elf_sym_offset(sym);
			last_bind = cur_bind;
		}
		if (ret > 0)
			break;
	}

	if (ret > 0) {
		pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
			 ret);
	} else {
		if (ret == 0) {
			pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
				is_shared_lib ? "should not be 0 in a shared library" :
						"try using shared library path instead");
			ret = -ENOENT;
		} else {
			pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
		}
	}
out:
	return ret;
}

/* Find offset of function name in ELF object specified by path. "name" matches
 * symbol name or name@@LIB for library functions.
 */
long elf_find_func_offset_from_file(const char *binary_path, const char *name)
{
	struct elf_fd elf_fd;
	long ret = -ENOENT;

	ret = elf_open(binary_path, &elf_fd);
	if (ret)
		return ret;
	ret = elf_find_func_offset(elf_fd.elf, binary_path, name);
	elf_close(&elf_fd);
	return ret;
}

/* Requested symbol and, once matched, its binding and position in the
 * caller-provided syms/offsets arrays
 */
struct symbol {
	const char *name;
	int bind;	/* STB_* binding of the match accepted so far */
	int idx;	/* index into the original (pre-sort) input order */
};

/* qsort()/bsearch() comparator: order struct symbol entries by name */
static int symbol_cmp(const void *a, const void *b)
{
	const struct symbol *sym_a = a;
	const struct symbol *sym_b = b;

	return strcmp(sym_a->name, sym_b->name);
}

/*
 * Return offsets in @poffsets for symbols specified in @syms array argument.
 * On success returns 0 and offsets are returned in allocated array with @cnt
 * size, that needs to be released by the caller.
 */
int elf_resolve_syms_offsets(const char *binary_path, int cnt,
			     const char **syms, unsigned long **poffsets)
{
	int sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
	int err = 0, i, cnt_done = 0;
	unsigned long *offsets;
	struct symbol *symbols;
	struct elf_fd elf_fd;

	err = elf_open(binary_path, &elf_fd);
	if (err)
		return err;

	offsets = calloc(cnt, sizeof(*offsets));
	symbols = calloc(cnt, sizeof(*symbols));

	if (!offsets || !symbols) {
		err = -ENOMEM;
		goto out;
	}

	for (i = 0; i < cnt; i++) {
		symbols[i].name = syms[i];
		symbols[i].idx = i;
	}

	/* sort by name so each ELF symbol can be matched via bsearch() below */
	qsort(symbols, cnt, sizeof(*symbols), symbol_cmp);

	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
		struct elf_sym_iter iter;
		struct elf_sym *sym;

		err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC);
		if (err == -ENOENT)
			continue;
		if (err)
			goto out;

		while ((sym = elf_sym_iter_next(&iter))) {
			unsigned long sym_offset = elf_sym_offset(sym);
			int bind = GELF_ST_BIND(sym->sym.st_info);
			struct symbol *found, tmp = {
				.name = sym->name,
			};
			unsigned long *offset;

			found = bsearch(&tmp, symbols, cnt, sizeof(*symbols), symbol_cmp);
			if (!found)
				continue;

			offset = &offsets[found->idx];
			if (*offset > 0) {
				/* same offset, no problem */
				if (*offset == sym_offset)
					continue;
				/* handle multiple matches */
				if (found->bind != STB_WEAK && bind != STB_WEAK) {
					/* Only accept one non-weak bind. */
					pr_warn("elf: ambiguous match found '%s@%lu' in '%s' previous offset %lu\n",
						sym->name, sym_offset, binary_path, *offset);
					err = -ESRCH;
					goto out;
				} else if (bind == STB_WEAK) {
					/* already have a non-weak bind, and
					 * this is a weak bind, so ignore.
					 */
					continue;
				}
			} else {
				cnt_done++;
			}
			*offset = sym_offset;
			found->bind = bind;
		}
	}

	/* every requested symbol must have been resolved exactly once */
	if (cnt != cnt_done) {
		err = -ENOENT;
		goto out;
	}

	*poffsets = offsets;

out:
	free(symbols);
	if (err)
		free(offsets);
	elf_close(&elf_fd);
	return err;
}

/*
 * Return offsets in @poffsets for symbols specified by @pattern argument.
 * On success returns 0 and offsets are returned in allocated @poffsets
 * array with the @pcnt size, that needs to be released by the caller.
 */
int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
				unsigned long **poffsets, size_t *pcnt)
{
	int sh_types[2] = { SHT_SYMTAB, SHT_DYNSYM };
	unsigned long *offsets = NULL;
	size_t cap = 0, cnt = 0;
	struct elf_fd elf_fd;
	int err = 0, i;

	err = elf_open(binary_path, &elf_fd);
	if (err)
		return err;

	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
		struct elf_sym_iter iter;
		struct elf_sym *sym;

		err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC);
		if (err == -ENOENT)
			continue;
		if (err)
			goto out;

		while ((sym = elf_sym_iter_next(&iter))) {
			if (!glob_match(sym->name, pattern))
				continue;

			err = libbpf_ensure_mem((void **) &offsets, &cap, sizeof(*offsets),
						cnt + 1);
			if (err)
				goto out;

			offsets[cnt++] = elf_sym_offset(sym);
		}

		/* If we found anything in the first symbol section,
		 * do not search others to avoid duplicates.
		 */
		if (cnt)
			break;
	}

	if (cnt) {
		*poffsets = offsets;
		*pcnt = cnt;
	} else {
		err = -ENOENT;
	}

out:
	if (err)
		free(offsets);
	elf_close(&elf_fd);
	return err;
}
+235 -189
tools/lib/bpf/libbpf.c
··· 120 120 [BPF_NETFILTER] = "netfilter", 121 121 [BPF_TCX_INGRESS] = "tcx_ingress", 122 122 [BPF_TCX_EGRESS] = "tcx_egress", 123 + [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", 123 124 }; 124 125 125 126 static const char * const link_type_name[] = { ··· 136 135 [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", 137 136 [BPF_LINK_TYPE_NETFILTER] = "netfilter", 138 137 [BPF_LINK_TYPE_TCX] = "tcx", 138 + [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi", 139 139 }; 140 140 141 141 static const char * const map_type_name[] = { ··· 367 365 SEC_SLEEPABLE = 8, 368 366 /* BPF program support non-linear XDP buffer */ 369 367 SEC_XDP_FRAGS = 16, 368 + /* Setup proper attach type for usdt probes. */ 369 + SEC_USDT = 32, 370 370 }; 371 371 372 372 struct bpf_sec_def { ··· 554 550 int btf_id; 555 551 int sec_btf_id; 556 552 const char *name; 553 + char *essent_name; 557 554 bool is_set; 558 555 bool is_weak; 559 556 union { ··· 3775 3770 struct extern_desc *ext; 3776 3771 int i, n, off, dummy_var_btf_id; 3777 3772 const char *ext_name, *sec_name; 3773 + size_t ext_essent_len; 3778 3774 Elf_Scn *scn; 3779 3775 Elf64_Shdr *sh; 3780 3776 ··· 3824 3818 ext->name = btf__name_by_offset(obj->btf, t->name_off); 3825 3819 ext->sym_idx = i; 3826 3820 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; 3821 + 3822 + ext_essent_len = bpf_core_essential_name_len(ext->name); 3823 + ext->essent_name = NULL; 3824 + if (ext_essent_len != strlen(ext->name)) { 3825 + ext->essent_name = strndup(ext->name, ext_essent_len); 3826 + if (!ext->essent_name) 3827 + return -ENOMEM; 3828 + } 3827 3829 3828 3830 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); 3829 3831 if (ext->sec_btf_id <= 0) { ··· 4831 4817 return link_fd < 0 && err == -EBADF; 4832 4818 } 4833 4819 4820 + static int probe_uprobe_multi_link(void) 4821 + { 4822 + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, 4823 + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, 4824 + ); 4825 + LIBBPF_OPTS(bpf_link_create_opts, link_opts); 
4826 + struct bpf_insn insns[] = { 4827 + BPF_MOV64_IMM(BPF_REG_0, 0), 4828 + BPF_EXIT_INSN(), 4829 + }; 4830 + int prog_fd, link_fd, err; 4831 + unsigned long offset = 0; 4832 + 4833 + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", 4834 + insns, ARRAY_SIZE(insns), &load_opts); 4835 + if (prog_fd < 0) 4836 + return -errno; 4837 + 4838 + /* Creating uprobe in '/' binary should fail with -EBADF. */ 4839 + link_opts.uprobe_multi.path = "/"; 4840 + link_opts.uprobe_multi.offsets = &offset; 4841 + link_opts.uprobe_multi.cnt = 1; 4842 + 4843 + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); 4844 + err = -errno; /* close() can clobber errno */ 4845 + 4846 + if (link_fd >= 0) 4847 + close(link_fd); 4848 + close(prog_fd); 4849 + 4850 + return link_fd < 0 && err == -EBADF; 4851 + } 4852 + 4834 4853 static int probe_kern_bpf_cookie(void) 4835 4854 { 4836 4855 struct bpf_insn insns[] = { ··· 4959 4912 }, 4960 4913 [FEAT_SYSCALL_WRAPPER] = { 4961 4914 "Kernel using syscall wrapper", probe_kern_syscall_wrapper, 4915 + }, 4916 + [FEAT_UPROBE_MULTI_LINK] = { 4917 + "BPF multi-uprobe link support", probe_uprobe_multi_link, 4962 4918 }, 4963 4919 }; 4964 4920 ··· 6830 6780 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) 6831 6781 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; 6832 6782 6783 + /* special check for usdt to use uprobe_multi link */ 6784 + if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) 6785 + prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI; 6786 + 6833 6787 if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { 6834 6788 int btf_obj_fd = 0, btf_type_id = 0, err; 6835 6789 const char *attach_name; ··· 6902 6848 if (!insns || !insns_cnt) 6903 6849 return -EINVAL; 6904 6850 6905 - load_attr.expected_attach_type = prog->expected_attach_type; 6906 6851 if (kernel_supports(obj, FEAT_PROG_NAME)) 6907 6852 prog_name = prog->name; 6908 6853 load_attr.attach_prog_fd = prog->attach_prog_fd; ··· 6936 
6883 insns = prog->insns; 6937 6884 insns_cnt = prog->insns_cnt; 6938 6885 } 6886 + 6887 + /* allow prog_prepare_load_fn to change expected_attach_type */ 6888 + load_attr.expected_attach_type = prog->expected_attach_type; 6939 6889 6940 6890 if (obj->gen_loader) { 6941 6891 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, ··· 7680 7624 7681 7625 local_func_proto_id = ext->ksym.type_id; 7682 7626 7683 - kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); 7627 + kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf, 7628 + &mod_btf); 7684 7629 if (kfunc_id < 0) { 7685 7630 if (kfunc_id == -ESRCH && ext->is_weak) 7686 7631 return 0; ··· 7696 7639 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, 7697 7640 kern_btf, kfunc_proto_id); 7698 7641 if (ret <= 0) { 7642 + if (ext->is_weak) 7643 + return 0; 7644 + 7699 7645 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n", 7700 7646 ext->name, local_func_proto_id, 7701 7647 mod_btf ? 
mod_btf->name : "vmlinux", kfunc_proto_id); ··· 8376 8316 return 0; 8377 8317 } 8378 8318 8319 + int bpf_object__unpin(struct bpf_object *obj, const char *path) 8320 + { 8321 + int err; 8322 + 8323 + err = bpf_object__unpin_programs(obj, path); 8324 + if (err) 8325 + return libbpf_err(err); 8326 + 8327 + err = bpf_object__unpin_maps(obj, path); 8328 + if (err) 8329 + return libbpf_err(err); 8330 + 8331 + return 0; 8332 + } 8333 + 8379 8334 static void bpf_map__destroy(struct bpf_map *map) 8380 8335 { 8381 8336 if (map->inner_map) { ··· 8438 8363 bpf_object__elf_finish(obj); 8439 8364 bpf_object_unload(obj); 8440 8365 btf__free(obj->btf); 8366 + btf__free(obj->btf_vmlinux); 8441 8367 btf_ext__free(obj->btf_ext); 8442 8368 8443 8369 for (i = 0; i < obj->nr_maps; i++) ··· 8446 8370 8447 8371 zfree(&obj->btf_custom_path); 8448 8372 zfree(&obj->kconfig); 8373 + 8374 + for (i = 0; i < obj->nr_extern; i++) 8375 + zfree(&obj->externs[i].essent_name); 8376 + 8449 8377 zfree(&obj->externs); 8450 8378 obj->nr_extern = 0; 8451 8379 ··· 8761 8681 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8762 8682 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8763 8683 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8684 + static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8764 8685 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8765 8686 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8766 8687 ··· 8777 8696 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 8778 8697 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 8779 8698 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 8699 + SEC_DEF("uprobe.multi+", 
KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 8700 + SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 8701 + SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 8702 + SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 8780 8703 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 8781 8704 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 8782 - SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), 8705 + SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), 8706 + SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt), 8783 8707 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */ 8784 8708 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */ 8785 8709 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), ··· 10635 10549 } 10636 10550 10637 10551 /* Adapted from perf/util/string.c */ 10638 - static bool glob_match(const char *str, const char *pat) 10552 + bool glob_match(const char *str, const char *pat) 10639 10553 { 10640 10554 while (*str && *pat && *pat != '*') { 10641 10555 if (*pat == '?') { /* Matches any single character */ ··· 10988 10902 return libbpf_get_error(*link); 10989 10903 } 10990 10904 10905 + static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) 10906 + { 10907 + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; 10908 + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 10909 + int n, ret = -EINVAL; 10910 + 10911 + *link = NULL; 10912 + 10913 + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%ms", 10914 + &probe_type, &binary_path, &func_name); 10915 + switch (n) { 10916 + case 1: 10917 + /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. 
*/ 10918 + ret = 0; 10919 + break; 10920 + case 3: 10921 + opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0; 10922 + *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); 10923 + ret = libbpf_get_error(*link); 10924 + break; 10925 + default: 10926 + pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 10927 + prog->sec_name); 10928 + break; 10929 + } 10930 + free(probe_type); 10931 + free(binary_path); 10932 + free(func_name); 10933 + return ret; 10934 + } 10935 + 10991 10936 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, 10992 10937 const char *binary_path, uint64_t offset) 10993 10938 { ··· 11099 10982 /* Clear the newly added legacy uprobe_event */ 11100 10983 remove_uprobe_event_legacy(probe_name, retprobe); 11101 10984 return err; 11102 - } 11103 - 11104 - /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ 11105 - static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) 11106 - { 11107 - while ((scn = elf_nextscn(elf, scn)) != NULL) { 11108 - GElf_Shdr sh; 11109 - 11110 - if (!gelf_getshdr(scn, &sh)) 11111 - continue; 11112 - if (sh.sh_type == sh_type) 11113 - return scn; 11114 - } 11115 - return NULL; 11116 - } 11117 - 11118 - /* Find offset of function name in the provided ELF object. "binary_path" is 11119 - * the path to the ELF binary represented by "elf", and only used for error 11120 - * reporting matters. "name" matches symbol name or name@@LIB for library 11121 - * functions. 
11122 - */ 11123 - static long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) 11124 - { 11125 - int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; 11126 - bool is_shared_lib, is_name_qualified; 11127 - long ret = -ENOENT; 11128 - size_t name_len; 11129 - GElf_Ehdr ehdr; 11130 - 11131 - if (!gelf_getehdr(elf, &ehdr)) { 11132 - pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); 11133 - ret = -LIBBPF_ERRNO__FORMAT; 11134 - goto out; 11135 - } 11136 - /* for shared lib case, we do not need to calculate relative offset */ 11137 - is_shared_lib = ehdr.e_type == ET_DYN; 11138 - 11139 - name_len = strlen(name); 11140 - /* Does name specify "@@LIB"? */ 11141 - is_name_qualified = strstr(name, "@@") != NULL; 11142 - 11143 - /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if 11144 - * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically 11145 - * linked binary may not have SHT_DYMSYM, so absence of a section should not be 11146 - * reported as a warning/error. 
11147 - */ 11148 - for (i = 0; i < ARRAY_SIZE(sh_types); i++) { 11149 - size_t nr_syms, strtabidx, idx; 11150 - Elf_Data *symbols = NULL; 11151 - Elf_Scn *scn = NULL; 11152 - int last_bind = -1; 11153 - const char *sname; 11154 - GElf_Shdr sh; 11155 - 11156 - scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL); 11157 - if (!scn) { 11158 - pr_debug("elf: failed to find symbol table ELF sections in '%s'\n", 11159 - binary_path); 11160 - continue; 11161 - } 11162 - if (!gelf_getshdr(scn, &sh)) 11163 - continue; 11164 - strtabidx = sh.sh_link; 11165 - symbols = elf_getdata(scn, 0); 11166 - if (!symbols) { 11167 - pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n", 11168 - binary_path, elf_errmsg(-1)); 11169 - ret = -LIBBPF_ERRNO__FORMAT; 11170 - goto out; 11171 - } 11172 - nr_syms = symbols->d_size / sh.sh_entsize; 11173 - 11174 - for (idx = 0; idx < nr_syms; idx++) { 11175 - int curr_bind; 11176 - GElf_Sym sym; 11177 - Elf_Scn *sym_scn; 11178 - GElf_Shdr sym_sh; 11179 - 11180 - if (!gelf_getsym(symbols, idx, &sym)) 11181 - continue; 11182 - 11183 - if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) 11184 - continue; 11185 - 11186 - sname = elf_strptr(elf, strtabidx, sym.st_name); 11187 - if (!sname) 11188 - continue; 11189 - 11190 - curr_bind = GELF_ST_BIND(sym.st_info); 11191 - 11192 - /* User can specify func, func@@LIB or func@@LIB_VERSION. */ 11193 - if (strncmp(sname, name, name_len) != 0) 11194 - continue; 11195 - /* ...but we don't want a search for "foo" to match 'foo2" also, so any 11196 - * additional characters in sname should be of the form "@@LIB". 11197 - */ 11198 - if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@') 11199 - continue; 11200 - 11201 - if (ret >= 0) { 11202 - /* handle multiple matches */ 11203 - if (last_bind != STB_WEAK && curr_bind != STB_WEAK) { 11204 - /* Only accept one non-weak bind. 
*/ 11205 - pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", 11206 - sname, name, binary_path); 11207 - ret = -LIBBPF_ERRNO__FORMAT; 11208 - goto out; 11209 - } else if (curr_bind == STB_WEAK) { 11210 - /* already have a non-weak bind, and 11211 - * this is a weak bind, so ignore. 11212 - */ 11213 - continue; 11214 - } 11215 - } 11216 - 11217 - /* Transform symbol's virtual address (absolute for 11218 - * binaries and relative for shared libs) into file 11219 - * offset, which is what kernel is expecting for 11220 - * uprobe/uretprobe attachment. 11221 - * See Documentation/trace/uprobetracer.rst for more 11222 - * details. 11223 - * This is done by looking up symbol's containing 11224 - * section's header and using it's virtual address 11225 - * (sh_addr) and corresponding file offset (sh_offset) 11226 - * to transform sym.st_value (virtual address) into 11227 - * desired final file offset. 11228 - */ 11229 - sym_scn = elf_getscn(elf, sym.st_shndx); 11230 - if (!sym_scn) 11231 - continue; 11232 - if (!gelf_getshdr(sym_scn, &sym_sh)) 11233 - continue; 11234 - 11235 - ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset; 11236 - last_bind = curr_bind; 11237 - } 11238 - if (ret > 0) 11239 - break; 11240 - } 11241 - 11242 - if (ret > 0) { 11243 - pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path, 11244 - ret); 11245 - } else { 11246 - if (ret == 0) { 11247 - pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path, 11248 - is_shared_lib ? "should not be 0 in a shared library" : 11249 - "try using shared library path instead"); 11250 - ret = -ENOENT; 11251 - } else { 11252 - pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path); 11253 - } 11254 - } 11255 - out: 11256 - return ret; 11257 - } 11258 - 11259 - /* Find offset of function name in ELF object specified by path. "name" matches 11260 - * symbol name or name@@LIB for library functions. 
11261 - */ 11262 - static long elf_find_func_offset_from_file(const char *binary_path, const char *name) 11263 - { 11264 - char errmsg[STRERR_BUFSIZE]; 11265 - long ret = -ENOENT; 11266 - Elf *elf; 11267 - int fd; 11268 - 11269 - fd = open(binary_path, O_RDONLY | O_CLOEXEC); 11270 - if (fd < 0) { 11271 - ret = -errno; 11272 - pr_warn("failed to open %s: %s\n", binary_path, 11273 - libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); 11274 - return ret; 11275 - } 11276 - elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); 11277 - if (!elf) { 11278 - pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); 11279 - close(fd); 11280 - return -LIBBPF_ERRNO__FORMAT; 11281 - } 11282 - 11283 - ret = elf_find_func_offset(elf, binary_path, name); 11284 - elf_end(elf); 11285 - close(fd); 11286 - return ret; 11287 10985 } 11288 10986 11289 10987 /* Find offset of function name in archive specified by path. Currently ··· 11241 11309 } 11242 11310 } 11243 11311 return -ENOENT; 11312 + } 11313 + 11314 + struct bpf_link * 11315 + bpf_program__attach_uprobe_multi(const struct bpf_program *prog, 11316 + pid_t pid, 11317 + const char *path, 11318 + const char *func_pattern, 11319 + const struct bpf_uprobe_multi_opts *opts) 11320 + { 11321 + const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; 11322 + LIBBPF_OPTS(bpf_link_create_opts, lopts); 11323 + unsigned long *resolved_offsets = NULL; 11324 + int err = 0, link_fd, prog_fd; 11325 + struct bpf_link *link = NULL; 11326 + char errmsg[STRERR_BUFSIZE]; 11327 + char full_path[PATH_MAX]; 11328 + const __u64 *cookies; 11329 + const char **syms; 11330 + size_t cnt; 11331 + 11332 + if (!OPTS_VALID(opts, bpf_uprobe_multi_opts)) 11333 + return libbpf_err_ptr(-EINVAL); 11334 + 11335 + syms = OPTS_GET(opts, syms, NULL); 11336 + offsets = OPTS_GET(opts, offsets, NULL); 11337 + ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); 11338 + cookies = OPTS_GET(opts, cookies, NULL); 11339 + cnt = OPTS_GET(opts, cnt, 0); 11340 + 
11341 + /* 11342 + * User can specify 2 mutually exclusive set of inputs: 11343 + * 11344 + * 1) use only path/func_pattern/pid arguments 11345 + * 11346 + * 2) use path/pid with allowed combinations of: 11347 + * syms/offsets/ref_ctr_offsets/cookies/cnt 11348 + * 11349 + * - syms and offsets are mutually exclusive 11350 + * - ref_ctr_offsets and cookies are optional 11351 + * 11352 + * Any other usage results in error. 11353 + */ 11354 + 11355 + if (!path) 11356 + return libbpf_err_ptr(-EINVAL); 11357 + if (!func_pattern && cnt == 0) 11358 + return libbpf_err_ptr(-EINVAL); 11359 + 11360 + if (func_pattern) { 11361 + if (syms || offsets || ref_ctr_offsets || cookies || cnt) 11362 + return libbpf_err_ptr(-EINVAL); 11363 + } else { 11364 + if (!!syms == !!offsets) 11365 + return libbpf_err_ptr(-EINVAL); 11366 + } 11367 + 11368 + if (func_pattern) { 11369 + if (!strchr(path, '/')) { 11370 + err = resolve_full_path(path, full_path, sizeof(full_path)); 11371 + if (err) { 11372 + pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 11373 + prog->name, path, err); 11374 + return libbpf_err_ptr(err); 11375 + } 11376 + path = full_path; 11377 + } 11378 + 11379 + err = elf_resolve_pattern_offsets(path, func_pattern, 11380 + &resolved_offsets, &cnt); 11381 + if (err < 0) 11382 + return libbpf_err_ptr(err); 11383 + offsets = resolved_offsets; 11384 + } else if (syms) { 11385 + err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets); 11386 + if (err < 0) 11387 + return libbpf_err_ptr(err); 11388 + offsets = resolved_offsets; 11389 + } 11390 + 11391 + lopts.uprobe_multi.path = path; 11392 + lopts.uprobe_multi.offsets = offsets; 11393 + lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; 11394 + lopts.uprobe_multi.cookies = cookies; 11395 + lopts.uprobe_multi.cnt = cnt; 11396 + lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? 
BPF_F_UPROBE_MULTI_RETURN : 0; 11397 + 11398 + if (pid == 0) 11399 + pid = getpid(); 11400 + if (pid > 0) 11401 + lopts.uprobe_multi.pid = pid; 11402 + 11403 + link = calloc(1, sizeof(*link)); 11404 + if (!link) { 11405 + err = -ENOMEM; 11406 + goto error; 11407 + } 11408 + link->detach = &bpf_link__detach_fd; 11409 + 11410 + prog_fd = bpf_program__fd(prog); 11411 + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts); 11412 + if (link_fd < 0) { 11413 + err = -errno; 11414 + pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", 11415 + prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11416 + goto error; 11417 + } 11418 + link->fd = link_fd; 11419 + free(resolved_offsets); 11420 + return link; 11421 + 11422 + error: 11423 + free(resolved_offsets); 11424 + free(link); 11425 + return libbpf_err_ptr(err); 11244 11426 } 11245 11427 11246 11428 LIBBPF_API struct bpf_link *
+52
tools/lib/bpf/libbpf.h
··· 266 266 LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, 267 267 const char *path); 268 268 LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); 269 + LIBBPF_API int bpf_object__unpin(struct bpf_object *object, const char *path); 269 270 270 271 LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); 271 272 LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); ··· 529 528 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, 530 529 const char *pattern, 531 530 const struct bpf_kprobe_multi_opts *opts); 531 + 532 + struct bpf_uprobe_multi_opts { 533 + /* size of this struct, for forward/backward compatibility */ 534 + size_t sz; 535 + /* array of function symbols to attach to */ 536 + const char **syms; 537 + /* array of function addresses to attach to */ 538 + const unsigned long *offsets; 539 + /* optional, array of associated ref counter offsets */ 540 + const unsigned long *ref_ctr_offsets; 541 + /* optional, array of associated BPF cookies */ 542 + const __u64 *cookies; 543 + /* number of elements in syms/addrs/cookies arrays */ 544 + size_t cnt; 545 + /* create return uprobes */ 546 + bool retprobe; 547 + size_t :0; 548 + }; 549 + 550 + #define bpf_uprobe_multi_opts__last_field retprobe 551 + 552 + /** 553 + * @brief **bpf_program__attach_uprobe_multi()** attaches a BPF program 554 + * to multiple uprobes with uprobe_multi link. 
555 + * 556 + * User can specify 2 mutually exclusive set of inputs: 557 + * 558 + * 1) use only path/func_pattern/pid arguments 559 + * 560 + * 2) use path/pid with allowed combinations of 561 + * syms/offsets/ref_ctr_offsets/cookies/cnt 562 + * 563 + * - syms and offsets are mutually exclusive 564 + * - ref_ctr_offsets and cookies are optional 565 + * 566 + * 567 + * @param prog BPF program to attach 568 + * @param pid Process ID to attach the uprobe to, 0 for self (own process), 569 + * -1 for all processes 570 + * @param binary_path Path to binary 571 + * @param func_pattern Regular expression to specify functions to attach 572 + * BPF program to 573 + * @param opts Additional options (see **struct bpf_uprobe_multi_opts**) 574 + * @return A new BPF link on success; or NULL with errno set, otherwise 575 + */ 576 + LIBBPF_API struct bpf_link * 577 + bpf_program__attach_uprobe_multi(const struct bpf_program *prog, 578 + pid_t pid, 579 + const char *binary_path, 580 + const char *func_pattern, 581 + const struct bpf_uprobe_multi_opts *opts); 532 582 533 583 struct bpf_ksyscall_opts { 534 584 /* size of this struct, for forward/backward compatibility */
+2
tools/lib/bpf/libbpf.map
··· 395 395 LIBBPF_1.3.0 { 396 396 global: 397 397 bpf_obj_pin_opts; 398 + bpf_object__unpin; 398 399 bpf_prog_detach_opts; 399 400 bpf_program__attach_netfilter; 400 401 bpf_program__attach_tcx; 402 + bpf_program__attach_uprobe_multi; 401 403 } LIBBPF_1.2.0;
+21
tools/lib/bpf/libbpf_internal.h
··· 15 15 #include <linux/err.h> 16 16 #include <fcntl.h> 17 17 #include <unistd.h> 18 + #include <libelf.h> 18 19 #include "relo_core.h" 19 20 20 21 /* make sure libbpf doesn't use kernel-only integer typedefs */ ··· 355 354 FEAT_BTF_ENUM64, 356 355 /* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */ 357 356 FEAT_SYSCALL_WRAPPER, 357 + /* BPF multi-uprobe link support */ 358 + FEAT_UPROBE_MULTI_LINK, 358 359 __FEAT_CNT, 359 360 }; 360 361 ··· 579 576 580 577 #define PROG_LOAD_ATTEMPTS 5 581 578 int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts); 579 + 580 + bool glob_match(const char *str, const char *pat); 581 + 582 + long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name); 583 + long elf_find_func_offset_from_file(const char *binary_path, const char *name); 584 + 585 + struct elf_fd { 586 + Elf *elf; 587 + int fd; 588 + }; 589 + 590 + int elf_open(const char *binary_path, struct elf_fd *elf_fd); 591 + void elf_close(struct elf_fd *elf_fd); 592 + 593 + int elf_resolve_syms_offsets(const char *binary_path, int cnt, 594 + const char **syms, unsigned long **poffsets); 595 + int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern, 596 + unsigned long **poffsets, size_t *pcnt); 582 597 583 598 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
+1 -1
tools/lib/bpf/relo_core.c
··· 776 776 break; 777 777 case BPF_CORE_FIELD_SIGNED: 778 778 *val = (btf_is_any_enum(mt) && BTF_INFO_KFLAG(mt->info)) || 779 - (btf_int_encoding(mt) & BTF_INT_SIGNED); 779 + (btf_is_int(mt) && (btf_int_encoding(mt) & BTF_INT_SIGNED)); 780 780 if (validate) 781 781 *validate = true; /* signedness is never ambiguous */ 782 782 break;
+79 -37
tools/lib/bpf/usdt.c
··· 250 250 251 251 bool has_bpf_cookie; 252 252 bool has_sema_refcnt; 253 + bool has_uprobe_multi; 253 254 }; 254 255 255 256 struct usdt_manager *usdt_manager_new(struct bpf_object *obj) ··· 285 284 */ 286 285 man->has_sema_refcnt = faccessat(AT_FDCWD, ref_ctr_sysfs_path, F_OK, AT_EACCESS) == 0; 287 286 287 + /* 288 + * Detect kernel support for uprobe multi link to be used for attaching 289 + * usdt probes. 290 + */ 291 + man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK); 288 292 return man; 289 293 } 290 294 ··· 814 808 long abs_ip; 815 809 struct bpf_link *link; 816 810 } *uprobes; 811 + 812 + struct bpf_link *multi_link; 817 813 }; 818 814 819 815 static int bpf_link_usdt_detach(struct bpf_link *link) ··· 824 816 struct usdt_manager *man = usdt_link->usdt_man; 825 817 int i; 826 818 819 + bpf_link__destroy(usdt_link->multi_link); 820 + 821 + /* When having multi_link, uprobe_cnt is 0 */ 827 822 for (i = 0; i < usdt_link->uprobe_cnt; i++) { 828 823 /* detach underlying uprobe link */ 829 824 bpf_link__destroy(usdt_link->uprobes[i].link); ··· 957 946 const char *usdt_provider, const char *usdt_name, 958 947 __u64 usdt_cookie) 959 948 { 960 - int i, fd, err, spec_map_fd, ip_map_fd; 949 + unsigned long *offsets = NULL, *ref_ctr_offsets = NULL; 950 + int i, err, spec_map_fd, ip_map_fd; 961 951 LIBBPF_OPTS(bpf_uprobe_opts, opts); 962 952 struct hashmap *specs_hash = NULL; 963 953 struct bpf_link_usdt *link = NULL; 964 954 struct usdt_target *targets = NULL; 955 + __u64 *cookies = NULL; 956 + struct elf_fd elf_fd; 965 957 size_t target_cnt; 966 - Elf *elf; 967 958 968 959 spec_map_fd = bpf_map__fd(man->specs_map); 969 960 ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); 970 961 971 - fd = open(path, O_RDONLY | O_CLOEXEC); 972 - if (fd < 0) { 973 - err = -errno; 974 - pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err); 962 + err = elf_open(path, &elf_fd); 963 + if (err) 975 964 return libbpf_err_ptr(err); 976 - } 977 965 978 - elf = 
elf_begin(fd, ELF_C_READ_MMAP, NULL); 979 - if (!elf) { 980 - err = -EBADF; 981 - pr_warn("usdt: failed to parse ELF binary '%s': %s\n", path, elf_errmsg(-1)); 982 - goto err_out; 983 - } 984 - 985 - err = sanity_check_usdt_elf(elf, path); 966 + err = sanity_check_usdt_elf(elf_fd.elf, path); 986 967 if (err) 987 968 goto err_out; 988 969 ··· 987 984 /* discover USDT in given binary, optionally limiting 988 985 * activations to a given PID, if pid > 0 989 986 */ 990 - err = collect_usdt_targets(man, elf, path, pid, usdt_provider, usdt_name, 987 + err = collect_usdt_targets(man, elf_fd.elf, path, pid, usdt_provider, usdt_name, 991 988 usdt_cookie, &targets, &target_cnt); 992 989 if (err <= 0) { 993 990 err = (err == 0) ? -ENOENT : err; ··· 1010 1007 link->link.detach = &bpf_link_usdt_detach; 1011 1008 link->link.dealloc = &bpf_link_usdt_dealloc; 1012 1009 1013 - link->uprobes = calloc(target_cnt, sizeof(*link->uprobes)); 1014 - if (!link->uprobes) { 1015 - err = -ENOMEM; 1016 - goto err_out; 1010 + if (man->has_uprobe_multi) { 1011 + offsets = calloc(target_cnt, sizeof(*offsets)); 1012 + cookies = calloc(target_cnt, sizeof(*cookies)); 1013 + ref_ctr_offsets = calloc(target_cnt, sizeof(*ref_ctr_offsets)); 1014 + 1015 + if (!offsets || !ref_ctr_offsets || !cookies) { 1016 + err = -ENOMEM; 1017 + goto err_out; 1018 + } 1019 + } else { 1020 + link->uprobes = calloc(target_cnt, sizeof(*link->uprobes)); 1021 + if (!link->uprobes) { 1022 + err = -ENOMEM; 1023 + goto err_out; 1024 + } 1017 1025 } 1018 1026 1019 1027 for (i = 0; i < target_cnt; i++) { ··· 1065 1051 goto err_out; 1066 1052 } 1067 1053 1068 - opts.ref_ctr_offset = target->sema_off; 1069 - opts.bpf_cookie = man->has_bpf_cookie ? 
spec_id : 0; 1070 - uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, 1071 - target->rel_ip, &opts); 1072 - err = libbpf_get_error(uprobe_link); 1073 - if (err) { 1074 - pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", 1075 - i, usdt_provider, usdt_name, path, err); 1054 + if (man->has_uprobe_multi) { 1055 + offsets[i] = target->rel_ip; 1056 + ref_ctr_offsets[i] = target->sema_off; 1057 + cookies[i] = spec_id; 1058 + } else { 1059 + opts.ref_ctr_offset = target->sema_off; 1060 + opts.bpf_cookie = man->has_bpf_cookie ? spec_id : 0; 1061 + uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, 1062 + target->rel_ip, &opts); 1063 + err = libbpf_get_error(uprobe_link); 1064 + if (err) { 1065 + pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", 1066 + i, usdt_provider, usdt_name, path, err); 1067 + goto err_out; 1068 + } 1069 + 1070 + link->uprobes[i].link = uprobe_link; 1071 + link->uprobes[i].abs_ip = target->abs_ip; 1072 + link->uprobe_cnt++; 1073 + } 1074 + } 1075 + 1076 + if (man->has_uprobe_multi) { 1077 + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts_multi, 1078 + .ref_ctr_offsets = ref_ctr_offsets, 1079 + .offsets = offsets, 1080 + .cookies = cookies, 1081 + .cnt = target_cnt, 1082 + ); 1083 + 1084 + link->multi_link = bpf_program__attach_uprobe_multi(prog, pid, path, 1085 + NULL, &opts_multi); 1086 + if (!link->multi_link) { 1087 + err = -errno; 1088 + pr_warn("usdt: failed to attach uprobe multi for '%s:%s' in '%s': %d\n", 1089 + usdt_provider, usdt_name, path, err); 1076 1090 goto err_out; 1077 1091 } 1078 1092 1079 - link->uprobes[i].link = uprobe_link; 1080 - link->uprobes[i].abs_ip = target->abs_ip; 1081 - link->uprobe_cnt++; 1093 + free(offsets); 1094 + free(ref_ctr_offsets); 1095 + free(cookies); 1082 1096 } 1083 1097 1084 1098 free(targets); 1085 1099 hashmap__free(specs_hash); 1086 - elf_end(elf); 1087 - close(fd); 1088 - 1100 + elf_close(&elf_fd); 1089 1101 return &link->link; 1090 1102 1091 
1103 err_out: 1104 + free(offsets); 1105 + free(ref_ctr_offsets); 1106 + free(cookies); 1107 + 1092 1108 if (link) 1093 1109 bpf_link__destroy(&link->link); 1094 1110 free(targets); 1095 1111 hashmap__free(specs_hash); 1096 - if (elf) 1097 - elf_end(elf); 1098 - close(fd); 1112 + elf_close(&elf_fd); 1099 1113 return libbpf_err_ptr(err); 1100 1114 } 1101 1115
+1
tools/testing/selftests/bpf/.gitignore
··· 44 44 /bench 45 45 /veristat 46 46 /sign-file 47 + /uprobe_multi 47 48 *.ko 48 49 *.tmp 49 50 xskxceiver
+5
tools/testing/selftests/bpf/Makefile
··· 585 585 $(OUTPUT)/liburandom_read.so \ 586 586 $(OUTPUT)/xdp_synproxy \ 587 587 $(OUTPUT)/sign-file \ 588 + $(OUTPUT)/uprobe_multi \ 588 589 ima_setup.sh \ 589 590 verify_sig_setup.sh \ 590 591 $(wildcard progs/btf_dump_test_case_*.c) \ ··· 698 697 $(OUTPUT)/veristat: $(OUTPUT)/veristat.o 699 698 $(call msg,BINARY,,$@) 700 699 $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ 700 + 701 + $(OUTPUT)/uprobe_multi: uprobe_multi.c 702 + $(call msg,BINARY,,$@) 703 + $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@ 701 704 702 705 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ 703 706 prog_tests/tests.h map_tests/tests.h verifier/tests.h \
-9
tools/testing/selftests/bpf/bench.h
··· 81 81 void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, 82 82 struct basic_stats *gp_stat); 83 83 84 - static inline __u64 get_time_ns(void) 85 - { 86 - struct timespec t; 87 - 88 - clock_gettime(CLOCK_MONOTONIC, &t); 89 - 90 - return (u64)t.tv_sec * 1000000000 + t.tv_nsec; 91 - } 92 - 93 84 static inline void atomic_inc(long *value) 94 85 { 95 86 (void)__atomic_add_fetch(value, 1, __ATOMIC_RELAXED);
+2
tools/testing/selftests/bpf/config
··· 16 16 CONFIG_DEBUG_INFO=y 17 17 CONFIG_DEBUG_INFO_BTF=y 18 18 CONFIG_DEBUG_INFO_DWARF4=y 19 + CONFIG_DUMMY=y 19 20 CONFIG_DYNAMIC_FTRACE=y 20 21 CONFIG_FPROBE=y 21 22 CONFIG_FTRACE_SYSCALLS=y ··· 60 59 CONFIG_NET_IPGRE_DEMUX=y 61 60 CONFIG_NET_IPIP=y 62 61 CONFIG_NET_MPLS_GSO=y 62 + CONFIG_NET_SCH_FQ=y 63 63 CONFIG_NET_SCH_INGRESS=y 64 64 CONFIG_NET_SCHED=y 65 65 CONFIG_NETDEVSIM=y
+78
tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
··· 11 11 #include <bpf/btf.h> 12 12 #include "test_bpf_cookie.skel.h" 13 13 #include "kprobe_multi.skel.h" 14 + #include "uprobe_multi.skel.h" 14 15 15 16 /* uprobe attach point */ 16 17 static noinline void trigger_func(void) ··· 240 239 bpf_link__destroy(link1); 241 240 kprobe_multi__destroy(skel); 242 241 } 242 + 243 + /* defined in prog_tests/uprobe_multi_test.c */ 244 + void uprobe_multi_func_1(void); 245 + void uprobe_multi_func_2(void); 246 + void uprobe_multi_func_3(void); 247 + 248 + static void uprobe_multi_test_run(struct uprobe_multi *skel) 249 + { 250 + skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1; 251 + skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2; 252 + skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3; 253 + 254 + skel->bss->pid = getpid(); 255 + skel->bss->test_cookie = true; 256 + 257 + uprobe_multi_func_1(); 258 + uprobe_multi_func_2(); 259 + uprobe_multi_func_3(); 260 + 261 + ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 1, "uprobe_multi_func_1_result"); 262 + ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 1, "uprobe_multi_func_2_result"); 263 + ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 1, "uprobe_multi_func_3_result"); 264 + 265 + ASSERT_EQ(skel->bss->uretprobe_multi_func_1_result, 1, "uretprobe_multi_func_1_result"); 266 + ASSERT_EQ(skel->bss->uretprobe_multi_func_2_result, 1, "uretprobe_multi_func_2_result"); 267 + ASSERT_EQ(skel->bss->uretprobe_multi_func_3_result, 1, "uretprobe_multi_func_3_result"); 268 + } 269 + 270 + static void uprobe_multi_attach_api_subtest(void) 271 + { 272 + struct bpf_link *link1 = NULL, *link2 = NULL; 273 + struct uprobe_multi *skel = NULL; 274 + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 275 + const char *syms[3] = { 276 + "uprobe_multi_func_1", 277 + "uprobe_multi_func_2", 278 + "uprobe_multi_func_3", 279 + }; 280 + __u64 cookies[3]; 281 + 282 + cookies[0] = 3; /* uprobe_multi_func_1 */ 283 + cookies[1] = 1; /* uprobe_multi_func_2 */ 284 + 
cookies[2] = 2; /* uprobe_multi_func_3 */ 285 + 286 + opts.syms = syms; 287 + opts.cnt = ARRAY_SIZE(syms); 288 + opts.cookies = &cookies[0]; 289 + 290 + skel = uprobe_multi__open_and_load(); 291 + if (!ASSERT_OK_PTR(skel, "uprobe_multi")) 292 + goto cleanup; 293 + 294 + link1 = bpf_program__attach_uprobe_multi(skel->progs.uprobe, -1, 295 + "/proc/self/exe", NULL, &opts); 296 + if (!ASSERT_OK_PTR(link1, "bpf_program__attach_uprobe_multi")) 297 + goto cleanup; 298 + 299 + cookies[0] = 2; /* uprobe_multi_func_1 */ 300 + cookies[1] = 3; /* uprobe_multi_func_2 */ 301 + cookies[2] = 1; /* uprobe_multi_func_3 */ 302 + 303 + opts.retprobe = true; 304 + link2 = bpf_program__attach_uprobe_multi(skel->progs.uretprobe, -1, 305 + "/proc/self/exe", NULL, &opts); 306 + if (!ASSERT_OK_PTR(link2, "bpf_program__attach_uprobe_multi_retprobe")) 307 + goto cleanup; 308 + 309 + uprobe_multi_test_run(skel); 310 + 311 + cleanup: 312 + bpf_link__destroy(link2); 313 + bpf_link__destroy(link1); 314 + uprobe_multi__destroy(skel); 315 + } 316 + 243 317 static void uprobe_subtest(struct test_bpf_cookie *skel) 244 318 { 245 319 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); ··· 591 515 kprobe_multi_attach_api_subtest(); 592 516 if (test__start_subtest("uprobe")) 593 517 uprobe_subtest(skel); 518 + if (test__start_subtest("multi_uprobe_attach_api")) 519 + uprobe_multi_attach_api_subtest(); 594 520 if (test__start_subtest("tracepoint")) 595 521 tp_subtest(skel); 596 522 if (test__start_subtest("perf_event"))
-8
tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
··· 304 304 kprobe_multi__destroy(skel); 305 305 } 306 306 307 - static inline __u64 get_time_ns(void) 308 - { 309 - struct timespec t; 310 - 311 - clock_gettime(CLOCK_MONOTONIC, &t); 312 - return (__u64) t.tv_sec * 1000000000 + t.tv_nsec; 313 - } 314 - 315 307 static size_t symbol_hash(long key, void *ctx __maybe_unused) 316 308 { 317 309 return str_hash((const char *) key);
+32 -1
tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
··· 5 5 #include <network_helpers.h> 6 6 7 7 #include "local_kptr_stash.skel.h" 8 + #include "local_kptr_stash_fail.skel.h" 8 9 static void test_local_kptr_stash_simple(void) 9 10 { 10 11 LIBBPF_OPTS(bpf_test_run_opts, opts, ··· 23 22 ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_rb_nodes), &opts); 24 23 ASSERT_OK(ret, "local_kptr_stash_add_nodes run"); 25 24 ASSERT_OK(opts.retval, "local_kptr_stash_add_nodes retval"); 25 + 26 + local_kptr_stash__destroy(skel); 27 + } 28 + 29 + static void test_local_kptr_stash_plain(void) 30 + { 31 + LIBBPF_OPTS(bpf_test_run_opts, opts, 32 + .data_in = &pkt_v4, 33 + .data_size_in = sizeof(pkt_v4), 34 + .repeat = 1, 35 + ); 36 + struct local_kptr_stash *skel; 37 + int ret; 38 + 39 + skel = local_kptr_stash__open_and_load(); 40 + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) 41 + return; 42 + 43 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_plain), &opts); 44 + ASSERT_OK(ret, "local_kptr_stash_add_plain run"); 45 + ASSERT_OK(opts.retval, "local_kptr_stash_add_plain retval"); 26 46 27 47 local_kptr_stash__destroy(skel); 28 48 } ··· 73 51 local_kptr_stash__destroy(skel); 74 52 } 75 53 76 - void test_local_kptr_stash_success(void) 54 + static void test_local_kptr_stash_fail(void) 55 + { 56 + RUN_TESTS(local_kptr_stash_fail); 57 + } 58 + 59 + void test_local_kptr_stash(void) 77 60 { 78 61 if (test__start_subtest("local_kptr_stash_simple")) 79 62 test_local_kptr_stash_simple(); 63 + if (test__start_subtest("local_kptr_stash_plain")) 64 + test_local_kptr_stash_plain(); 80 65 if (test__start_subtest("local_kptr_stash_unstash")) 81 66 test_local_kptr_stash_unstash(); 67 + if (test__start_subtest("local_kptr_stash_fail")) 68 + test_local_kptr_stash_fail(); 82 69 }
+139
tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #ifndef __LWT_HELPERS_H 4 + #define __LWT_HELPERS_H 5 + 6 + #include <time.h> 7 + #include <net/if.h> 8 + #include <linux/if_tun.h> 9 + #include <linux/icmp.h> 10 + 11 + #include "test_progs.h" 12 + 13 + #define log_err(MSG, ...) \ 14 + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ 15 + __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) 16 + 17 + #define RUN_TEST(name) \ 18 + ({ \ 19 + if (test__start_subtest(#name)) \ 20 + if (ASSERT_OK(netns_create(), "netns_create")) { \ 21 + struct nstoken *token = open_netns(NETNS); \ 22 + if (ASSERT_OK_PTR(token, "setns")) { \ 23 + test_ ## name(); \ 24 + close_netns(token); \ 25 + } \ 26 + netns_delete(); \ 27 + } \ 28 + }) 29 + 30 + #define NETNS "ns_lwt" 31 + 32 + static inline int netns_create(void) 33 + { 34 + return system("ip netns add " NETNS); 35 + } 36 + 37 + static inline int netns_delete(void) 38 + { 39 + return system("ip netns del " NETNS ">/dev/null 2>&1"); 40 + } 41 + 42 + static int open_tuntap(const char *dev_name, bool need_mac) 43 + { 44 + int err = 0; 45 + struct ifreq ifr; 46 + int fd = open("/dev/net/tun", O_RDWR); 47 + 48 + if (!ASSERT_GT(fd, 0, "open(/dev/net/tun)")) 49 + return -1; 50 + 51 + ifr.ifr_flags = IFF_NO_PI | (need_mac ? 
IFF_TAP : IFF_TUN); 52 + memcpy(ifr.ifr_name, dev_name, IFNAMSIZ); 53 + 54 + err = ioctl(fd, TUNSETIFF, &ifr); 55 + if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { 56 + close(fd); 57 + return -1; 58 + } 59 + 60 + err = fcntl(fd, F_SETFL, O_NONBLOCK); 61 + if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { 62 + close(fd); 63 + return -1; 64 + } 65 + 66 + return fd; 67 + } 68 + 69 + #define ICMP_PAYLOAD_SIZE 100 70 + 71 + /* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */ 72 + static int __expect_icmp_ipv4(char *buf, ssize_t len) 73 + { 74 + struct iphdr *ip = (struct iphdr *)buf; 75 + struct icmphdr *icmp = (struct icmphdr *)(ip + 1); 76 + ssize_t min_header_len = sizeof(*ip) + sizeof(*icmp); 77 + 78 + if (len < min_header_len) 79 + return -1; 80 + 81 + if (ip->protocol != IPPROTO_ICMP) 82 + return -1; 83 + 84 + if (icmp->type != ICMP_ECHO) 85 + return -1; 86 + 87 + return len == ICMP_PAYLOAD_SIZE + min_header_len; 88 + } 89 + 90 + typedef int (*filter_t) (char *, ssize_t); 91 + 92 + /* wait_for_packet - wait for a packet that matches the filter 93 + * 94 + * @fd: tun fd/packet socket to read packet 95 + * @filter: filter function, returning 1 if matches 96 + * @timeout: timeout to wait for the packet 97 + * 98 + * Returns 1 if a matching packet is read, 0 if timeout expired, -1 on error. 99 + */ 100 + static int wait_for_packet(int fd, filter_t filter, struct timeval *timeout) 101 + { 102 + char buf[4096]; 103 + int max_retry = 5; /* in case we read some spurious packets */ 104 + fd_set fds; 105 + 106 + FD_ZERO(&fds); 107 + while (max_retry--) { 108 + /* Linux modifies timeout arg... 
So make a copy */ 109 + struct timeval copied_timeout = *timeout; 110 + ssize_t ret = -1; 111 + 112 + FD_SET(fd, &fds); 113 + 114 + ret = select(1 + fd, &fds, NULL, NULL, &copied_timeout); 115 + if (ret <= 0) { 116 + if (errno == EINTR) 117 + continue; 118 + else if (errno == EAGAIN || ret == 0) 119 + return 0; 120 + 121 + log_err("select failed"); 122 + return -1; 123 + } 124 + 125 + ret = read(fd, buf, sizeof(buf)); 126 + 127 + if (ret <= 0) { 128 + log_err("read(dev): %ld", ret); 129 + return -1; 130 + } 131 + 132 + if (filter && filter(buf, ret) > 0) 133 + return 1; 134 + } 135 + 136 + return 0; 137 + } 138 + 139 + #endif /* __LWT_HELPERS_H */
+330
tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 + 3 + /* 4 + * Test suite of lwt_xmit BPF programs that redirect packets 5 + * The file tests focus not only if these programs work as expected normally, 6 + * but also if they can handle abnormal situations gracefully. 7 + * 8 + * WARNING 9 + * ------- 10 + * This test suite may crash the kernel, thus should be run in a VM. 11 + * 12 + * Setup: 13 + * --------- 14 + * All tests are performed in a single netns. Two lwt encap routes are setup for 15 + * each subtest: 16 + * 17 + * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<ingress_sec>" dev link_err 18 + * ip route add 20.0.0.0/24 encap bpf xmit <obj> sec "<egress_sec>" dev link_err 19 + * 20 + * Here <obj> is statically defined to test_lwt_redirect.bpf.o, and each section 21 + * of this object holds a program entry to test. The BPF object is built from 22 + * progs/test_lwt_redirect.c. We didn't use generated BPF skeleton since the 23 + * attachment for lwt programs are not supported by libbpf yet. 24 + * 25 + * For testing, ping commands are run in the test netns: 26 + * 27 + * ping 10.0.0.<ifindex> -c 1 -w 1 -s 100 28 + * ping 20.0.0.<ifindex> -c 1 -w 1 -s 100 29 + * 30 + * Scenarios: 31 + * -------------------------------- 32 + * 1. Redirect to a running tap/tun device 33 + * 2. Redirect to a down tap/tun device 34 + * 3. Redirect to a vlan device with lower layer down 35 + * 36 + * Case 1, ping packets should be received by packet socket on target device 37 + * when redirected to ingress, and by tun/tap fd when redirected to egress. 38 + * 39 + * Case 2,3 are considered successful as long as they do not crash the kernel 40 + * as a regression. 41 + * 42 + * Case 1,2 use tap device to test redirect to device that requires MAC 43 + * header, and tun device to test the case with no MAC header added. 
44 + */ 45 + #include <sys/socket.h> 46 + #include <net/if.h> 47 + #include <linux/if_ether.h> 48 + #include <linux/if_packet.h> 49 + #include <linux/if_tun.h> 50 + #include <linux/icmp.h> 51 + #include <arpa/inet.h> 52 + #include <unistd.h> 53 + #include <errno.h> 54 + #include <stdbool.h> 55 + #include <stdlib.h> 56 + 57 + #include "lwt_helpers.h" 58 + #include "test_progs.h" 59 + #include "network_helpers.h" 60 + 61 + #define BPF_OBJECT "test_lwt_redirect.bpf.o" 62 + #define INGRESS_SEC(need_mac) ((need_mac) ? "redir_ingress" : "redir_ingress_nomac") 63 + #define EGRESS_SEC(need_mac) ((need_mac) ? "redir_egress" : "redir_egress_nomac") 64 + #define LOCAL_SRC "10.0.0.1" 65 + #define CIDR_TO_INGRESS "10.0.0.0/24" 66 + #define CIDR_TO_EGRESS "20.0.0.0/24" 67 + 68 + /* ping to redirect toward given dev, with last byte of dest IP being the target 69 + * device index. 70 + * 71 + * Note: ping command inside BPF-CI is busybox version, so it does not have certain 72 + * function, such like -m option to set packet mark. 73 + */ 74 + static void ping_dev(const char *dev, bool is_ingress) 75 + { 76 + int link_index = if_nametoindex(dev); 77 + char ip[256]; 78 + 79 + if (!ASSERT_GE(link_index, 0, "if_nametoindex")) 80 + return; 81 + 82 + if (is_ingress) 83 + snprintf(ip, sizeof(ip), "10.0.0.%d", link_index); 84 + else 85 + snprintf(ip, sizeof(ip), "20.0.0.%d", link_index); 86 + 87 + /* We won't get a reply. 
Don't fail here */ 88 + SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1", 89 + ip, ICMP_PAYLOAD_SIZE); 90 + } 91 + 92 + static int new_packet_sock(const char *ifname) 93 + { 94 + int err = 0; 95 + int ignore_outgoing = 1; 96 + int ifindex = -1; 97 + int s = -1; 98 + 99 + s = socket(AF_PACKET, SOCK_RAW, 0); 100 + if (!ASSERT_GE(s, 0, "socket(AF_PACKET)")) 101 + return -1; 102 + 103 + ifindex = if_nametoindex(ifname); 104 + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) { 105 + close(s); 106 + return -1; 107 + } 108 + 109 + struct sockaddr_ll addr = { 110 + .sll_family = AF_PACKET, 111 + .sll_protocol = htons(ETH_P_IP), 112 + .sll_ifindex = ifindex, 113 + }; 114 + 115 + err = bind(s, (struct sockaddr *)&addr, sizeof(addr)); 116 + if (!ASSERT_OK(err, "bind(AF_PACKET)")) { 117 + close(s); 118 + return -1; 119 + } 120 + 121 + /* Use packet socket to capture only the ingress, so we can distinguish 122 + * the case where a regression that actually redirects the packet to 123 + * the egress. 124 + */ 125 + err = setsockopt(s, SOL_PACKET, PACKET_IGNORE_OUTGOING, 126 + &ignore_outgoing, sizeof(ignore_outgoing)); 127 + if (!ASSERT_OK(err, "setsockopt(PACKET_IGNORE_OUTGOING)")) { 128 + close(s); 129 + return -1; 130 + } 131 + 132 + err = fcntl(s, F_SETFL, O_NONBLOCK); 133 + if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { 134 + close(s); 135 + return -1; 136 + } 137 + 138 + return s; 139 + } 140 + 141 + static int expect_icmp(char *buf, ssize_t len) 142 + { 143 + struct ethhdr *eth = (struct ethhdr *)buf; 144 + 145 + if (len < (ssize_t)sizeof(*eth)) 146 + return -1; 147 + 148 + if (eth->h_proto == htons(ETH_P_IP)) 149 + return __expect_icmp_ipv4((char *)(eth + 1), len - sizeof(*eth)); 150 + 151 + return -1; 152 + } 153 + 154 + static int expect_icmp_nomac(char *buf, ssize_t len) 155 + { 156 + return __expect_icmp_ipv4(buf, len); 157 + } 158 + 159 + static void send_and_capture_test_packets(const char *test_name, int tap_fd, 160 + const char *target_dev, bool need_mac) 161 + { 162 
+ int psock = -1; 163 + struct timeval timeo = { 164 + .tv_sec = 0, 165 + .tv_usec = 250000, 166 + }; 167 + int ret = -1; 168 + 169 + filter_t filter = need_mac ? expect_icmp : expect_icmp_nomac; 170 + 171 + ping_dev(target_dev, false); 172 + 173 + ret = wait_for_packet(tap_fd, filter, &timeo); 174 + if (!ASSERT_EQ(ret, 1, "wait_for_epacket")) { 175 + log_err("%s egress test fails", test_name); 176 + goto out; 177 + } 178 + 179 + psock = new_packet_sock(target_dev); 180 + ping_dev(target_dev, true); 181 + 182 + ret = wait_for_packet(psock, filter, &timeo); 183 + if (!ASSERT_EQ(ret, 1, "wait_for_ipacket")) { 184 + log_err("%s ingress test fails", test_name); 185 + goto out; 186 + } 187 + 188 + out: 189 + if (psock >= 0) 190 + close(psock); 191 + } 192 + 193 + static int setup_redirect_target(const char *target_dev, bool need_mac) 194 + { 195 + int target_index = -1; 196 + int tap_fd = -1; 197 + 198 + tap_fd = open_tuntap(target_dev, need_mac); 199 + if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) 200 + goto fail; 201 + 202 + target_index = if_nametoindex(target_dev); 203 + if (!ASSERT_GE(target_index, 0, "if_nametoindex")) 204 + goto fail; 205 + 206 + SYS(fail, "ip link add link_err type dummy"); 207 + SYS(fail, "ip link set lo up"); 208 + SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); 209 + SYS(fail, "ip link set link_err up"); 210 + SYS(fail, "ip link set %s up", target_dev); 211 + 212 + SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s", 213 + CIDR_TO_INGRESS, BPF_OBJECT, INGRESS_SEC(need_mac)); 214 + 215 + SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s", 216 + CIDR_TO_EGRESS, BPF_OBJECT, EGRESS_SEC(need_mac)); 217 + 218 + return tap_fd; 219 + 220 + fail: 221 + if (tap_fd >= 0) 222 + close(tap_fd); 223 + return -1; 224 + } 225 + 226 + static void test_lwt_redirect_normal(void) 227 + { 228 + const char *target_dev = "tap0"; 229 + int tap_fd = -1; 230 + bool need_mac = true; 231 + 232 + tap_fd = 
setup_redirect_target(target_dev, need_mac); 233 + if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) 234 + return; 235 + 236 + send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac); 237 + close(tap_fd); 238 + } 239 + 240 + static void test_lwt_redirect_normal_nomac(void) 241 + { 242 + const char *target_dev = "tun0"; 243 + int tap_fd = -1; 244 + bool need_mac = false; 245 + 246 + tap_fd = setup_redirect_target(target_dev, need_mac); 247 + if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) 248 + return; 249 + 250 + send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac); 251 + close(tap_fd); 252 + } 253 + 254 + /* This test aims to prevent regression of future. As long as the kernel does 255 + * not panic, it is considered as success. 256 + */ 257 + static void __test_lwt_redirect_dev_down(bool need_mac) 258 + { 259 + const char *target_dev = "tap0"; 260 + int tap_fd = -1; 261 + 262 + tap_fd = setup_redirect_target(target_dev, need_mac); 263 + if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) 264 + return; 265 + 266 + SYS(out, "ip link set %s down", target_dev); 267 + ping_dev(target_dev, true); 268 + ping_dev(target_dev, false); 269 + 270 + out: 271 + close(tap_fd); 272 + } 273 + 274 + static void test_lwt_redirect_dev_down(void) 275 + { 276 + __test_lwt_redirect_dev_down(true); 277 + } 278 + 279 + static void test_lwt_redirect_dev_down_nomac(void) 280 + { 281 + __test_lwt_redirect_dev_down(false); 282 + } 283 + 284 + /* This test aims to prevent regression of future. As long as the kernel does 285 + * not panic, it is considered as success. 
286 + */ 287 + static void test_lwt_redirect_dev_carrier_down(void) 288 + { 289 + const char *lower_dev = "tap0"; 290 + const char *vlan_dev = "vlan100"; 291 + int tap_fd = -1; 292 + 293 + tap_fd = setup_redirect_target(lower_dev, true); 294 + if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) 295 + return; 296 + 297 + SYS(out, "ip link add vlan100 link %s type vlan id 100", lower_dev); 298 + SYS(out, "ip link set %s up", vlan_dev); 299 + SYS(out, "ip link set %s down", lower_dev); 300 + ping_dev(vlan_dev, true); 301 + ping_dev(vlan_dev, false); 302 + 303 + out: 304 + close(tap_fd); 305 + } 306 + 307 + static void *test_lwt_redirect_run(void *arg) 308 + { 309 + netns_delete(); 310 + RUN_TEST(lwt_redirect_normal); 311 + RUN_TEST(lwt_redirect_normal_nomac); 312 + RUN_TEST(lwt_redirect_dev_down); 313 + RUN_TEST(lwt_redirect_dev_down_nomac); 314 + RUN_TEST(lwt_redirect_dev_carrier_down); 315 + return NULL; 316 + } 317 + 318 + void test_lwt_redirect(void) 319 + { 320 + pthread_t test_thread; 321 + int err; 322 + 323 + /* Run the tests in their own thread to isolate the namespace changes 324 + * so they do not affect the environment of other tests. 325 + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) 326 + */ 327 + err = pthread_create(&test_thread, NULL, &test_lwt_redirect_run, NULL); 328 + if (ASSERT_OK(err, "pthread_create")) 329 + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); 330 + }
+262
tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 + 3 + /* 4 + * Test suite of lwt BPF programs that reroutes packets 5 + * The file tests focus not only if these programs work as expected normally, 6 + * but also if they can handle abnormal situations gracefully. This test 7 + * suite currently only covers lwt_xmit hook. lwt_in tests have not been 8 + * implemented. 9 + * 10 + * WARNING 11 + * ------- 12 + * This test suite can crash the kernel, thus should be run in a VM. 13 + * 14 + * Setup: 15 + * --------- 16 + * all tests are performed in a single netns. A lwt encap route is setup for 17 + * each subtest: 18 + * 19 + * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<section_N>" dev link_err 20 + * 21 + * Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains 22 + * a single test program entry. This program sets packet mark by last byte of 23 + * the IPv4 daddr. For example, a packet going to 1.2.3.4 will receive a skb 24 + * mark 4. A packet will only be marked once, and IP x.x.x.0 will be skipped 25 + * to avoid route loop. We didn't use generated BPF skeleton since the 26 + * attachment for lwt programs are not supported by libbpf yet. 27 + * 28 + * The test program will bring up a tun device, and sets up the following 29 + * routes: 30 + * 31 + * ip rule add pref 100 from all fwmark <tun_index> lookup 100 32 + * ip route add table 100 default dev tun0 33 + * 34 + * For normal testing, a ping command is running in the test netns: 35 + * 36 + * ping 10.0.0.<tun_index> -c 1 -w 1 -s 100 37 + * 38 + * For abnormal testing, fq is used as the qdisc of the tun device. Then a UDP 39 + * socket will try to overflow the fq queue and trigger qdisc drop error. 40 + * 41 + * Scenarios: 42 + * -------------------------------- 43 + * 1. Reroute to a running tun device 44 + * 2. Reroute to a device where qdisc drop 45 + * 46 + * For case 1, ping packets should be received by the tun device. 
47 + * 48 + * For case 2, force UDP packets to overflow fq limit. As long as kernel 49 + * is not crashed, it is considered successful. 50 + */ 51 + #include "lwt_helpers.h" 52 + #include "network_helpers.h" 53 + #include <linux/net_tstamp.h> 54 + 55 + #define BPF_OBJECT "test_lwt_reroute.bpf.o" 56 + #define LOCAL_SRC "10.0.0.1" 57 + #define TEST_CIDR "10.0.0.0/24" 58 + #define XMIT_HOOK "xmit" 59 + #define XMIT_SECTION "lwt_xmit" 60 + #define NSEC_PER_SEC 1000000000ULL 61 + 62 + /* send a ping to be rerouted to the target device */ 63 + static void ping_once(const char *ip) 64 + { 65 + /* We won't get a reply. Don't fail here */ 66 + SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1", 67 + ip, ICMP_PAYLOAD_SIZE); 68 + } 69 + 70 + /* Send snd_target UDP packets to overflow the fq queue and trigger qdisc drop 71 + * error. This is done via TX tstamp to force buffering delayed packets. 72 + */ 73 + static int overflow_fq(int snd_target, const char *target_ip) 74 + { 75 + struct sockaddr_in addr = { 76 + .sin_family = AF_INET, 77 + .sin_port = htons(1234), 78 + }; 79 + 80 + char data_buf[8]; /* only #pkts matter, so use a random small buffer */ 81 + char control_buf[CMSG_SPACE(sizeof(uint64_t))]; 82 + struct iovec iov = { 83 + .iov_base = data_buf, 84 + .iov_len = sizeof(data_buf), 85 + }; 86 + int err = -1; 87 + int s = -1; 88 + struct sock_txtime txtime_on = { 89 + .clockid = CLOCK_MONOTONIC, 90 + .flags = 0, 91 + }; 92 + struct msghdr msg = { 93 + .msg_name = &addr, 94 + .msg_namelen = sizeof(addr), 95 + .msg_control = control_buf, 96 + .msg_controllen = sizeof(control_buf), 97 + .msg_iovlen = 1, 98 + .msg_iov = &iov, 99 + }; 100 + struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); 101 + 102 + memset(data_buf, 0, sizeof(data_buf)); 103 + 104 + s = socket(AF_INET, SOCK_DGRAM, 0); 105 + if (!ASSERT_GE(s, 0, "socket")) 106 + goto out; 107 + 108 + err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on)); 109 + if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)")) 110 
+ goto out; 111 + 112 + err = inet_pton(AF_INET, target_ip, &addr.sin_addr); 113 + if (!ASSERT_EQ(err, 1, "inet_pton")) 114 + goto out; 115 + 116 + while (snd_target > 0) { 117 + struct timespec now; 118 + 119 + memset(control_buf, 0, sizeof(control_buf)); 120 + cmsg->cmsg_type = SCM_TXTIME; 121 + cmsg->cmsg_level = SOL_SOCKET; 122 + cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t)); 123 + 124 + err = clock_gettime(CLOCK_MONOTONIC, &now); 125 + if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) { 126 + err = -1; 127 + goto out; 128 + } 129 + 130 + *(uint64_t *)CMSG_DATA(cmsg) = (now.tv_nsec + 1) * NSEC_PER_SEC + 131 + now.tv_nsec; 132 + 133 + /* we will intentionally send more than fq limit, so ignore 134 + * the error here. 135 + */ 136 + sendmsg(s, &msg, MSG_NOSIGNAL); 137 + snd_target--; 138 + } 139 + 140 + /* no kernel crash so far is considered success */ 141 + err = 0; 142 + 143 + out: 144 + if (s >= 0) 145 + close(s); 146 + 147 + return err; 148 + } 149 + 150 + static int setup(const char *tun_dev) 151 + { 152 + int target_index = -1; 153 + int tap_fd = -1; 154 + 155 + tap_fd = open_tuntap(tun_dev, false); 156 + if (!ASSERT_GE(tap_fd, 0, "open_tun")) 157 + return -1; 158 + 159 + target_index = if_nametoindex(tun_dev); 160 + if (!ASSERT_GE(target_index, 0, "if_nametoindex")) 161 + return -1; 162 + 163 + SYS(fail, "ip link add link_err type dummy"); 164 + SYS(fail, "ip link set lo up"); 165 + SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); 166 + SYS(fail, "ip link set link_err up"); 167 + SYS(fail, "ip link set %s up", tun_dev); 168 + 169 + SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit", 170 + TEST_CIDR, BPF_OBJECT); 171 + 172 + SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100", 173 + target_index); 174 + SYS(fail, "ip route add t 100 default dev %s", tun_dev); 175 + 176 + return tap_fd; 177 + 178 + fail: 179 + if (tap_fd >= 0) 180 + close(tap_fd); 181 + return -1; 182 + } 183 + 184 + static void 
test_lwt_reroute_normal_xmit(void) 185 + { 186 + const char *tun_dev = "tun0"; 187 + int tun_fd = -1; 188 + int ifindex = -1; 189 + char ip[256]; 190 + struct timeval timeo = { 191 + .tv_sec = 0, 192 + .tv_usec = 250000, 193 + }; 194 + 195 + tun_fd = setup(tun_dev); 196 + if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) 197 + return; 198 + 199 + ifindex = if_nametoindex(tun_dev); 200 + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) 201 + return; 202 + 203 + snprintf(ip, 256, "10.0.0.%d", ifindex); 204 + 205 + /* ping packets should be received by the tun device */ 206 + ping_once(ip); 207 + 208 + if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1, 209 + "wait_for_packet")) 210 + log_err("%s xmit", __func__); 211 + } 212 + 213 + /* 214 + * Test the failure case when the skb is dropped at the qdisc. This is a 215 + * regression prevention at the xmit hook only. 216 + */ 217 + static void test_lwt_reroute_qdisc_dropped(void) 218 + { 219 + const char *tun_dev = "tun0"; 220 + int tun_fd = -1; 221 + int ifindex = -1; 222 + char ip[256]; 223 + 224 + tun_fd = setup(tun_dev); 225 + if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) 226 + goto fail; 227 + 228 + SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev); 229 + 230 + ifindex = if_nametoindex(tun_dev); 231 + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) 232 + return; 233 + 234 + snprintf(ip, 256, "10.0.0.%d", ifindex); 235 + ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq"); 236 + 237 + fail: 238 + if (tun_fd >= 0) 239 + close(tun_fd); 240 + } 241 + 242 + static void *test_lwt_reroute_run(void *arg) 243 + { 244 + netns_delete(); 245 + RUN_TEST(lwt_reroute_normal_xmit); 246 + RUN_TEST(lwt_reroute_qdisc_dropped); 247 + return NULL; 248 + } 249 + 250 + void test_lwt_reroute(void) 251 + { 252 + pthread_t test_thread; 253 + int err; 254 + 255 + /* Run the tests in their own thread to isolate the namespace changes 256 + * so they do not affect the environment of other tests. 
257 + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) 258 + */ 259 + err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL); 260 + if (ASSERT_OK(err, "pthread_create")) 261 + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); 262 + }
+26
tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
··· 9 9 10 10 void test_refcounted_kptr(void) 11 11 { 12 + RUN_TESTS(refcounted_kptr); 12 13 } 13 14 14 15 void test_refcounted_kptr_fail(void) 15 16 { 17 + RUN_TESTS(refcounted_kptr_fail); 16 18 } 17 19 18 20 void test_refcounted_kptr_wrong_owner(void) 19 21 { 22 + LIBBPF_OPTS(bpf_test_run_opts, opts, 23 + .data_in = &pkt_v4, 24 + .data_size_in = sizeof(pkt_v4), 25 + .repeat = 1, 26 + ); 27 + struct refcounted_kptr *skel; 28 + int ret; 29 + 30 + skel = refcounted_kptr__open_and_load(); 31 + if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) 32 + return; 33 + 34 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a1), &opts); 35 + ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a1"); 36 + ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a1 retval"); 37 + 38 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_b), &opts); 39 + ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_b"); 40 + ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_b retval"); 41 + 42 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a2), &opts); 43 + ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a2"); 44 + ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval"); 45 + refcounted_kptr__destroy(skel); 20 46 }
+2
tools/testing/selftests/bpf/prog_tests/task_kfunc.c
··· 79 79 "test_task_from_pid_current", 80 80 "test_task_from_pid_invalid", 81 81 "task_kfunc_acquire_trusted_walked", 82 + "test_task_kfunc_flavor_relo", 83 + "test_task_kfunc_flavor_relo_not_found", 82 84 }; 83 85 84 86 void test_task_kfunc(void)
+35 -1
tools/testing/selftests/bpf/prog_tests/tc_bpf.c
··· 3 3 #include <test_progs.h> 4 4 #include <linux/pkt_cls.h> 5 5 6 + #include "cap_helpers.h" 6 7 #include "test_tc_bpf.skel.h" 7 8 8 9 #define LO_IFINDEX 1 ··· 328 327 return 0; 329 328 } 330 329 331 - void test_tc_bpf(void) 330 + void tc_bpf_root(void) 332 331 { 333 332 DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, 334 333 .attach_point = BPF_TC_INGRESS); ··· 393 392 bpf_tc_hook_destroy(&hook); 394 393 } 395 394 test_tc_bpf__destroy(skel); 395 + } 396 + 397 + void tc_bpf_non_root(void) 398 + { 399 + struct test_tc_bpf *skel = NULL; 400 + __u64 caps = 0; 401 + int ret; 402 + 403 + /* In case CAP_BPF and CAP_PERFMON is not set */ 404 + ret = cap_enable_effective(1ULL << CAP_BPF | 1ULL << CAP_NET_ADMIN, &caps); 405 + if (!ASSERT_OK(ret, "set_cap_bpf_cap_net_admin")) 406 + return; 407 + ret = cap_disable_effective(1ULL << CAP_SYS_ADMIN | 1ULL << CAP_PERFMON, NULL); 408 + if (!ASSERT_OK(ret, "disable_cap_sys_admin")) 409 + goto restore_cap; 410 + 411 + skel = test_tc_bpf__open_and_load(); 412 + if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load")) 413 + goto restore_cap; 414 + 415 + test_tc_bpf__destroy(skel); 416 + 417 + restore_cap: 418 + if (caps) 419 + cap_enable_effective(caps, NULL); 420 + } 421 + 422 + void test_tc_bpf(void) 423 + { 424 + if (test__start_subtest("tc_bpf_root")) 425 + tc_bpf_root(); 426 + if (test__start_subtest("tc_bpf_non_root")) 427 + tc_bpf_non_root(); 396 428 }
+415
tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <unistd.h> 4 + #include <test_progs.h> 5 + #include "uprobe_multi.skel.h" 6 + #include "uprobe_multi_bench.skel.h" 7 + #include "uprobe_multi_usdt.skel.h" 8 + #include "bpf/libbpf_internal.h" 9 + #include "testing_helpers.h" 10 + 11 + static char test_data[] = "test_data"; 12 + 13 + noinline void uprobe_multi_func_1(void) 14 + { 15 + asm volatile (""); 16 + } 17 + 18 + noinline void uprobe_multi_func_2(void) 19 + { 20 + asm volatile (""); 21 + } 22 + 23 + noinline void uprobe_multi_func_3(void) 24 + { 25 + asm volatile (""); 26 + } 27 + 28 + struct child { 29 + int go[2]; 30 + int pid; 31 + }; 32 + 33 + static void release_child(struct child *child) 34 + { 35 + int child_status; 36 + 37 + if (!child) 38 + return; 39 + close(child->go[1]); 40 + close(child->go[0]); 41 + if (child->pid > 0) 42 + waitpid(child->pid, &child_status, 0); 43 + } 44 + 45 + static void kick_child(struct child *child) 46 + { 47 + char c = 1; 48 + 49 + if (child) { 50 + write(child->go[1], &c, 1); 51 + release_child(child); 52 + } 53 + fflush(NULL); 54 + } 55 + 56 + static struct child *spawn_child(void) 57 + { 58 + static struct child child; 59 + int err; 60 + int c; 61 + 62 + /* pipe to notify child to execute the trigger functions */ 63 + if (pipe(child.go)) 64 + return NULL; 65 + 66 + child.pid = fork(); 67 + if (child.pid < 0) { 68 + release_child(&child); 69 + errno = EINVAL; 70 + return NULL; 71 + } 72 + 73 + /* child */ 74 + if (child.pid == 0) { 75 + close(child.go[1]); 76 + 77 + /* wait for parent's kick */ 78 + err = read(child.go[0], &c, 1); 79 + if (err != 1) 80 + exit(err); 81 + 82 + uprobe_multi_func_1(); 83 + uprobe_multi_func_2(); 84 + uprobe_multi_func_3(); 85 + 86 + exit(errno); 87 + } 88 + 89 + return &child; 90 + } 91 + 92 + static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child) 93 + { 94 + skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1; 95 + 
skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2; 96 + skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3; 97 + 98 + skel->bss->user_ptr = test_data; 99 + 100 + /* 101 + * Disable pid check in bpf program if we are pid filter test, 102 + * because the probe should be executed only by child->pid 103 + * passed at the probe attach. 104 + */ 105 + skel->bss->pid = child ? 0 : getpid(); 106 + 107 + if (child) 108 + kick_child(child); 109 + 110 + /* trigger all probes */ 111 + uprobe_multi_func_1(); 112 + uprobe_multi_func_2(); 113 + uprobe_multi_func_3(); 114 + 115 + /* 116 + * There are 2 entry and 2 exit probe called for each uprobe_multi_func_[123] 117 + * function and each slepable probe (6) increments uprobe_multi_sleep_result. 118 + */ 119 + ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 2, "uprobe_multi_func_1_result"); 120 + ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 2, "uprobe_multi_func_2_result"); 121 + ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 2, "uprobe_multi_func_3_result"); 122 + 123 + ASSERT_EQ(skel->bss->uretprobe_multi_func_1_result, 2, "uretprobe_multi_func_1_result"); 124 + ASSERT_EQ(skel->bss->uretprobe_multi_func_2_result, 2, "uretprobe_multi_func_2_result"); 125 + ASSERT_EQ(skel->bss->uretprobe_multi_func_3_result, 2, "uretprobe_multi_func_3_result"); 126 + 127 + ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result"); 128 + 129 + if (child) 130 + ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid"); 131 + } 132 + 133 + static void test_skel_api(void) 134 + { 135 + struct uprobe_multi *skel = NULL; 136 + int err; 137 + 138 + skel = uprobe_multi__open_and_load(); 139 + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) 140 + goto cleanup; 141 + 142 + err = uprobe_multi__attach(skel); 143 + if (!ASSERT_OK(err, "uprobe_multi__attach")) 144 + goto cleanup; 145 + 146 + uprobe_multi_test_run(skel, NULL); 147 + 148 + cleanup: 149 + uprobe_multi__destroy(skel); 
150 + } 151 + 152 + static void 153 + __test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi_opts *opts, 154 + struct child *child) 155 + { 156 + pid_t pid = child ? child->pid : -1; 157 + struct uprobe_multi *skel = NULL; 158 + 159 + skel = uprobe_multi__open_and_load(); 160 + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) 161 + goto cleanup; 162 + 163 + opts->retprobe = false; 164 + skel->links.uprobe = bpf_program__attach_uprobe_multi(skel->progs.uprobe, pid, 165 + binary, pattern, opts); 166 + if (!ASSERT_OK_PTR(skel->links.uprobe, "bpf_program__attach_uprobe_multi")) 167 + goto cleanup; 168 + 169 + opts->retprobe = true; 170 + skel->links.uretprobe = bpf_program__attach_uprobe_multi(skel->progs.uretprobe, pid, 171 + binary, pattern, opts); 172 + if (!ASSERT_OK_PTR(skel->links.uretprobe, "bpf_program__attach_uprobe_multi")) 173 + goto cleanup; 174 + 175 + opts->retprobe = false; 176 + skel->links.uprobe_sleep = bpf_program__attach_uprobe_multi(skel->progs.uprobe_sleep, pid, 177 + binary, pattern, opts); 178 + if (!ASSERT_OK_PTR(skel->links.uprobe_sleep, "bpf_program__attach_uprobe_multi")) 179 + goto cleanup; 180 + 181 + opts->retprobe = true; 182 + skel->links.uretprobe_sleep = bpf_program__attach_uprobe_multi(skel->progs.uretprobe_sleep, 183 + pid, binary, pattern, opts); 184 + if (!ASSERT_OK_PTR(skel->links.uretprobe_sleep, "bpf_program__attach_uprobe_multi")) 185 + goto cleanup; 186 + 187 + opts->retprobe = false; 188 + skel->links.uprobe_extra = bpf_program__attach_uprobe_multi(skel->progs.uprobe_extra, -1, 189 + binary, pattern, opts); 190 + if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi")) 191 + goto cleanup; 192 + 193 + uprobe_multi_test_run(skel, child); 194 + 195 + cleanup: 196 + uprobe_multi__destroy(skel); 197 + } 198 + 199 + static void 200 + test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi_opts *opts) 201 + { 202 + struct child *child; 203 + 204 + 
/* no pid filter */ 205 + __test_attach_api(binary, pattern, opts, NULL); 206 + 207 + /* pid filter */ 208 + child = spawn_child(); 209 + if (!ASSERT_OK_PTR(child, "spawn_child")) 210 + return; 211 + 212 + __test_attach_api(binary, pattern, opts, child); 213 + } 214 + 215 + static void test_attach_api_pattern(void) 216 + { 217 + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 218 + 219 + test_attach_api("/proc/self/exe", "uprobe_multi_func_*", &opts); 220 + test_attach_api("/proc/self/exe", "uprobe_multi_func_?", &opts); 221 + } 222 + 223 + static void test_attach_api_syms(void) 224 + { 225 + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 226 + const char *syms[3] = { 227 + "uprobe_multi_func_1", 228 + "uprobe_multi_func_2", 229 + "uprobe_multi_func_3", 230 + }; 231 + 232 + opts.syms = syms; 233 + opts.cnt = ARRAY_SIZE(syms); 234 + test_attach_api("/proc/self/exe", NULL, &opts); 235 + } 236 + 237 + static void __test_link_api(struct child *child) 238 + { 239 + int prog_fd, link1_fd = -1, link2_fd = -1, link3_fd = -1, link4_fd = -1; 240 + LIBBPF_OPTS(bpf_link_create_opts, opts); 241 + const char *path = "/proc/self/exe"; 242 + struct uprobe_multi *skel = NULL; 243 + unsigned long *offsets = NULL; 244 + const char *syms[3] = { 245 + "uprobe_multi_func_1", 246 + "uprobe_multi_func_2", 247 + "uprobe_multi_func_3", 248 + }; 249 + int link_extra_fd = -1; 250 + int err; 251 + 252 + err = elf_resolve_syms_offsets(path, 3, syms, (unsigned long **) &offsets); 253 + if (!ASSERT_OK(err, "elf_resolve_syms_offsets")) 254 + return; 255 + 256 + opts.uprobe_multi.path = path; 257 + opts.uprobe_multi.offsets = offsets; 258 + opts.uprobe_multi.cnt = ARRAY_SIZE(syms); 259 + opts.uprobe_multi.pid = child ? 
child->pid : 0; 260 + 261 + skel = uprobe_multi__open_and_load(); 262 + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) 263 + goto cleanup; 264 + 265 + opts.kprobe_multi.flags = 0; 266 + prog_fd = bpf_program__fd(skel->progs.uprobe); 267 + link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); 268 + if (!ASSERT_GE(link1_fd, 0, "link1_fd")) 269 + goto cleanup; 270 + 271 + opts.kprobe_multi.flags = BPF_F_UPROBE_MULTI_RETURN; 272 + prog_fd = bpf_program__fd(skel->progs.uretprobe); 273 + link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); 274 + if (!ASSERT_GE(link2_fd, 0, "link2_fd")) 275 + goto cleanup; 276 + 277 + opts.kprobe_multi.flags = 0; 278 + prog_fd = bpf_program__fd(skel->progs.uprobe_sleep); 279 + link3_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); 280 + if (!ASSERT_GE(link3_fd, 0, "link3_fd")) 281 + goto cleanup; 282 + 283 + opts.kprobe_multi.flags = BPF_F_UPROBE_MULTI_RETURN; 284 + prog_fd = bpf_program__fd(skel->progs.uretprobe_sleep); 285 + link4_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); 286 + if (!ASSERT_GE(link4_fd, 0, "link4_fd")) 287 + goto cleanup; 288 + 289 + opts.kprobe_multi.flags = 0; 290 + opts.uprobe_multi.pid = 0; 291 + prog_fd = bpf_program__fd(skel->progs.uprobe_extra); 292 + link_extra_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); 293 + if (!ASSERT_GE(link_extra_fd, 0, "link_extra_fd")) 294 + goto cleanup; 295 + 296 + uprobe_multi_test_run(skel, child); 297 + 298 + cleanup: 299 + if (link1_fd >= 0) 300 + close(link1_fd); 301 + if (link2_fd >= 0) 302 + close(link2_fd); 303 + if (link3_fd >= 0) 304 + close(link3_fd); 305 + if (link4_fd >= 0) 306 + close(link4_fd); 307 + if (link_extra_fd >= 0) 308 + close(link_extra_fd); 309 + 310 + uprobe_multi__destroy(skel); 311 + free(offsets); 312 + } 313 + 314 + void test_link_api(void) 315 + { 316 + struct child *child; 317 + 318 + /* no pid filter */ 319 + __test_link_api(NULL); 320 + 321 + /* pid 
filter */ 322 + child = spawn_child(); 323 + if (!ASSERT_OK_PTR(child, "spawn_child")) 324 + return; 325 + 326 + __test_link_api(child); 327 + } 328 + 329 + static void test_bench_attach_uprobe(void) 330 + { 331 + long attach_start_ns = 0, attach_end_ns = 0; 332 + struct uprobe_multi_bench *skel = NULL; 333 + long detach_start_ns, detach_end_ns; 334 + double attach_delta, detach_delta; 335 + int err; 336 + 337 + skel = uprobe_multi_bench__open_and_load(); 338 + if (!ASSERT_OK_PTR(skel, "uprobe_multi_bench__open_and_load")) 339 + goto cleanup; 340 + 341 + attach_start_ns = get_time_ns(); 342 + 343 + err = uprobe_multi_bench__attach(skel); 344 + if (!ASSERT_OK(err, "uprobe_multi_bench__attach")) 345 + goto cleanup; 346 + 347 + attach_end_ns = get_time_ns(); 348 + 349 + system("./uprobe_multi bench"); 350 + 351 + ASSERT_EQ(skel->bss->count, 50000, "uprobes_count"); 352 + 353 + cleanup: 354 + detach_start_ns = get_time_ns(); 355 + uprobe_multi_bench__destroy(skel); 356 + detach_end_ns = get_time_ns(); 357 + 358 + attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0; 359 + detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0; 360 + 361 + printf("%s: attached in %7.3lfs\n", __func__, attach_delta); 362 + printf("%s: detached in %7.3lfs\n", __func__, detach_delta); 363 + } 364 + 365 + static void test_bench_attach_usdt(void) 366 + { 367 + long attach_start_ns = 0, attach_end_ns = 0; 368 + struct uprobe_multi_usdt *skel = NULL; 369 + long detach_start_ns, detach_end_ns; 370 + double attach_delta, detach_delta; 371 + 372 + skel = uprobe_multi_usdt__open_and_load(); 373 + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open")) 374 + goto cleanup; 375 + 376 + attach_start_ns = get_time_ns(); 377 + 378 + skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0, -1, "./uprobe_multi", 379 + "test", "usdt", NULL); 380 + if (!ASSERT_OK_PTR(skel->links.usdt0, "bpf_program__attach_usdt")) 381 + goto cleanup; 382 + 383 + attach_end_ns = get_time_ns(); 384 + 385 + 
system("./uprobe_multi usdt"); 386 + 387 + ASSERT_EQ(skel->bss->count, 50000, "usdt_count"); 388 + 389 + cleanup: 390 + detach_start_ns = get_time_ns(); 391 + uprobe_multi_usdt__destroy(skel); 392 + detach_end_ns = get_time_ns(); 393 + 394 + attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0; 395 + detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0; 396 + 397 + printf("%s: attached in %7.3lfs\n", __func__, attach_delta); 398 + printf("%s: detached in %7.3lfs\n", __func__, detach_delta); 399 + } 400 + 401 + void test_uprobe_multi_test(void) 402 + { 403 + if (test__start_subtest("skel_api")) 404 + test_skel_api(); 405 + if (test__start_subtest("attach_api_pattern")) 406 + test_attach_api_pattern(); 407 + if (test__start_subtest("attach_api_syms")) 408 + test_attach_api_syms(); 409 + if (test__start_subtest("link_api")) 410 + test_link_api(); 411 + if (test__start_subtest("bench_uprobe")) 412 + test_bench_attach_uprobe(); 413 + if (test__start_subtest("bench_usdt")) 414 + test_bench_attach_usdt(); 415 + }
+28
tools/testing/selftests/bpf/progs/local_kptr_stash.c
··· 14 14 struct bpf_rb_node node; 15 15 }; 16 16 17 + struct plain_local { 18 + long key; 19 + long data; 20 + }; 21 + 17 22 struct map_value { 18 23 struct prog_test_ref_kfunc *not_kptr; 19 24 struct prog_test_ref_kfunc __kptr *val; 20 25 struct node_data __kptr *node; 26 + struct plain_local __kptr *plain; 21 27 }; 22 28 23 29 /* This is necessary so that LLVM generates BTF for node_data struct ··· 70 64 long stash_rb_nodes(void *ctx) 71 65 { 72 66 return create_and_stash(0, 41) ?: create_and_stash(1, 42); 67 + } 68 + 69 + SEC("tc") 70 + long stash_plain(void *ctx) 71 + { 72 + struct map_value *mapval; 73 + struct plain_local *res; 74 + int idx = 0; 75 + 76 + mapval = bpf_map_lookup_elem(&some_nodes, &idx); 77 + if (!mapval) 78 + return 1; 79 + 80 + res = bpf_obj_new(typeof(*res)); 81 + if (!res) 82 + return 1; 83 + res->key = 41; 84 + 85 + res = bpf_kptr_xchg(&mapval->plain, res); 86 + if (res) 87 + bpf_obj_drop(res); 88 + return 0; 73 89 } 74 90 75 91 SEC("tc")
+85
tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ 3 + 4 + #include <vmlinux.h> 5 + #include <bpf/bpf_helpers.h> 6 + #include <bpf/bpf_tracing.h> 7 + #include <bpf/bpf_core_read.h> 8 + #include "../bpf_experimental.h" 9 + #include "bpf_misc.h" 10 + 11 + struct node_data { 12 + long key; 13 + long data; 14 + struct bpf_rb_node node; 15 + }; 16 + 17 + struct map_value { 18 + struct node_data __kptr *node; 19 + }; 20 + 21 + struct node_data2 { 22 + long key[4]; 23 + }; 24 + 25 + /* This is necessary so that LLVM generates BTF for node_data struct 26 + * If it's not included, a fwd reference for node_data will be generated but 27 + * no struct. Example BTF of "node" field in map_value when not included: 28 + * 29 + * [10] PTR '(anon)' type_id=35 30 + * [34] FWD 'node_data' fwd_kind=struct 31 + * [35] TYPE_TAG 'kptr_ref' type_id=34 32 + */ 33 + struct node_data *just_here_because_btf_bug; 34 + 35 + struct { 36 + __uint(type, BPF_MAP_TYPE_ARRAY); 37 + __type(key, int); 38 + __type(value, struct map_value); 39 + __uint(max_entries, 2); 40 + } some_nodes SEC(".maps"); 41 + 42 + SEC("tc") 43 + __failure __msg("invalid kptr access, R2 type=ptr_node_data2 expected=ptr_node_data") 44 + long stash_rb_nodes(void *ctx) 45 + { 46 + struct map_value *mapval; 47 + struct node_data2 *res; 48 + int idx = 0; 49 + 50 + mapval = bpf_map_lookup_elem(&some_nodes, &idx); 51 + if (!mapval) 52 + return 1; 53 + 54 + res = bpf_obj_new(typeof(*res)); 55 + if (!res) 56 + return 1; 57 + res->key[0] = 40; 58 + 59 + res = bpf_kptr_xchg(&mapval->node, res); 60 + if (res) 61 + bpf_obj_drop(res); 62 + return 0; 63 + } 64 + 65 + SEC("tc") 66 + __failure __msg("R1 must have zero offset when passed to release func") 67 + long drop_rb_node_off(void *ctx) 68 + { 69 + struct map_value *mapval; 70 + struct node_data *res; 71 + int idx = 0; 72 + 73 + mapval = bpf_map_lookup_elem(&some_nodes, &idx); 74 + if (!mapval) 75 + return 1; 76 + 77 + res = 
bpf_obj_new(typeof(*res)); 78 + if (!res) 79 + return 1; 80 + /* Try releasing with graph node offset */ 81 + bpf_obj_drop(&res->node); 82 + return 0; 83 + } 84 + 85 + char _license[] SEC("license") = "GPL";
+71
tools/testing/selftests/bpf/progs/refcounted_kptr.c
··· 8 8 #include "bpf_misc.h" 9 9 #include "bpf_experimental.h" 10 10 11 + extern void bpf_rcu_read_lock(void) __ksym; 12 + extern void bpf_rcu_read_unlock(void) __ksym; 13 + 11 14 struct node_data { 12 15 long key; 13 16 long list_data; ··· 497 494 bpf_obj_drop(container_of(res, struct node_data, r)); 498 495 return 3; 499 496 } 497 + return 0; 498 + } 499 + 500 + SEC("?fentry.s/bpf_testmod_test_read") 501 + __success 502 + int BPF_PROG(rbtree_sleepable_rcu, 503 + struct file *file, struct kobject *kobj, 504 + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) 505 + { 506 + struct bpf_rb_node *rb; 507 + struct node_data *n, *m = NULL; 508 + 509 + n = bpf_obj_new(typeof(*n)); 510 + if (!n) 511 + return 0; 512 + 513 + bpf_rcu_read_lock(); 514 + bpf_spin_lock(&lock); 515 + bpf_rbtree_add(&root, &n->r, less); 516 + rb = bpf_rbtree_first(&root); 517 + if (!rb) 518 + goto err_out; 519 + 520 + rb = bpf_rbtree_remove(&root, rb); 521 + if (!rb) 522 + goto err_out; 523 + 524 + m = container_of(rb, struct node_data, r); 525 + 526 + err_out: 527 + bpf_spin_unlock(&lock); 528 + bpf_rcu_read_unlock(); 529 + if (m) 530 + bpf_obj_drop(m); 531 + return 0; 532 + } 533 + 534 + SEC("?fentry.s/bpf_testmod_test_read") 535 + __success 536 + int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock, 537 + struct file *file, struct kobject *kobj, 538 + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) 539 + { 540 + struct bpf_rb_node *rb; 541 + struct node_data *n, *m = NULL; 542 + 543 + n = bpf_obj_new(typeof(*n)); 544 + if (!n) 545 + return 0; 546 + 547 + /* No explicit bpf_rcu_read_lock */ 548 + bpf_spin_lock(&lock); 549 + bpf_rbtree_add(&root, &n->r, less); 550 + rb = bpf_rbtree_first(&root); 551 + if (!rb) 552 + goto err_out; 553 + 554 + rb = bpf_rbtree_remove(&root, rb); 555 + if (!rb) 556 + goto err_out; 557 + 558 + m = container_of(rb, struct node_data, r); 559 + 560 + err_out: 561 + bpf_spin_unlock(&lock); 562 + /* No explicit bpf_rcu_read_unlock */ 
563 + if (m) 564 + bpf_obj_drop(m); 500 565 return 0; 501 566 } 502 567
+28
tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
··· 13 13 struct bpf_refcount refcount; 14 14 }; 15 15 16 + extern void bpf_rcu_read_lock(void) __ksym; 17 + extern void bpf_rcu_read_unlock(void) __ksym; 18 + 16 19 #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) 17 20 private(A) struct bpf_spin_lock glock; 18 21 private(A) struct bpf_rb_root groot __contains(node_acquire, node); ··· 70 67 bpf_spin_lock(&glock); 71 68 bpf_rbtree_add(&groot, &n->node, less); 72 69 bpf_spin_unlock(&glock); 70 + 71 + return 0; 72 + } 73 + 74 + SEC("?fentry.s/bpf_testmod_test_read") 75 + __failure __msg("function calls are not allowed while holding a lock") 76 + int BPF_PROG(rbtree_fail_sleepable_lock_across_rcu, 77 + struct file *file, struct kobject *kobj, 78 + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) 79 + { 80 + struct node_acquire *n; 81 + 82 + n = bpf_obj_new(typeof(*n)); 83 + if (!n) 84 + return 0; 85 + 86 + /* spin_{lock,unlock} are in different RCU CS */ 87 + bpf_rcu_read_lock(); 88 + bpf_spin_lock(&glock); 89 + bpf_rbtree_add(&groot, &n->node, less); 90 + bpf_rcu_read_unlock(); 91 + 92 + bpf_rcu_read_lock(); 93 + bpf_spin_unlock(&glock); 94 + bpf_rcu_read_unlock(); 73 95 74 96 return 0; 75 97 }
+51
tools/testing/selftests/bpf/progs/task_kfunc_success.c
··· 18 18 */ 19 19 20 20 struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak; 21 + 22 + struct task_struct *bpf_task_acquire___one(struct task_struct *task) __ksym __weak; 23 + /* The two-param bpf_task_acquire doesn't exist */ 24 + struct task_struct *bpf_task_acquire___two(struct task_struct *p, void *ctx) __ksym __weak; 25 + /* Incorrect type for first param */ 26 + struct task_struct *bpf_task_acquire___three(void *ctx) __ksym __weak; 27 + 21 28 void invalid_kfunc(void) __ksym __weak; 22 29 void bpf_testmod_test_mod_kfunc(int i) __ksym __weak; 23 30 ··· 58 51 bpf_task_release(acquired); 59 52 else 60 53 err = 6; 54 + 55 + return 0; 56 + } 57 + 58 + SEC("tp_btf/task_newtask") 59 + int BPF_PROG(test_task_kfunc_flavor_relo, struct task_struct *task, u64 clone_flags) 60 + { 61 + struct task_struct *acquired = NULL; 62 + int fake_ctx = 42; 63 + 64 + if (bpf_ksym_exists(bpf_task_acquire___one)) { 65 + acquired = bpf_task_acquire___one(task); 66 + } else if (bpf_ksym_exists(bpf_task_acquire___two)) { 67 + /* Here, bpf_object__resolve_ksym_func_btf_id's find_ksym_btf_id 68 + * call will find vmlinux's bpf_task_acquire, but subsequent 69 + * bpf_core_types_are_compat will fail 70 + */ 71 + acquired = bpf_task_acquire___two(task, &fake_ctx); 72 + err = 3; 73 + return 0; 74 + } else if (bpf_ksym_exists(bpf_task_acquire___three)) { 75 + /* bpf_core_types_are_compat will fail similarly to above case */ 76 + acquired = bpf_task_acquire___three(&fake_ctx); 77 + err = 4; 78 + return 0; 79 + } 80 + 81 + if (acquired) 82 + bpf_task_release(acquired); 83 + else 84 + err = 5; 85 + return 0; 86 + } 87 + 88 + SEC("tp_btf/task_newtask") 89 + int BPF_PROG(test_task_kfunc_flavor_relo_not_found, struct task_struct *task, u64 clone_flags) 90 + { 91 + /* Neither symbol should successfully resolve. 
92 + * Success or failure of one ___flavor should not affect others 93 + */ 94 + if (bpf_ksym_exists(bpf_task_acquire___two)) 95 + err = 1; 96 + else if (bpf_ksym_exists(bpf_task_acquire___three)) 97 + err = 2; 61 98 62 99 return 0; 63 100 }
+2 -1
tools/testing/selftests/bpf/progs/test_ldsx_insn.c
··· 5 5 #include <bpf/bpf_helpers.h> 6 6 #include <bpf/bpf_tracing.h> 7 7 8 - #if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 8 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 9 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 9 10 const volatile int skip = 0; 10 11 #else 11 12 const volatile int skip = 1;
+90
tools/testing/selftests/bpf/progs/test_lwt_redirect.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/bpf.h> 3 + #include <bpf/bpf_endian.h> 4 + #include <bpf/bpf_helpers.h> 5 + #include <linux/ip.h> 6 + #include "bpf_tracing_net.h" 7 + 8 + /* We don't care about whether the packet can be received by network stack. 9 + * Just care if the packet is sent to the correct device at correct direction 10 + * and not panic the kernel. 11 + */ 12 + static int prepend_dummy_mac(struct __sk_buff *skb) 13 + { 14 + char mac[] = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xf, 15 + 0xe, 0xd, 0xc, 0xb, 0xa, 0x08, 0x00}; 16 + 17 + if (bpf_skb_change_head(skb, ETH_HLEN, 0)) 18 + return -1; 19 + 20 + if (bpf_skb_store_bytes(skb, 0, mac, sizeof(mac), 0)) 21 + return -1; 22 + 23 + return 0; 24 + } 25 + 26 + /* Use the last byte of IP address to redirect the packet */ 27 + static int get_redirect_target(struct __sk_buff *skb) 28 + { 29 + struct iphdr *iph = NULL; 30 + void *start = (void *)(long)skb->data; 31 + void *end = (void *)(long)skb->data_end; 32 + 33 + if (start + sizeof(*iph) > end) 34 + return -1; 35 + 36 + iph = (struct iphdr *)start; 37 + return bpf_ntohl(iph->daddr) & 0xff; 38 + } 39 + 40 + SEC("redir_ingress") 41 + int test_lwt_redirect_in(struct __sk_buff *skb) 42 + { 43 + int target = get_redirect_target(skb); 44 + 45 + if (target < 0) 46 + return BPF_OK; 47 + 48 + if (prepend_dummy_mac(skb)) 49 + return BPF_DROP; 50 + 51 + return bpf_redirect(target, BPF_F_INGRESS); 52 + } 53 + 54 + SEC("redir_egress") 55 + int test_lwt_redirect_out(struct __sk_buff *skb) 56 + { 57 + int target = get_redirect_target(skb); 58 + 59 + if (target < 0) 60 + return BPF_OK; 61 + 62 + if (prepend_dummy_mac(skb)) 63 + return BPF_DROP; 64 + 65 + return bpf_redirect(target, 0); 66 + } 67 + 68 + SEC("redir_egress_nomac") 69 + int test_lwt_redirect_out_nomac(struct __sk_buff *skb) 70 + { 71 + int target = get_redirect_target(skb); 72 + 73 + if (target < 0) 74 + return BPF_OK; 75 + 76 + return bpf_redirect(target, 0); 77 + } 78 + 79 + 
SEC("redir_ingress_nomac") 80 + int test_lwt_redirect_in_nomac(struct __sk_buff *skb) 81 + { 82 + int target = get_redirect_target(skb); 83 + 84 + if (target < 0) 85 + return BPF_OK; 86 + 87 + return bpf_redirect(target, BPF_F_INGRESS); 88 + } 89 + 90 + char _license[] SEC("license") = "GPL";
+36
tools/testing/selftests/bpf/progs/test_lwt_reroute.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <inttypes.h> 3 + #include <linux/bpf.h> 4 + #include <bpf/bpf_endian.h> 5 + #include <bpf/bpf_helpers.h> 6 + #include <linux/if_ether.h> 7 + #include <linux/ip.h> 8 + 9 + /* This function extracts the last byte of the daddr, and uses it 10 + * as output dev index. 11 + */ 12 + SEC("lwt_xmit") 13 + int test_lwt_reroute(struct __sk_buff *skb) 14 + { 15 + struct iphdr *iph = NULL; 16 + void *start = (void *)(long)skb->data; 17 + void *end = (void *)(long)skb->data_end; 18 + 19 + /* set mark at most once */ 20 + if (skb->mark != 0) 21 + return BPF_OK; 22 + 23 + if (start + sizeof(*iph) > end) 24 + return BPF_DROP; 25 + 26 + iph = (struct iphdr *)start; 27 + skb->mark = bpf_ntohl(iph->daddr) & 0xff; 28 + 29 + /* do not reroute x.x.x.0 packets */ 30 + if (skb->mark == 0) 31 + return BPF_OK; 32 + 33 + return BPF_LWT_REROUTE; 34 + } 35 + 36 + char _license[] SEC("license") = "GPL";
+13
tools/testing/selftests/bpf/progs/test_tc_bpf.c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
#include <linux/ip.h>

/* Dummy prog to test TC-BPF API */

SEC("tc")
int cls(struct __sk_buff *skb)
{
	return 0;
}

/* Prog to verify tc-bpf without cap_sys_admin and cap_perfmon */
SEC("tcx/ingress")
int pkt_ptr(struct __sk_buff *skb)
{
	/* Direct packet access: iph points just past the Ethernet header. */
	struct iphdr *iph = (void *)(long)skb->data + sizeof(struct ethhdr);

	/* Verifier bounds check: bail out if a full IP header would
	 * run past data_end. The value itself is never read — the prog
	 * only exercises packet-pointer permissions.
	 */
	if ((long)(iph + 1) > (long)skb->data_end)
		return 1;
	return 0;
}
+101
tools/testing/selftests/bpf/progs/uprobe_multi.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>

char _license[] SEC("license") = "GPL";

/* Addresses of the traced user-space functions; filled in by the
 * user-space test before attach so hits can be matched by address.
 */
__u64 uprobe_multi_func_1_addr = 0;
__u64 uprobe_multi_func_2_addr = 0;
__u64 uprobe_multi_func_3_addr = 0;

/* Hit counters per function, read back by the user-space test. */
__u64 uprobe_multi_func_1_result = 0;
__u64 uprobe_multi_func_2_result = 0;
__u64 uprobe_multi_func_3_result = 0;

__u64 uretprobe_multi_func_1_result = 0;
__u64 uretprobe_multi_func_2_result = 0;
__u64 uretprobe_multi_func_3_result = 0;

__u64 uprobe_multi_sleep_result = 0;

/* pid filter (0 = no filtering); child_pid reports who actually fired. */
int pid = 0;
int child_pid = 0;

bool test_cookie = false;
void *user_ptr = 0;

/* Sleepable-only check: copy a user buffer and compare it to the
 * expected literal written there by the user-space test.
 */
static __always_inline bool verify_sleepable_user_copy(void)
{
	char data[9];

	bpf_copy_from_user(data, sizeof(data), user_ptr);
	return bpf_strncmp(data, sizeof(data), "test_data") == 0;
}

/* Shared body for all probe flavors: attribute the hit to the matching
 * function counter, optionally validating the attach cookie too.
 */
static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep)
{
	child_pid = bpf_get_current_pid_tgid() >> 32;

	if (pid && child_pid != pid)
		return;

	__u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
	__u64 addr = bpf_get_func_ip(ctx);

/* Bump __var when the probed address matches __addr and (if cookie
 * checking is on) the attach cookie matches __cookie.
 */
#define SET(__var, __addr, __cookie) ({			\
	if (addr == __addr &&				\
	   (!test_cookie || (cookie == __cookie)))	\
		__var += 1;				\
})

	/* Expected cookie values are the ones the user-space test attaches
	 * with (deliberately permuted between entry and return probes) —
	 * confirm against the attach-side test code.
	 */
	if (is_return) {
		SET(uretprobe_multi_func_1_result, uprobe_multi_func_1_addr, 2);
		SET(uretprobe_multi_func_2_result, uprobe_multi_func_2_addr, 3);
		SET(uretprobe_multi_func_3_result, uprobe_multi_func_3_addr, 1);
	} else {
		SET(uprobe_multi_func_1_result, uprobe_multi_func_1_addr, 3);
		SET(uprobe_multi_func_2_result, uprobe_multi_func_2_addr, 1);
		SET(uprobe_multi_func_3_result, uprobe_multi_func_3_addr, 2);
	}

#undef SET

	if (is_sleep && verify_sleepable_user_copy())
		uprobe_multi_sleep_result += 1;
}

SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
int uprobe(struct pt_regs *ctx)
{
	uprobe_multi_check(ctx, false, false);
	return 0;
}

SEC("uretprobe.multi//proc/self/exe:uprobe_multi_func_*")
int uretprobe(struct pt_regs *ctx)
{
	uprobe_multi_check(ctx, true, false);
	return 0;
}

SEC("uprobe.multi.s//proc/self/exe:uprobe_multi_func_*")
int uprobe_sleep(struct pt_regs *ctx)
{
	uprobe_multi_check(ctx, false, true);
	return 0;
}

SEC("uretprobe.multi.s//proc/self/exe:uprobe_multi_func_*")
int uretprobe_sleep(struct pt_regs *ctx)
{
	uprobe_multi_check(ctx, true, true);
	return 0;
}

/* Extra no-op program on the same pattern; attached to exercise multiple
 * links over the same target set.
 */
SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
int uprobe_extra(struct pt_regs *ctx)
{
	return 0;
}
+15
tools/testing/selftests/bpf/progs/uprobe_multi_bench.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/bpf.h> 3 + #include <bpf/bpf_helpers.h> 4 + #include <bpf/bpf_tracing.h> 5 + 6 + char _license[] SEC("license") = "GPL"; 7 + 8 + int count; 9 + 10 + SEC("uprobe.multi/./uprobe_multi:uprobe_multi_func_*") 11 + int uprobe_bench(struct pt_regs *ctx) 12 + { 13 + count++; 14 + return 0; 15 + }
+16
tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include "vmlinux.h" 4 + #include <bpf/bpf_helpers.h> 5 + #include <bpf/usdt.bpf.h> 6 + 7 + char _license[] SEC("license") = "GPL"; 8 + 9 + int count; 10 + 11 + SEC("usdt") 12 + int usdt0(struct pt_regs *ctx) 13 + { 14 + count++; 15 + return 0; 16 + }
+2 -1
tools/testing/selftests/bpf/progs/verifier_bswap.c
··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include "bpf_misc.h" 6 6 7 - #if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 7 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 8 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 8 9 9 10 SEC("socket") 10 11 __description("BSWAP, 16")
+2 -1
tools/testing/selftests/bpf/progs/verifier_gotol.c
··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include "bpf_misc.h" 6 6 7 - #if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 7 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 8 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 8 9 9 10 SEC("socket") 10 11 __description("gotol, small_imm")
+2 -1
tools/testing/selftests/bpf/progs/verifier_ldsx.c
··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include "bpf_misc.h" 6 6 7 - #if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 7 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 8 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 8 9 9 10 SEC("socket") 10 11 __description("LDSX, S8")
+2 -1
tools/testing/selftests/bpf/progs/verifier_movsx.c
··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include "bpf_misc.h" 6 6 7 - #if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 7 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 8 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 8 9 9 10 SEC("socket") 10 11 __description("MOV32SX, S8")
+2 -1
tools/testing/selftests/bpf/progs/verifier_sdiv.c
··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include "bpf_misc.h" 6 6 7 - #if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 7 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 8 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 8 9 9 10 SEC("socket") 10 11 __description("SDIV32, non-zero imm divisor, check 1")
+10
tools/testing/selftests/bpf/testing_helpers.h
··· 7 7 #include <stdbool.h> 8 8 #include <bpf/bpf.h> 9 9 #include <bpf/libbpf.h> 10 + #include <time.h> 10 11 11 12 int parse_num_list(const char *s, bool **set, int *set_len); 12 13 __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); ··· 33 32 int load_bpf_testmod(bool verbose); 34 33 int unload_bpf_testmod(bool verbose); 35 34 int kern_sync_rcu(void); 35 + 36 + static inline __u64 get_time_ns(void) 37 + { 38 + struct timespec t; 39 + 40 + clock_gettime(CLOCK_MONOTONIC, &t); 41 + 42 + return (u64)t.tv_sec * 1000000000 + t.tv_nsec; 43 + } 36 44 37 45 #endif /* __TESTING_HELPERS_H */
+91
tools/testing/selftests/bpf/uprobe_multi.c
// SPDX-License-Identifier: GPL-2.0

#include <stdio.h>
#include <string.h>
#include <sdt.h>

/* Token-pasting helpers used to stamp out numbered identifiers. */
#define __PASTE(a, b) a##b
#define PASTE(a, b) __PASTE(a, b)

#define NAME(name, idx) PASTE(name, idx)

/* DEF expands to a trivial function definition; CALL to its invocation.
 * The resulting symbol names (uprobe_multi_func_NNNNN) are the attach
 * targets of the uprobe-multi benchmark — do not rename.
 */
#define DEF(name, idx) int NAME(name, idx)(void) { return 0; }
#define CALL(name, idx) NAME(name, idx)();

#define F(body, name, idx) body(name, idx)

/* F10/F100/F1000/F10000 recursively append one more digit per level,
 * expanding `body` once for each of the 10^k digit suffixes.
 */
#define F10(body, name, idx) \
	F(body, PASTE(name, idx), 0) F(body, PASTE(name, idx), 1) F(body, PASTE(name, idx), 2) \
	F(body, PASTE(name, idx), 3) F(body, PASTE(name, idx), 4) F(body, PASTE(name, idx), 5) \
	F(body, PASTE(name, idx), 6) F(body, PASTE(name, idx), 7) F(body, PASTE(name, idx), 8) \
	F(body, PASTE(name, idx), 9)

#define F100(body, name, idx) \
	F10(body, PASTE(name, idx), 0) F10(body, PASTE(name, idx), 1) F10(body, PASTE(name, idx), 2) \
	F10(body, PASTE(name, idx), 3) F10(body, PASTE(name, idx), 4) F10(body, PASTE(name, idx), 5) \
	F10(body, PASTE(name, idx), 6) F10(body, PASTE(name, idx), 7) F10(body, PASTE(name, idx), 8) \
	F10(body, PASTE(name, idx), 9)

#define F1000(body, name, idx) \
	F100(body, PASTE(name, idx), 0) F100(body, PASTE(name, idx), 1) F100(body, PASTE(name, idx), 2) \
	F100(body, PASTE(name, idx), 3) F100(body, PASTE(name, idx), 4) F100(body, PASTE(name, idx), 5) \
	F100(body, PASTE(name, idx), 6) F100(body, PASTE(name, idx), 7) F100(body, PASTE(name, idx), 8) \
	F100(body, PASTE(name, idx), 9)

#define F10000(body, name, idx) \
	F1000(body, PASTE(name, idx), 0) F1000(body, PASTE(name, idx), 1) F1000(body, PASTE(name, idx), 2) \
	F1000(body, PASTE(name, idx), 3) F1000(body, PASTE(name, idx), 4) F1000(body, PASTE(name, idx), 5) \
	F1000(body, PASTE(name, idx), 6) F1000(body, PASTE(name, idx), 7) F1000(body, PASTE(name, idx), 8) \
	F1000(body, PASTE(name, idx), 9)

/* 5 x 10000 = 50000 no-op functions for the uprobe-multi benchmark. */
F10000(DEF, uprobe_multi_func_, 0)
F10000(DEF, uprobe_multi_func_, 1)
F10000(DEF, uprobe_multi_func_, 2)
F10000(DEF, uprobe_multi_func_, 3)
F10000(DEF, uprobe_multi_func_, 4)

/* Call every generated function once so each attached uprobe fires once. */
static int bench(void)
{
	F10000(CALL, uprobe_multi_func_, 0)
	F10000(CALL, uprobe_multi_func_, 1)
	F10000(CALL, uprobe_multi_func_, 2)
	F10000(CALL, uprobe_multi_func_, 3)
	F10000(CALL, uprobe_multi_func_, 4)
	return 0;
}

/* One SystemTap USDT probe site per expansion (provider "test", name "usdt"). */
#define PROBE STAP_PROBE(test, usdt);

#define PROBE10		PROBE PROBE PROBE PROBE PROBE \
			PROBE PROBE PROBE PROBE PROBE
#define PROBE100	PROBE10 PROBE10 PROBE10 PROBE10 PROBE10 \
			PROBE10 PROBE10 PROBE10 PROBE10 PROBE10
#define PROBE1000	PROBE100 PROBE100 PROBE100 PROBE100 PROBE100 \
			PROBE100 PROBE100 PROBE100 PROBE100 PROBE100
#define PROBE10000	PROBE1000 PROBE1000 PROBE1000 PROBE1000 PROBE1000 \
			PROBE1000 PROBE1000 PROBE1000 PROBE1000 PROBE1000

/* Hit 50000 distinct USDT sites for the usdt-multi benchmark. */
static int usdt(void)
{
	PROBE10000
	PROBE10000
	PROBE10000
	PROBE10000
	PROBE10000
	return 0;
}

/* Entry point: mode is selected by the single command-line argument. */
int main(int argc, char **argv)
{
	if (argc != 2)
		goto error;

	if (!strcmp("bench", argv[1]))
		return bench();
	if (!strcmp("usdt", argv[1]))
		return usdt();

error:
	fprintf(stderr, "usage: %s <bench|usdt>\n", argv[0]);
	return -1;
}