Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2023-08-25

We've added 87 non-merge commits during the last 8 day(s) which contain
a total of 104 files changed, 3719 insertions(+), 4212 deletions(-).

The main changes are:

1) Add multi uprobe BPF links for attaching multiple uprobes
and usdt probes, which is significantly faster and saves extra fds,
from Jiri Olsa.

2) Add support for BPF cpu v4 instructions for arm64 JIT compiler,
from Xu Kuohai.

3) Add support for BPF cpu v4 instructions for riscv64 JIT compiler,
from Pu Lehui.

4) Fix LWT BPF xmit hooks wrt their return values where propagating
the result from skb_do_redirect() would trigger a use-after-free,
from Yan Zhai.

5) Fix a BPF verifier issue related to bpf_kptr_xchg() with local kptr
where the map's value kptr type and locally allocated obj type
mismatch, from Yonghong Song.

6) Fix BPF verifier's check_func_arg_reg_off() function wrt graph
root/node which bypassed reg->off == 0 enforcement,
from Kumar Kartikeya Dwivedi.

7) Lift BPF verifier restriction in networking BPF programs to treat
comparison of packet pointers not as a pointer leak,
from Yafang Shao.

8) Remove unmaintained XDP BPF samples as they are maintained
in xdp-tools repository out of tree, from Toke Høiland-Jørgensen.

9) Batch of fixes for the tracing programs from BPF samples in order
to make them more libbpf-aware, from Daniel T. Lee.

10) Fix a libbpf signedness determination bug in the CO-RE relocation
handling logic, from Andrii Nakryiko.

11) Extend libbpf to support CO-RE kfunc relocations. Also follow-up
fixes for bpf_refcount shared ownership implementation,
both from Dave Marchevsky.

12) Add a new bpf_object__unpin() API function to libbpf,
from Daniel Xu.

13) Fix a memory leak in libbpf to also free btf_vmlinux
when the bpf_object gets closed, from Hao Luo.

14) Small error output improvements to test_bpf module, from Helge Deller.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (87 commits)
selftests/bpf: Add tests for rbtree API interaction in sleepable progs
bpf: Allow bpf_spin_{lock,unlock} in sleepable progs
bpf: Consider non-owning refs to refcounted nodes RCU protected
bpf: Reenable bpf_refcount_acquire
bpf: Use bpf_mem_free_rcu when bpf_obj_dropping refcounted nodes
bpf: Consider non-owning refs trusted
bpf: Ensure kptr_struct_meta is non-NULL for collection insert and refcount_acquire
selftests/bpf: Enable cpu v4 tests for RV64
riscv, bpf: Support unconditional bswap insn
riscv, bpf: Support signed div/mod insns
riscv, bpf: Support 32-bit offset jmp insn
riscv, bpf: Support sign-extension mov insns
riscv, bpf: Support sign-extension load insns
riscv, bpf: Fix missing exception handling and redundant zext for LDX_B/H/W
samples/bpf: Add note to README about the XDP utilities moved to xdp-tools
samples/bpf: Cleanup .gitignore
samples/bpf: Remove the xdp_sample_pkts utility
samples/bpf: Remove the xdp1 and xdp2 utilities
samples/bpf: Remove the xdp_rxq_info utility
samples/bpf: Remove the xdp_redirect* utilities
...
====================

Link: https://lore.kernel.org/r/20230825194319.12727-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+3720 -4213
+4
arch/arm64/include/asm/insn.h
··· 186 186 AARCH64_INSN_LDST_LOAD_ACQ_EX, 187 187 AARCH64_INSN_LDST_STORE_EX, 188 188 AARCH64_INSN_LDST_STORE_REL_EX, 189 + AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET, 190 + AARCH64_INSN_LDST_SIGNED_LOAD_REG_OFFSET, 189 191 }; 190 192 191 193 enum aarch64_insn_adsb_type { ··· 326 324 __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) 327 325 __AARCH64_INSN_FUNCS(store_imm, 0x3FC00000, 0x39000000) 328 326 __AARCH64_INSN_FUNCS(load_imm, 0x3FC00000, 0x39400000) 327 + __AARCH64_INSN_FUNCS(signed_load_imm, 0X3FC00000, 0x39800000) 329 328 __AARCH64_INSN_FUNCS(store_pre, 0x3FE00C00, 0x38000C00) 330 329 __AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00) 331 330 __AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400) ··· 340 337 __AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000) 341 338 __AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00) 342 339 __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) 340 + __AARCH64_INSN_FUNCS(signed_ldr_reg, 0X3FE0FC00, 0x38A0E800) 343 341 __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000) 344 342 __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) 345 343 __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
+6
arch/arm64/lib/insn.c
··· 385 385 case AARCH64_INSN_LDST_LOAD_REG_OFFSET: 386 386 insn = aarch64_insn_get_ldr_reg_value(); 387 387 break; 388 + case AARCH64_INSN_LDST_SIGNED_LOAD_REG_OFFSET: 389 + insn = aarch64_insn_get_signed_ldr_reg_value(); 390 + break; 388 391 case AARCH64_INSN_LDST_STORE_REG_OFFSET: 389 392 insn = aarch64_insn_get_str_reg_value(); 390 393 break; ··· 432 429 switch (type) { 433 430 case AARCH64_INSN_LDST_LOAD_IMM_OFFSET: 434 431 insn = aarch64_insn_get_ldr_imm_value(); 432 + break; 433 + case AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET: 434 + insn = aarch64_insn_get_signed_load_imm_value(); 435 435 break; 436 436 case AARCH64_INSN_LDST_STORE_IMM_OFFSET: 437 437 insn = aarch64_insn_get_str_imm_value();
+12
arch/arm64/net/bpf_jit.h
··· 59 59 AARCH64_INSN_LDST_##type##_REG_OFFSET) 60 60 #define A64_STRB(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 8, STORE) 61 61 #define A64_LDRB(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 8, LOAD) 62 + #define A64_LDRSB(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 8, SIGNED_LOAD) 62 63 #define A64_STRH(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 16, STORE) 63 64 #define A64_LDRH(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 16, LOAD) 65 + #define A64_LDRSH(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 16, SIGNED_LOAD) 64 66 #define A64_STR32(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 32, STORE) 65 67 #define A64_LDR32(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 32, LOAD) 68 + #define A64_LDRSW(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 32, SIGNED_LOAD) 66 69 #define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE) 67 70 #define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD) 68 71 ··· 76 73 AARCH64_INSN_LDST_##type##_IMM_OFFSET) 77 74 #define A64_STRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, STORE) 78 75 #define A64_LDRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, LOAD) 76 + #define A64_LDRSBI(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 8, SIGNED_LOAD) 79 77 #define A64_STRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, STORE) 80 78 #define A64_LDRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, LOAD) 79 + #define A64_LDRSHI(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 16, SIGNED_LOAD) 81 80 #define A64_STR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, STORE) 82 81 #define A64_LDR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, LOAD) 82 + #define A64_LDRSWI(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 32, SIGNED_LOAD) 83 83 #define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE) 84 84 #define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD) 85 85 ··· 192 186 #define A64_UXTH(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 15) 193 187 #define A64_UXTW(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 31) 194 188 189 + /* Sign extend */ 190 + #define A64_SXTB(sf, Rd, Rn) A64_SBFM(sf, Rd, Rn, 0, 7) 191 + #define A64_SXTH(sf, Rd, Rn) A64_SBFM(sf, Rd, Rn, 0, 15) 192 + #define 
A64_SXTW(sf, Rd, Rn) A64_SBFM(sf, Rd, Rn, 0, 31) 193 + 195 194 /* Move wide (immediate) */ 196 195 #define A64_MOVEW(sf, Rd, imm16, shift, type) \ 197 196 aarch64_insn_gen_movewide(Rd, imm16, shift, \ ··· 234 223 #define A64_DATA2(sf, Rd, Rn, Rm, type) aarch64_insn_gen_data2(Rd, Rn, Rm, \ 235 224 A64_VARIANT(sf), AARCH64_INSN_DATA2_##type) 236 225 #define A64_UDIV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, UDIV) 226 + #define A64_SDIV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, SDIV) 237 227 #define A64_LSLV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSLV) 238 228 #define A64_LSRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSRV) 239 229 #define A64_ASRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, ASRV)
+75 -16
arch/arm64/net/bpf_jit_comp.c
··· 715 715 /* First pass */ 716 716 return 0; 717 717 718 - if (BPF_MODE(insn->code) != BPF_PROBE_MEM) 718 + if (BPF_MODE(insn->code) != BPF_PROBE_MEM && 719 + BPF_MODE(insn->code) != BPF_PROBE_MEMSX) 719 720 return 0; 720 721 721 722 if (!ctx->prog->aux->extable || ··· 780 779 u8 dst_adj; 781 780 int off_adj; 782 781 int ret; 782 + bool sign_extend; 783 783 784 784 switch (code) { 785 785 /* dst = src */ 786 786 case BPF_ALU | BPF_MOV | BPF_X: 787 787 case BPF_ALU64 | BPF_MOV | BPF_X: 788 - emit(A64_MOV(is64, dst, src), ctx); 788 + switch (insn->off) { 789 + case 0: 790 + emit(A64_MOV(is64, dst, src), ctx); 791 + break; 792 + case 8: 793 + emit(A64_SXTB(is64, dst, src), ctx); 794 + break; 795 + case 16: 796 + emit(A64_SXTH(is64, dst, src), ctx); 797 + break; 798 + case 32: 799 + emit(A64_SXTW(is64, dst, src), ctx); 800 + break; 801 + } 789 802 break; 790 803 /* dst = dst OP src */ 791 804 case BPF_ALU | BPF_ADD | BPF_X: ··· 828 813 break; 829 814 case BPF_ALU | BPF_DIV | BPF_X: 830 815 case BPF_ALU64 | BPF_DIV | BPF_X: 831 - emit(A64_UDIV(is64, dst, dst, src), ctx); 816 + if (!off) 817 + emit(A64_UDIV(is64, dst, dst, src), ctx); 818 + else 819 + emit(A64_SDIV(is64, dst, dst, src), ctx); 832 820 break; 833 821 case BPF_ALU | BPF_MOD | BPF_X: 834 822 case BPF_ALU64 | BPF_MOD | BPF_X: 835 - emit(A64_UDIV(is64, tmp, dst, src), ctx); 823 + if (!off) 824 + emit(A64_UDIV(is64, tmp, dst, src), ctx); 825 + else 826 + emit(A64_SDIV(is64, tmp, dst, src), ctx); 836 827 emit(A64_MSUB(is64, dst, dst, tmp, src), ctx); 837 828 break; 838 829 case BPF_ALU | BPF_LSH | BPF_X: ··· 861 840 /* dst = BSWAP##imm(dst) */ 862 841 case BPF_ALU | BPF_END | BPF_FROM_LE: 863 842 case BPF_ALU | BPF_END | BPF_FROM_BE: 843 + case BPF_ALU64 | BPF_END | BPF_FROM_LE: 864 844 #ifdef CONFIG_CPU_BIG_ENDIAN 865 - if (BPF_SRC(code) == BPF_FROM_BE) 845 + if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE) 866 846 goto emit_bswap_uxt; 867 847 #else /* !CONFIG_CPU_BIG_ENDIAN */ 868 - if 
(BPF_SRC(code) == BPF_FROM_LE) 848 + if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE) 869 849 goto emit_bswap_uxt; 870 850 #endif 871 851 switch (imm) { ··· 965 943 case BPF_ALU | BPF_DIV | BPF_K: 966 944 case BPF_ALU64 | BPF_DIV | BPF_K: 967 945 emit_a64_mov_i(is64, tmp, imm, ctx); 968 - emit(A64_UDIV(is64, dst, dst, tmp), ctx); 946 + if (!off) 947 + emit(A64_UDIV(is64, dst, dst, tmp), ctx); 948 + else 949 + emit(A64_SDIV(is64, dst, dst, tmp), ctx); 969 950 break; 970 951 case BPF_ALU | BPF_MOD | BPF_K: 971 952 case BPF_ALU64 | BPF_MOD | BPF_K: 972 953 emit_a64_mov_i(is64, tmp2, imm, ctx); 973 - emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); 954 + if (!off) 955 + emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); 956 + else 957 + emit(A64_SDIV(is64, tmp, dst, tmp2), ctx); 974 958 emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx); 975 959 break; 976 960 case BPF_ALU | BPF_LSH | BPF_K: ··· 994 966 995 967 /* JUMP off */ 996 968 case BPF_JMP | BPF_JA: 997 - jmp_offset = bpf2a64_offset(i, off, ctx); 969 + case BPF_JMP32 | BPF_JA: 970 + if (BPF_CLASS(code) == BPF_JMP) 971 + jmp_offset = bpf2a64_offset(i, off, ctx); 972 + else 973 + jmp_offset = bpf2a64_offset(i, imm, ctx); 998 974 check_imm26(jmp_offset); 999 975 emit(A64_B(jmp_offset), ctx); 1000 976 break; ··· 1154 1122 return 1; 1155 1123 } 1156 1124 1157 - /* LDX: dst = *(size *)(src + off) */ 1125 + /* LDX: dst = (u64)*(unsigned size *)(src + off) */ 1158 1126 case BPF_LDX | BPF_MEM | BPF_W: 1159 1127 case BPF_LDX | BPF_MEM | BPF_H: 1160 1128 case BPF_LDX | BPF_MEM | BPF_B: ··· 1163 1131 case BPF_LDX | BPF_PROBE_MEM | BPF_W: 1164 1132 case BPF_LDX | BPF_PROBE_MEM | BPF_H: 1165 1133 case BPF_LDX | BPF_PROBE_MEM | BPF_B: 1134 + /* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */ 1135 + case BPF_LDX | BPF_MEMSX | BPF_B: 1136 + case BPF_LDX | BPF_MEMSX | BPF_H: 1137 + case BPF_LDX | BPF_MEMSX | BPF_W: 1138 + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: 1139 + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: 1140 + case 
BPF_LDX | BPF_PROBE_MEMSX | BPF_W: 1166 1141 if (ctx->fpb_offset > 0 && src == fp) { 1167 1142 src_adj = fpb; 1168 1143 off_adj = off + ctx->fpb_offset; ··· 1177 1138 src_adj = src; 1178 1139 off_adj = off; 1179 1140 } 1141 + sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX || 1142 + BPF_MODE(insn->code) == BPF_PROBE_MEMSX); 1180 1143 switch (BPF_SIZE(code)) { 1181 1144 case BPF_W: 1182 1145 if (is_lsi_offset(off_adj, 2)) { 1183 - emit(A64_LDR32I(dst, src_adj, off_adj), ctx); 1146 + if (sign_extend) 1147 + emit(A64_LDRSWI(dst, src_adj, off_adj), ctx); 1148 + else 1149 + emit(A64_LDR32I(dst, src_adj, off_adj), ctx); 1184 1150 } else { 1185 1151 emit_a64_mov_i(1, tmp, off, ctx); 1186 - emit(A64_LDR32(dst, src, tmp), ctx); 1152 + if (sign_extend) 1153 + emit(A64_LDRSW(dst, src_adj, off_adj), ctx); 1154 + else 1155 + emit(A64_LDR32(dst, src, tmp), ctx); 1187 1156 } 1188 1157 break; 1189 1158 case BPF_H: 1190 1159 if (is_lsi_offset(off_adj, 1)) { 1191 - emit(A64_LDRHI(dst, src_adj, off_adj), ctx); 1160 + if (sign_extend) 1161 + emit(A64_LDRSHI(dst, src_adj, off_adj), ctx); 1162 + else 1163 + emit(A64_LDRHI(dst, src_adj, off_adj), ctx); 1192 1164 } else { 1193 1165 emit_a64_mov_i(1, tmp, off, ctx); 1194 - emit(A64_LDRH(dst, src, tmp), ctx); 1166 + if (sign_extend) 1167 + emit(A64_LDRSH(dst, src, tmp), ctx); 1168 + else 1169 + emit(A64_LDRH(dst, src, tmp), ctx); 1195 1170 } 1196 1171 break; 1197 1172 case BPF_B: 1198 1173 if (is_lsi_offset(off_adj, 0)) { 1199 - emit(A64_LDRBI(dst, src_adj, off_adj), ctx); 1174 + if (sign_extend) 1175 + emit(A64_LDRSBI(dst, src_adj, off_adj), ctx); 1176 + else 1177 + emit(A64_LDRBI(dst, src_adj, off_adj), ctx); 1200 1178 } else { 1201 1179 emit_a64_mov_i(1, tmp, off, ctx); 1202 - emit(A64_LDRB(dst, src, tmp), ctx); 1180 + if (sign_extend) 1181 + emit(A64_LDRSB(dst, src, tmp), ctx); 1182 + else 1183 + emit(A64_LDRB(dst, src, tmp), ctx); 1203 1184 } 1204 1185 break; 1205 1186 case BPF_DW:
+30
arch/riscv/net/bpf_jit.h
··· 431 431 return rv_r_insn(1, rs2, rs1, 3, rd, 0x33); 432 432 } 433 433 434 + static inline u32 rv_div(u8 rd, u8 rs1, u8 rs2) 435 + { 436 + return rv_r_insn(1, rs2, rs1, 4, rd, 0x33); 437 + } 438 + 434 439 static inline u32 rv_divu(u8 rd, u8 rs1, u8 rs2) 435 440 { 436 441 return rv_r_insn(1, rs2, rs1, 5, rd, 0x33); 442 + } 443 + 444 + static inline u32 rv_rem(u8 rd, u8 rs1, u8 rs2) 445 + { 446 + return rv_r_insn(1, rs2, rs1, 6, rd, 0x33); 437 447 } 438 448 439 449 static inline u32 rv_remu(u8 rd, u8 rs1, u8 rs2) ··· 509 499 static inline u32 rv_ble(u8 rs1, u8 rs2, u16 imm12_1) 510 500 { 511 501 return rv_bge(rs2, rs1, imm12_1); 502 + } 503 + 504 + static inline u32 rv_lb(u8 rd, u16 imm11_0, u8 rs1) 505 + { 506 + return rv_i_insn(imm11_0, rs1, 0, rd, 0x03); 507 + } 508 + 509 + static inline u32 rv_lh(u8 rd, u16 imm11_0, u8 rs1) 510 + { 511 + return rv_i_insn(imm11_0, rs1, 1, rd, 0x03); 512 512 } 513 513 514 514 static inline u32 rv_lw(u8 rd, u16 imm11_0, u8 rs1) ··· 786 766 return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b); 787 767 } 788 768 769 + static inline u32 rv_divw(u8 rd, u8 rs1, u8 rs2) 770 + { 771 + return rv_r_insn(1, rs2, rs1, 4, rd, 0x3b); 772 + } 773 + 789 774 static inline u32 rv_divuw(u8 rd, u8 rs1, u8 rs2) 790 775 { 791 776 return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b); 777 + } 778 + 779 + static inline u32 rv_remw(u8 rd, u8 rs1, u8 rs2) 780 + { 781 + return rv_r_insn(1, rs2, rs1, 6, rd, 0x3b); 792 782 } 793 783 794 784 static inline u32 rv_remuw(u8 rd, u8 rs1, u8 rs2)
+80 -22
arch/riscv/net/bpf_jit_comp64.c
··· 580 580 unsigned long pc; 581 581 off_t offset; 582 582 583 - if (!ctx->insns || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) 583 + if (!ctx->insns || !ctx->prog->aux->extable || 584 + (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX)) 584 585 return 0; 585 586 586 587 if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries)) ··· 1047 1046 emit_zext_32(rd, ctx); 1048 1047 break; 1049 1048 } 1050 - emit_mv(rd, rs, ctx); 1049 + switch (insn->off) { 1050 + case 0: 1051 + emit_mv(rd, rs, ctx); 1052 + break; 1053 + case 8: 1054 + case 16: 1055 + emit_slli(RV_REG_T1, rs, 64 - insn->off, ctx); 1056 + emit_srai(rd, RV_REG_T1, 64 - insn->off, ctx); 1057 + break; 1058 + case 32: 1059 + emit_addiw(rd, rs, 0, ctx); 1060 + break; 1061 + } 1051 1062 if (!is64 && !aux->verifier_zext) 1052 1063 emit_zext_32(rd, ctx); 1053 1064 break; ··· 1107 1094 break; 1108 1095 case BPF_ALU | BPF_DIV | BPF_X: 1109 1096 case BPF_ALU64 | BPF_DIV | BPF_X: 1110 - emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); 1097 + if (off) 1098 + emit(is64 ? rv_div(rd, rd, rs) : rv_divw(rd, rd, rs), ctx); 1099 + else 1100 + emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); 1111 1101 if (!is64 && !aux->verifier_zext) 1112 1102 emit_zext_32(rd, ctx); 1113 1103 break; 1114 1104 case BPF_ALU | BPF_MOD | BPF_X: 1115 1105 case BPF_ALU64 | BPF_MOD | BPF_X: 1116 - emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); 1106 + if (off) 1107 + emit(is64 ? rv_rem(rd, rd, rs) : rv_remw(rd, rd, rs), ctx); 1108 + else 1109 + emit(is64 ? 
rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); 1117 1110 if (!is64 && !aux->verifier_zext) 1118 1111 emit_zext_32(rd, ctx); 1119 1112 break; ··· 1168 1149 break; 1169 1150 1170 1151 case BPF_ALU | BPF_END | BPF_FROM_BE: 1152 + case BPF_ALU64 | BPF_END | BPF_FROM_LE: 1171 1153 emit_li(RV_REG_T2, 0, ctx); 1172 1154 1173 1155 emit_andi(RV_REG_T1, rd, 0xff, ctx); ··· 1291 1271 case BPF_ALU | BPF_DIV | BPF_K: 1292 1272 case BPF_ALU64 | BPF_DIV | BPF_K: 1293 1273 emit_imm(RV_REG_T1, imm, ctx); 1294 - emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : 1295 - rv_divuw(rd, rd, RV_REG_T1), ctx); 1274 + if (off) 1275 + emit(is64 ? rv_div(rd, rd, RV_REG_T1) : 1276 + rv_divw(rd, rd, RV_REG_T1), ctx); 1277 + else 1278 + emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : 1279 + rv_divuw(rd, rd, RV_REG_T1), ctx); 1296 1280 if (!is64 && !aux->verifier_zext) 1297 1281 emit_zext_32(rd, ctx); 1298 1282 break; 1299 1283 case BPF_ALU | BPF_MOD | BPF_K: 1300 1284 case BPF_ALU64 | BPF_MOD | BPF_K: 1301 1285 emit_imm(RV_REG_T1, imm, ctx); 1302 - emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : 1303 - rv_remuw(rd, rd, RV_REG_T1), ctx); 1286 + if (off) 1287 + emit(is64 ? rv_rem(rd, rd, RV_REG_T1) : 1288 + rv_remw(rd, rd, RV_REG_T1), ctx); 1289 + else 1290 + emit(is64 ? 
rv_remu(rd, rd, RV_REG_T1) : 1291 + rv_remuw(rd, rd, RV_REG_T1), ctx); 1304 1292 if (!is64 && !aux->verifier_zext) 1305 1293 emit_zext_32(rd, ctx); 1306 1294 break; ··· 1342 1314 1343 1315 /* JUMP off */ 1344 1316 case BPF_JMP | BPF_JA: 1345 - rvoff = rv_offset(i, off, ctx); 1317 + case BPF_JMP32 | BPF_JA: 1318 + if (BPF_CLASS(code) == BPF_JMP) 1319 + rvoff = rv_offset(i, off, ctx); 1320 + else 1321 + rvoff = rv_offset(i, imm, ctx); 1346 1322 ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); 1347 1323 if (ret) 1348 1324 return ret; ··· 1518 1486 return 1; 1519 1487 } 1520 1488 1521 - /* LDX: dst = *(size *)(src + off) */ 1489 + /* LDX: dst = *(unsigned size *)(src + off) */ 1522 1490 case BPF_LDX | BPF_MEM | BPF_B: 1523 1491 case BPF_LDX | BPF_MEM | BPF_H: 1524 1492 case BPF_LDX | BPF_MEM | BPF_W: ··· 1527 1495 case BPF_LDX | BPF_PROBE_MEM | BPF_H: 1528 1496 case BPF_LDX | BPF_PROBE_MEM | BPF_W: 1529 1497 case BPF_LDX | BPF_PROBE_MEM | BPF_DW: 1498 + /* LDSX: dst = *(signed size *)(src + off) */ 1499 + case BPF_LDX | BPF_MEMSX | BPF_B: 1500 + case BPF_LDX | BPF_MEMSX | BPF_H: 1501 + case BPF_LDX | BPF_MEMSX | BPF_W: 1502 + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: 1503 + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: 1504 + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: 1530 1505 { 1531 1506 int insn_len, insns_start; 1507 + bool sign_ext; 1508 + 1509 + sign_ext = BPF_MODE(insn->code) == BPF_MEMSX || 1510 + BPF_MODE(insn->code) == BPF_PROBE_MEMSX; 1532 1511 1533 1512 switch (BPF_SIZE(code)) { 1534 1513 case BPF_B: 1535 1514 if (is_12b_int(off)) { 1536 1515 insns_start = ctx->ninsns; 1537 - emit(rv_lbu(rd, off, rs), ctx); 1516 + if (sign_ext) 1517 + emit(rv_lb(rd, off, rs), ctx); 1518 + else 1519 + emit(rv_lbu(rd, off, rs), ctx); 1538 1520 insn_len = ctx->ninsns - insns_start; 1539 1521 break; 1540 1522 } ··· 1556 1510 emit_imm(RV_REG_T1, off, ctx); 1557 1511 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 1558 1512 insns_start = ctx->ninsns; 1559 - emit(rv_lbu(rd, 0, RV_REG_T1), 
ctx); 1513 + if (sign_ext) 1514 + emit(rv_lb(rd, 0, RV_REG_T1), ctx); 1515 + else 1516 + emit(rv_lbu(rd, 0, RV_REG_T1), ctx); 1560 1517 insn_len = ctx->ninsns - insns_start; 1561 - if (insn_is_zext(&insn[1])) 1562 - return 1; 1563 1518 break; 1564 1519 case BPF_H: 1565 1520 if (is_12b_int(off)) { 1566 1521 insns_start = ctx->ninsns; 1567 - emit(rv_lhu(rd, off, rs), ctx); 1522 + if (sign_ext) 1523 + emit(rv_lh(rd, off, rs), ctx); 1524 + else 1525 + emit(rv_lhu(rd, off, rs), ctx); 1568 1526 insn_len = ctx->ninsns - insns_start; 1569 1527 break; 1570 1528 } ··· 1576 1526 emit_imm(RV_REG_T1, off, ctx); 1577 1527 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 1578 1528 insns_start = ctx->ninsns; 1579 - emit(rv_lhu(rd, 0, RV_REG_T1), ctx); 1529 + if (sign_ext) 1530 + emit(rv_lh(rd, 0, RV_REG_T1), ctx); 1531 + else 1532 + emit(rv_lhu(rd, 0, RV_REG_T1), ctx); 1580 1533 insn_len = ctx->ninsns - insns_start; 1581 - if (insn_is_zext(&insn[1])) 1582 - return 1; 1583 1534 break; 1584 1535 case BPF_W: 1585 1536 if (is_12b_int(off)) { 1586 1537 insns_start = ctx->ninsns; 1587 - emit(rv_lwu(rd, off, rs), ctx); 1538 + if (sign_ext) 1539 + emit(rv_lw(rd, off, rs), ctx); 1540 + else 1541 + emit(rv_lwu(rd, off, rs), ctx); 1588 1542 insn_len = ctx->ninsns - insns_start; 1589 1543 break; 1590 1544 } ··· 1596 1542 emit_imm(RV_REG_T1, off, ctx); 1597 1543 emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); 1598 1544 insns_start = ctx->ninsns; 1599 - emit(rv_lwu(rd, 0, RV_REG_T1), ctx); 1545 + if (sign_ext) 1546 + emit(rv_lw(rd, 0, RV_REG_T1), ctx); 1547 + else 1548 + emit(rv_lwu(rd, 0, RV_REG_T1), ctx); 1600 1549 insn_len = ctx->ninsns - insns_start; 1601 - if (insn_is_zext(&insn[1])) 1602 - return 1; 1603 1550 break; 1604 1551 case BPF_DW: 1605 1552 if (is_12b_int(off)) { ··· 1621 1566 ret = add_exception_handler(insn, ctx, rd, insn_len); 1622 1567 if (ret) 1623 1568 return ret; 1569 + 1570 + if (BPF_SIZE(code) != BPF_DW && insn_is_zext(&insn[1])) 1571 + return 1; 1624 1572 break; 1625 1573 } 1626 1574 
/* speculation barrier */
+2 -1
include/linux/bpf.h
··· 653 653 MEM_RCU = BIT(13 + BPF_BASE_TYPE_BITS), 654 654 655 655 /* Used to tag PTR_TO_BTF_ID | MEM_ALLOC references which are non-owning. 656 - * Currently only valid for linked-list and rbtree nodes. 656 + * Currently only valid for linked-list and rbtree nodes. If the nodes 657 + * have a bpf_refcount_field, they must be tagged MEM_RCU as well. 657 658 */ 658 659 NON_OWN_REF = BIT(14 + BPF_BASE_TYPE_BITS), 659 660
+1 -1
include/linux/bpf_verifier.h
··· 745 745 } 746 746 } 747 747 748 - #define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED) 748 + #define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED | NON_OWN_REF) 749 749 750 750 static inline bool bpf_type_has_unsafe_modifiers(u32 type) 751 751 {
+6
include/linux/trace_events.h
··· 752 752 u32 *fd_type, const char **buf, 753 753 u64 *probe_offset, u64 *probe_addr); 754 754 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); 755 + int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); 755 756 #else 756 757 static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) 757 758 { ··· 796 795 } 797 796 static inline int 798 797 bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 798 + { 799 + return -EOPNOTSUPP; 800 + } 801 + static inline int 802 + bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 799 803 { 800 804 return -EOPNOTSUPP; 801 805 }
+4 -1
include/net/lwtunnel.h
··· 16 16 #define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1) 17 17 #define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2) 18 18 19 + /* LWTUNNEL_XMIT_CONTINUE should be distinguishable from dst_output return 20 + * values (NET_XMIT_xxx and NETDEV_TX_xxx in linux/netdevice.h) for safety. 21 + */ 19 22 enum { 20 23 LWTUNNEL_XMIT_DONE, 21 - LWTUNNEL_XMIT_CONTINUE, 24 + LWTUNNEL_XMIT_CONTINUE = 0x100, 22 25 }; 23 26 24 27
+21 -1
include/uapi/linux/bpf.h
··· 1039 1039 BPF_NETFILTER, 1040 1040 BPF_TCX_INGRESS, 1041 1041 BPF_TCX_EGRESS, 1042 + BPF_TRACE_UPROBE_MULTI, 1042 1043 __MAX_BPF_ATTACH_TYPE 1043 1044 }; 1044 1045 ··· 1058 1057 BPF_LINK_TYPE_STRUCT_OPS = 9, 1059 1058 BPF_LINK_TYPE_NETFILTER = 10, 1060 1059 BPF_LINK_TYPE_TCX = 11, 1060 + BPF_LINK_TYPE_UPROBE_MULTI = 12, 1061 1061 MAX_BPF_LINK_TYPE, 1062 1062 }; 1063 1063 ··· 1188 1186 /* link_create.kprobe_multi.flags used in LINK_CREATE command for 1189 1187 * BPF_TRACE_KPROBE_MULTI attach type to create return probe. 1190 1188 */ 1191 - #define BPF_F_KPROBE_MULTI_RETURN (1U << 0) 1189 + enum { 1190 + BPF_F_KPROBE_MULTI_RETURN = (1U << 0) 1191 + }; 1192 + 1193 + /* link_create.uprobe_multi.flags used in LINK_CREATE command for 1194 + * BPF_TRACE_UPROBE_MULTI attach type to create return probe. 1195 + */ 1196 + enum { 1197 + BPF_F_UPROBE_MULTI_RETURN = (1U << 0) 1198 + }; 1192 1199 1193 1200 /* link_create.netfilter.flags used in LINK_CREATE command for 1194 1201 * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. ··· 1635 1624 }; 1636 1625 __u64 expected_revision; 1637 1626 } tcx; 1627 + struct { 1628 + __aligned_u64 path; 1629 + __aligned_u64 offsets; 1630 + __aligned_u64 ref_ctr_offsets; 1631 + __aligned_u64 cookies; 1632 + __u32 cnt; 1633 + __u32 flags; 1634 + __u32 pid; 1635 + } uprobe_multi; 1638 1636 }; 1639 1637 } link_create; 1640 1638
+35 -78
kernel/bpf/cpumap.c
··· 68 68 struct bpf_cpumap_val value; 69 69 struct bpf_prog *prog; 70 70 71 - atomic_t refcnt; /* Control when this struct can be free'ed */ 72 - struct rcu_head rcu; 73 - 74 - struct work_struct kthread_stop_wq; 75 71 struct completion kthread_running; 72 + struct rcu_work free_work; 76 73 }; 77 74 78 75 struct bpf_cpu_map { ··· 114 117 return &cmap->map; 115 118 } 116 119 117 - static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) 118 - { 119 - atomic_inc(&rcpu->refcnt); 120 - } 121 - 122 120 static void __cpu_map_ring_cleanup(struct ptr_ring *ring) 123 121 { 124 122 /* The tear-down procedure should have made sure that queue is ··· 132 140 } 133 141 xdp_return_frame(ptr); 134 142 } 135 - } 136 - 137 - static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu) 138 - { 139 - if (atomic_dec_and_test(&rcpu->refcnt)) { 140 - if (rcpu->prog) 141 - bpf_prog_put(rcpu->prog); 142 - /* The queue should be empty at this point */ 143 - __cpu_map_ring_cleanup(rcpu->queue); 144 - ptr_ring_cleanup(rcpu->queue, NULL); 145 - kfree(rcpu->queue); 146 - kfree(rcpu); 147 - } 148 - } 149 - 150 - /* called from workqueue, to workaround syscall using preempt_disable */ 151 - static void cpu_map_kthread_stop(struct work_struct *work) 152 - { 153 - struct bpf_cpu_map_entry *rcpu; 154 - 155 - rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq); 156 - 157 - /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier, 158 - * as it waits until all in-flight call_rcu() callbacks complete. 
159 - */ 160 - rcu_barrier(); 161 - 162 - /* kthread_stop will wake_up_process and wait for it to complete */ 163 - kthread_stop(rcpu->kthread); 164 143 } 165 144 166 145 static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu, ··· 358 395 } 359 396 __set_current_state(TASK_RUNNING); 360 397 361 - put_cpu_map_entry(rcpu); 362 398 return 0; 363 399 } 364 400 ··· 434 472 if (IS_ERR(rcpu->kthread)) 435 473 goto free_prog; 436 474 437 - get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */ 438 - get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */ 439 - 440 475 /* Make sure kthread runs on a single CPU */ 441 476 kthread_bind(rcpu->kthread, cpu); 442 477 wake_up_process(rcpu->kthread); ··· 460 501 return NULL; 461 502 } 462 503 463 - static void __cpu_map_entry_free(struct rcu_head *rcu) 504 + static void __cpu_map_entry_free(struct work_struct *work) 464 505 { 465 506 struct bpf_cpu_map_entry *rcpu; 466 507 467 508 /* This cpu_map_entry have been disconnected from map and one 468 - * RCU grace-period have elapsed. Thus, XDP cannot queue any 509 + * RCU grace-period have elapsed. Thus, XDP cannot queue any 469 510 * new packets and cannot change/set flush_needed that can 470 511 * find this entry. 471 512 */ 472 - rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu); 513 + rcpu = container_of(to_rcu_work(work), struct bpf_cpu_map_entry, free_work); 473 514 515 + /* kthread_stop will wake_up_process and wait for it to complete. 516 + * cpu_map_kthread_run() makes sure the pointer ring is empty 517 + * before exiting. 
518 + */ 519 + kthread_stop(rcpu->kthread); 520 + 521 + if (rcpu->prog) 522 + bpf_prog_put(rcpu->prog); 523 + /* The queue should be empty at this point */ 524 + __cpu_map_ring_cleanup(rcpu->queue); 525 + ptr_ring_cleanup(rcpu->queue, NULL); 526 + kfree(rcpu->queue); 474 527 free_percpu(rcpu->bulkq); 475 - /* Cannot kthread_stop() here, last put free rcpu resources */ 476 - put_cpu_map_entry(rcpu); 528 + kfree(rcpu); 477 529 } 478 530 479 - /* After xchg pointer to bpf_cpu_map_entry, use the call_rcu() to 480 - * ensure any driver rcu critical sections have completed, but this 481 - * does not guarantee a flush has happened yet. Because driver side 482 - * rcu_read_lock/unlock only protects the running XDP program. The 483 - * atomic xchg and NULL-ptr check in __cpu_map_flush() makes sure a 484 - * pending flush op doesn't fail. 485 - * 486 - * The bpf_cpu_map_entry is still used by the kthread, and there can 487 - * still be pending packets (in queue and percpu bulkq). A refcnt 488 - * makes sure to last user (kthread_stop vs. call_rcu) free memory 489 - * resources. 490 - * 491 - * The rcu callback __cpu_map_entry_free flush remaining packets in 492 - * percpu bulkq to queue. Due to caller map_delete_elem() disable 493 - * preemption, cannot call kthread_stop() to make sure queue is empty. 494 - * Instead a work_queue is started for stopping kthread, 495 - * cpu_map_kthread_stop, which waits for an RCU grace period before 496 - * stopping kthread, emptying the queue. 531 + /* After the xchg of the bpf_cpu_map_entry pointer, we need to make sure the old 532 + * entry is no longer in use before freeing. We use queue_rcu_work() to call 533 + * __cpu_map_entry_free() in a separate workqueue after waiting for an RCU grace 534 + * period. 
This means that (a) all pending enqueue and flush operations have 535 + * completed (because of the RCU callback), and (b) we are in a workqueue 536 + * context where we can stop the kthread and wait for it to exit before freeing 537 + * everything. 497 538 */ 498 539 static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap, 499 540 u32 key_cpu, struct bpf_cpu_map_entry *rcpu) ··· 502 543 503 544 old_rcpu = unrcu_pointer(xchg(&cmap->cpu_map[key_cpu], RCU_INITIALIZER(rcpu))); 504 545 if (old_rcpu) { 505 - call_rcu(&old_rcpu->rcu, __cpu_map_entry_free); 506 - INIT_WORK(&old_rcpu->kthread_stop_wq, cpu_map_kthread_stop); 507 - schedule_work(&old_rcpu->kthread_stop_wq); 546 + INIT_RCU_WORK(&old_rcpu->free_work, __cpu_map_entry_free); 547 + queue_rcu_work(system_wq, &old_rcpu->free_work); 508 548 } 509 549 } 510 550 ··· 515 557 if (key_cpu >= map->max_entries) 516 558 return -EINVAL; 517 559 518 - /* notice caller map_delete_elem() use preempt_disable() */ 560 + /* notice caller map_delete_elem() uses rcu_read_lock() */ 519 561 __cpu_map_entry_replace(cmap, key_cpu, NULL); 520 562 return 0; 521 563 } ··· 566 608 /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, 567 609 * so the bpf programs (can be more than one that used this map) were 568 610 * disconnected from events. Wait for outstanding critical sections in 569 - * these programs to complete. The rcu critical section only guarantees 570 - * no further "XDP/bpf-side" reads against bpf_cpu_map->cpu_map. 571 - * It does __not__ ensure pending flush operations (if any) are 572 - * complete. 611 + * these programs to complete. synchronize_rcu() below not only 612 + * guarantees no further "XDP/bpf-side" reads against 613 + * bpf_cpu_map->cpu_map, but also ensure pending flush operations 614 + * (if any) are completed. 573 615 */ 574 - 575 616 synchronize_rcu(); 576 617 577 - /* For cpu_map the remote CPUs can still be using the entries 578 - * (struct bpf_cpu_map_entry). 
618 + /* The only possible user of bpf_cpu_map_entry is 619 + * cpu_map_kthread_run(). 579 620 */ 580 621 for (i = 0; i < cmap->map.max_entries; i++) { 581 622 struct bpf_cpu_map_entry *rcpu; ··· 583 626 if (!rcpu) 584 627 continue; 585 628 586 - /* bq flush and cleanup happens after RCU grace-period */ 587 - __cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */ 629 + /* Stop kthread and cleanup entry directly */ 630 + __cpu_map_entry_free(&rcpu->free_work.work); 588 631 } 589 632 bpf_map_area_free(cmap->cpu_map); 590 633 bpf_map_area_free(cmap);
+7 -1
kernel/bpf/helpers.c
··· 286 286 compiletime_assert(u.val == 0, "__ARCH_SPIN_LOCK_UNLOCKED not 0"); 287 287 BUILD_BUG_ON(sizeof(*l) != sizeof(__u32)); 288 288 BUILD_BUG_ON(sizeof(*lock) != sizeof(__u32)); 289 + preempt_disable(); 289 290 arch_spin_lock(l); 290 291 } 291 292 ··· 295 294 arch_spinlock_t *l = (void *)lock; 296 295 297 296 arch_spin_unlock(l); 297 + preempt_enable(); 298 298 } 299 299 300 300 #else ··· 1915 1913 1916 1914 if (rec) 1917 1915 bpf_obj_free_fields(rec, p); 1918 - bpf_mem_free(&bpf_global_ma, p); 1916 + 1917 + if (rec && rec->refcount_off >= 0) 1918 + bpf_mem_free_rcu(&bpf_global_ma, p); 1919 + else 1920 + bpf_mem_free(&bpf_global_ma, p); 1919 1921 } 1920 1922 1921 1923 __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
+63 -72
kernel/bpf/syscall.c
··· 657 657 if (!btf_is_kernel(field->kptr.btf)) { 658 658 pointee_struct_meta = btf_find_struct_meta(field->kptr.btf, 659 659 field->kptr.btf_id); 660 - WARN_ON_ONCE(!pointee_struct_meta); 661 660 migrate_disable(); 662 661 __bpf_obj_drop_impl(xchgd_field, pointee_struct_meta ? 663 662 pointee_struct_meta->record : ··· 2814 2815 2815 2816 /* Clean up bpf_link and corresponding anon_inode file and FD. After 2816 2817 * anon_inode is created, bpf_link can't be just kfree()'d due to deferred 2817 - * anon_inode's release() call. This helper marksbpf_link as 2818 + * anon_inode's release() call. This helper marks bpf_link as 2818 2819 * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt 2819 2820 * is not decremented, it's the responsibility of a calling code that failed 2820 2821 * to complete bpf_link initialization. 2822 + * This helper eventually calls link's dealloc callback, but does not call 2823 + * link's release callback. 2821 2824 */ 2822 2825 void bpf_link_cleanup(struct bpf_link_primer *primer) 2823 2826 { ··· 3656 3655 return fd; 3657 3656 } 3658 3657 3659 - static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 3660 - enum bpf_attach_type attach_type) 3661 - { 3662 - switch (prog->type) { 3663 - case BPF_PROG_TYPE_CGROUP_SOCK: 3664 - case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3665 - case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3666 - case BPF_PROG_TYPE_SK_LOOKUP: 3667 - return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 3668 - case BPF_PROG_TYPE_CGROUP_SKB: 3669 - if (!capable(CAP_NET_ADMIN)) 3670 - /* cg-skb progs can be loaded by unpriv user. 3671 - * check permissions at attach time. 3672 - */ 3673 - return -EPERM; 3674 - return prog->enforce_expected_attach_type && 3675 - prog->expected_attach_type != attach_type ? 
3676 - -EINVAL : 0; 3677 - case BPF_PROG_TYPE_KPROBE: 3678 - if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && 3679 - attach_type != BPF_TRACE_KPROBE_MULTI) 3680 - return -EINVAL; 3681 - return 0; 3682 - default: 3683 - return 0; 3684 - } 3685 - } 3686 - 3687 3658 static enum bpf_prog_type 3688 3659 attach_type_to_prog_type(enum bpf_attach_type attach_type) 3689 3660 { ··· 3719 3746 return BPF_PROG_TYPE_SCHED_CLS; 3720 3747 default: 3721 3748 return BPF_PROG_TYPE_UNSPEC; 3749 + } 3750 + } 3751 + 3752 + static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, 3753 + enum bpf_attach_type attach_type) 3754 + { 3755 + enum bpf_prog_type ptype; 3756 + 3757 + switch (prog->type) { 3758 + case BPF_PROG_TYPE_CGROUP_SOCK: 3759 + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: 3760 + case BPF_PROG_TYPE_CGROUP_SOCKOPT: 3761 + case BPF_PROG_TYPE_SK_LOOKUP: 3762 + return attach_type == prog->expected_attach_type ? 0 : -EINVAL; 3763 + case BPF_PROG_TYPE_CGROUP_SKB: 3764 + if (!capable(CAP_NET_ADMIN)) 3765 + /* cg-skb progs can be loaded by unpriv user. 3766 + * check permissions at attach time. 3767 + */ 3768 + return -EPERM; 3769 + return prog->enforce_expected_attach_type && 3770 + prog->expected_attach_type != attach_type ? 
3771 + -EINVAL : 0; 3772 + case BPF_PROG_TYPE_EXT: 3773 + return 0; 3774 + case BPF_PROG_TYPE_NETFILTER: 3775 + if (attach_type != BPF_NETFILTER) 3776 + return -EINVAL; 3777 + return 0; 3778 + case BPF_PROG_TYPE_PERF_EVENT: 3779 + case BPF_PROG_TYPE_TRACEPOINT: 3780 + if (attach_type != BPF_PERF_EVENT) 3781 + return -EINVAL; 3782 + return 0; 3783 + case BPF_PROG_TYPE_KPROBE: 3784 + if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI && 3785 + attach_type != BPF_TRACE_KPROBE_MULTI) 3786 + return -EINVAL; 3787 + if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI && 3788 + attach_type != BPF_TRACE_UPROBE_MULTI) 3789 + return -EINVAL; 3790 + if (attach_type != BPF_PERF_EVENT && 3791 + attach_type != BPF_TRACE_KPROBE_MULTI && 3792 + attach_type != BPF_TRACE_UPROBE_MULTI) 3793 + return -EINVAL; 3794 + return 0; 3795 + case BPF_PROG_TYPE_SCHED_CLS: 3796 + if (attach_type != BPF_TCX_INGRESS && 3797 + attach_type != BPF_TCX_EGRESS) 3798 + return -EINVAL; 3799 + return 0; 3800 + default: 3801 + ptype = attach_type_to_prog_type(attach_type); 3802 + if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) 3803 + return -EINVAL; 3804 + return 0; 3722 3805 } 3723 3806 } 3724 3807 ··· 4881 4852 return err; 4882 4853 } 4883 4854 4884 - #define BPF_LINK_CREATE_LAST_FIELD link_create.kprobe_multi.cookies 4855 + #define BPF_LINK_CREATE_LAST_FIELD link_create.uprobe_multi.pid 4885 4856 static int link_create(union bpf_attr *attr, bpfptr_t uattr) 4886 4857 { 4887 - enum bpf_prog_type ptype; 4888 4858 struct bpf_prog *prog; 4889 4859 int ret; 4890 4860 ··· 4901 4873 attr->link_create.attach_type); 4902 4874 if (ret) 4903 4875 goto out; 4904 - 4905 - switch (prog->type) { 4906 - case BPF_PROG_TYPE_EXT: 4907 - break; 4908 - case BPF_PROG_TYPE_NETFILTER: 4909 - if (attr->link_create.attach_type != BPF_NETFILTER) { 4910 - ret = -EINVAL; 4911 - goto out; 4912 - } 4913 - break; 4914 - case BPF_PROG_TYPE_PERF_EVENT: 4915 - case BPF_PROG_TYPE_TRACEPOINT: 4916 - if 
(attr->link_create.attach_type != BPF_PERF_EVENT) { 4917 - ret = -EINVAL; 4918 - goto out; 4919 - } 4920 - break; 4921 - case BPF_PROG_TYPE_KPROBE: 4922 - if (attr->link_create.attach_type != BPF_PERF_EVENT && 4923 - attr->link_create.attach_type != BPF_TRACE_KPROBE_MULTI) { 4924 - ret = -EINVAL; 4925 - goto out; 4926 - } 4927 - break; 4928 - case BPF_PROG_TYPE_SCHED_CLS: 4929 - if (attr->link_create.attach_type != BPF_TCX_INGRESS && 4930 - attr->link_create.attach_type != BPF_TCX_EGRESS) { 4931 - ret = -EINVAL; 4932 - goto out; 4933 - } 4934 - break; 4935 - default: 4936 - ptype = attach_type_to_prog_type(attr->link_create.attach_type); 4937 - if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) { 4938 - ret = -EINVAL; 4939 - goto out; 4940 - } 4941 - break; 4942 - } 4943 4876 4944 4877 switch (prog->type) { 4945 4878 case BPF_PROG_TYPE_CGROUP_SKB: ··· 4958 4969 case BPF_PROG_TYPE_KPROBE: 4959 4970 if (attr->link_create.attach_type == BPF_PERF_EVENT) 4960 4971 ret = bpf_perf_link_attach(attr, prog); 4961 - else 4972 + else if (attr->link_create.attach_type == BPF_TRACE_KPROBE_MULTI) 4962 4973 ret = bpf_kprobe_multi_link_attach(attr, prog); 4974 + else if (attr->link_create.attach_type == BPF_TRACE_UPROBE_MULTI) 4975 + ret = bpf_uprobe_multi_link_attach(attr, prog); 4963 4976 break; 4964 4977 default: 4965 4978 ret = -EINVAL;
+54 -40
kernel/bpf/verifier.c
··· 4990 4990 struct bpf_reg_state *reg, u32 regno) 4991 4991 { 4992 4992 const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id); 4993 - int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU; 4993 + int perm_flags; 4994 4994 const char *reg_name = ""; 4995 4995 4996 - /* Only unreferenced case accepts untrusted pointers */ 4997 - if (kptr_field->type == BPF_KPTR_UNREF) 4998 - perm_flags |= PTR_UNTRUSTED; 4996 + if (btf_is_kernel(reg->btf)) { 4997 + perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU; 4998 + 4999 + /* Only unreferenced case accepts untrusted pointers */ 5000 + if (kptr_field->type == BPF_KPTR_UNREF) 5001 + perm_flags |= PTR_UNTRUSTED; 5002 + } else { 5003 + perm_flags = PTR_MAYBE_NULL | MEM_ALLOC; 5004 + } 4999 5005 5000 5006 if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags)) 5001 5007 goto bad_type; 5002 5008 5003 - if (!btf_is_kernel(reg->btf)) { 5004 - verbose(env, "R%d must point to kernel BTF\n", regno); 5005 - return -EINVAL; 5006 - } 5007 5009 /* We need to verify reg->type and reg->btf, before accessing reg->btf */ 5008 5010 reg_name = btf_type_name(reg->btf, reg->btf_id); 5009 5011 ··· 5018 5016 if (__check_ptr_off_reg(env, reg, regno, true)) 5019 5017 return -EACCES; 5020 5018 5021 - /* A full type match is needed, as BTF can be vmlinux or module BTF, and 5019 + /* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and 5022 5020 * we also need to take into account the reg->off. 
5023 5021 * 5024 5022 * We want to support cases like: ··· 5064 5062 */ 5065 5063 static bool in_rcu_cs(struct bpf_verifier_env *env) 5066 5064 { 5067 - return env->cur_state->active_rcu_lock || !env->prog->aux->sleepable; 5065 + return env->cur_state->active_rcu_lock || 5066 + env->cur_state->active_lock.ptr || 5067 + !env->prog->aux->sleepable; 5068 5068 } 5069 5069 5070 5070 /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */ ··· 7920 7916 verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n"); 7921 7917 return -EFAULT; 7922 7918 } 7923 - /* Handled by helper specific checks */ 7919 + if (meta->func_id == BPF_FUNC_kptr_xchg) { 7920 + if (map_kptr_match_type(env, meta->kptr_field, reg, regno)) 7921 + return -EACCES; 7922 + } 7924 7923 break; 7925 7924 case PTR_TO_BTF_ID | MEM_PERCPU: 7926 7925 case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED: ··· 7975 7968 if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK) 7976 7969 return 0; 7977 7970 7978 - if ((type_is_ptr_alloc_obj(type) || type_is_non_owning_ref(type)) && reg->off) { 7979 - if (reg_find_field_offset(reg, reg->off, BPF_GRAPH_NODE_OR_ROOT)) 7980 - return __check_ptr_off_reg(env, reg, regno, true); 7981 - 7982 - verbose(env, "R%d must have zero offset when passed to release func\n", 7983 - regno); 7984 - verbose(env, "No graph node or root found at R%d type:%s off:%d\n", regno, 7985 - btf_type_name(reg->btf, reg->btf_id), reg->off); 7986 - return -EINVAL; 7987 - } 7988 - 7989 7971 /* Doing check_ptr_off_reg check for the offset will catch this 7990 7972 * because fixed_off_ok is false, but checking here allows us 7991 7973 * to give the user a better error message. 
··· 8009 8013 case PTR_TO_BTF_ID | PTR_TRUSTED: 8010 8014 case PTR_TO_BTF_ID | MEM_RCU: 8011 8015 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF: 8016 + case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU: 8012 8017 /* When referenced PTR_TO_BTF_ID is passed to release function, 8013 8018 * its fixed offset must be 0. In the other cases, fixed offset 8014 8019 * can be non-zero. This was already checked above. So pass ··· 10476 10479 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg) 10477 10480 { 10478 10481 struct bpf_verifier_state *state = env->cur_state; 10482 + struct btf_record *rec = reg_btf_record(reg); 10479 10483 10480 10484 if (!state->active_lock.ptr) { 10481 10485 verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n"); ··· 10489 10491 } 10490 10492 10491 10493 reg->type |= NON_OWN_REF; 10494 + if (rec->refcount_off >= 0) 10495 + reg->type |= MEM_RCU; 10496 + 10492 10497 return 0; 10493 10498 } 10494 10499 ··· 11224 11223 verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i); 11225 11224 return -EINVAL; 11226 11225 } 11227 - if (rec->refcount_off >= 0) { 11228 - verbose(env, "bpf_refcount_acquire calls are disabled for now\n"); 11229 - return -EINVAL; 11230 - } 11226 + 11231 11227 meta->arg_btf = reg->btf; 11232 11228 meta->arg_btf_id = reg->btf_id; 11233 11229 break; ··· 11328 11330 if (env->cur_state->active_rcu_lock) { 11329 11331 struct bpf_func_state *state; 11330 11332 struct bpf_reg_state *reg; 11333 + 11334 + if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) { 11335 + verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n"); 11336 + return -EACCES; 11337 + } 11331 11338 11332 11339 if (rcu_lock) { 11333 11340 verbose(env, "nested rcu read lock (kernel function %s)\n", func_name); ··· 14050 14047 return -EINVAL; 14051 14048 } 14052 14049 14050 + /* check src2 operand */ 14051 + err = check_reg_arg(env, insn->dst_reg, 
SRC_OP); 14052 + if (err) 14053 + return err; 14054 + 14055 + dst_reg = &regs[insn->dst_reg]; 14053 14056 if (BPF_SRC(insn->code) == BPF_X) { 14054 14057 if (insn->imm != 0) { 14055 14058 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); ··· 14067 14058 if (err) 14068 14059 return err; 14069 14060 14070 - if (is_pointer_value(env, insn->src_reg)) { 14061 + src_reg = &regs[insn->src_reg]; 14062 + if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) && 14063 + is_pointer_value(env, insn->src_reg)) { 14071 14064 verbose(env, "R%d pointer comparison prohibited\n", 14072 14065 insn->src_reg); 14073 14066 return -EACCES; 14074 14067 } 14075 - src_reg = &regs[insn->src_reg]; 14076 14068 } else { 14077 14069 if (insn->src_reg != BPF_REG_0) { 14078 14070 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); ··· 14081 14071 } 14082 14072 } 14083 14073 14084 - /* check src2 operand */ 14085 - err = check_reg_arg(env, insn->dst_reg, SRC_OP); 14086 - if (err) 14087 - return err; 14088 - 14089 - dst_reg = &regs[insn->dst_reg]; 14090 14074 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32; 14091 14075 14092 14076 if (BPF_SRC(insn->code) == BPF_K) { ··· 16696 16692 return -EINVAL; 16697 16693 } 16698 16694 16699 - if (env->cur_state->active_rcu_lock) { 16695 + if (env->cur_state->active_rcu_lock && 16696 + !in_rbtree_lock_required_cb(env)) { 16700 16697 verbose(env, "bpf_rcu_read_unlock is missing\n"); 16701 16698 return -EINVAL; 16702 16699 } ··· 16975 16970 16976 16971 if (is_tracing_prog_type(prog_type)) { 16977 16972 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); 16978 - return -EINVAL; 16979 - } 16980 - 16981 - if (prog->aux->sleepable) { 16982 - verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n"); 16983 16973 return -EINVAL; 16984 16974 } 16985 16975 } ··· 18281 18281 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; 18282 18282 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, 
(long)kptr_struct_meta) }; 18283 18283 18284 + if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] && 18285 + !kptr_struct_meta) { 18286 + verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n", 18287 + insn_idx); 18288 + return -EFAULT; 18289 + } 18290 + 18284 18291 insn_buf[0] = addr[0]; 18285 18292 insn_buf[1] = addr[1]; 18286 18293 insn_buf[2] = *insn; ··· 18295 18288 } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] || 18296 18289 desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] || 18297 18290 desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { 18291 + struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta; 18298 18292 int struct_meta_reg = BPF_REG_3; 18299 18293 int node_offset_reg = BPF_REG_4; 18300 18294 ··· 18303 18295 if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) { 18304 18296 struct_meta_reg = BPF_REG_4; 18305 18297 node_offset_reg = BPF_REG_5; 18298 + } 18299 + 18300 + if (!kptr_struct_meta) { 18301 + verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n", 18302 + insn_idx); 18303 + return -EFAULT; 18306 18304 } 18307 18305 18308 18306 __fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
+336 -6
kernel/trace/bpf_trace.c
··· 23 23 #include <linux/sort.h> 24 24 #include <linux/key.h> 25 25 #include <linux/verification.h> 26 + #include <linux/namei.h> 26 27 27 28 #include <net/bpf_sk_storage.h> 28 29 ··· 86 85 s32 *btf_id); 87 86 static u64 bpf_kprobe_multi_cookie(struct bpf_run_ctx *ctx); 88 87 static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx *ctx); 88 + 89 + static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx); 90 + static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx); 89 91 90 92 /** 91 93 * trace_call_bpf - invoke BPF program ··· 1107 1103 .arg1_type = ARG_PTR_TO_CTX, 1108 1104 }; 1109 1105 1106 + BPF_CALL_1(bpf_get_func_ip_uprobe_multi, struct pt_regs *, regs) 1107 + { 1108 + return bpf_uprobe_multi_entry_ip(current->bpf_ctx); 1109 + } 1110 + 1111 + static const struct bpf_func_proto bpf_get_func_ip_proto_uprobe_multi = { 1112 + .func = bpf_get_func_ip_uprobe_multi, 1113 + .gpl_only = false, 1114 + .ret_type = RET_INTEGER, 1115 + .arg1_type = ARG_PTR_TO_CTX, 1116 + }; 1117 + 1118 + BPF_CALL_1(bpf_get_attach_cookie_uprobe_multi, struct pt_regs *, regs) 1119 + { 1120 + return bpf_uprobe_multi_cookie(current->bpf_ctx); 1121 + } 1122 + 1123 + static const struct bpf_func_proto bpf_get_attach_cookie_proto_umulti = { 1124 + .func = bpf_get_attach_cookie_uprobe_multi, 1125 + .gpl_only = false, 1126 + .ret_type = RET_INTEGER, 1127 + .arg1_type = ARG_PTR_TO_CTX, 1128 + }; 1129 + 1110 1130 BPF_CALL_1(bpf_get_attach_cookie_trace, void *, ctx) 1111 1131 { 1112 1132 struct bpf_trace_run_ctx *run_ctx; ··· 1573 1545 return &bpf_override_return_proto; 1574 1546 #endif 1575 1547 case BPF_FUNC_get_func_ip: 1576 - return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ? 
1577 - &bpf_get_func_ip_proto_kprobe_multi : 1578 - &bpf_get_func_ip_proto_kprobe; 1548 + if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI) 1549 + return &bpf_get_func_ip_proto_kprobe_multi; 1550 + if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI) 1551 + return &bpf_get_func_ip_proto_uprobe_multi; 1552 + return &bpf_get_func_ip_proto_kprobe; 1579 1553 case BPF_FUNC_get_attach_cookie: 1580 - return prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI ? 1581 - &bpf_get_attach_cookie_proto_kmulti : 1582 - &bpf_get_attach_cookie_proto_trace; 1554 + if (prog->expected_attach_type == BPF_TRACE_KPROBE_MULTI) 1555 + return &bpf_get_attach_cookie_proto_kmulti; 1556 + if (prog->expected_attach_type == BPF_TRACE_UPROBE_MULTI) 1557 + return &bpf_get_attach_cookie_proto_umulti; 1558 + return &bpf_get_attach_cookie_proto_trace; 1583 1559 default: 1584 1560 return bpf_tracing_func_proto(func_id, prog); 1585 1561 } ··· 3002 2970 return 0; 3003 2971 } 3004 2972 #endif 2973 + 2974 + #ifdef CONFIG_UPROBES 2975 + struct bpf_uprobe_multi_link; 2976 + 2977 + struct bpf_uprobe { 2978 + struct bpf_uprobe_multi_link *link; 2979 + loff_t offset; 2980 + u64 cookie; 2981 + struct uprobe_consumer consumer; 2982 + }; 2983 + 2984 + struct bpf_uprobe_multi_link { 2985 + struct path path; 2986 + struct bpf_link link; 2987 + u32 cnt; 2988 + struct bpf_uprobe *uprobes; 2989 + struct task_struct *task; 2990 + }; 2991 + 2992 + struct bpf_uprobe_multi_run_ctx { 2993 + struct bpf_run_ctx run_ctx; 2994 + unsigned long entry_ip; 2995 + struct bpf_uprobe *uprobe; 2996 + }; 2997 + 2998 + static void bpf_uprobe_unregister(struct path *path, struct bpf_uprobe *uprobes, 2999 + u32 cnt) 3000 + { 3001 + u32 i; 3002 + 3003 + for (i = 0; i < cnt; i++) { 3004 + uprobe_unregister(d_real_inode(path->dentry), uprobes[i].offset, 3005 + &uprobes[i].consumer); 3006 + } 3007 + } 3008 + 3009 + static void bpf_uprobe_multi_link_release(struct bpf_link *link) 3010 + { 3011 + struct bpf_uprobe_multi_link 
*umulti_link; 3012 + 3013 + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3014 + bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt); 3015 + } 3016 + 3017 + static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link) 3018 + { 3019 + struct bpf_uprobe_multi_link *umulti_link; 3020 + 3021 + umulti_link = container_of(link, struct bpf_uprobe_multi_link, link); 3022 + if (umulti_link->task) 3023 + put_task_struct(umulti_link->task); 3024 + path_put(&umulti_link->path); 3025 + kvfree(umulti_link->uprobes); 3026 + kfree(umulti_link); 3027 + } 3028 + 3029 + static const struct bpf_link_ops bpf_uprobe_multi_link_lops = { 3030 + .release = bpf_uprobe_multi_link_release, 3031 + .dealloc = bpf_uprobe_multi_link_dealloc, 3032 + }; 3033 + 3034 + static int uprobe_prog_run(struct bpf_uprobe *uprobe, 3035 + unsigned long entry_ip, 3036 + struct pt_regs *regs) 3037 + { 3038 + struct bpf_uprobe_multi_link *link = uprobe->link; 3039 + struct bpf_uprobe_multi_run_ctx run_ctx = { 3040 + .entry_ip = entry_ip, 3041 + .uprobe = uprobe, 3042 + }; 3043 + struct bpf_prog *prog = link->link.prog; 3044 + bool sleepable = prog->aux->sleepable; 3045 + struct bpf_run_ctx *old_run_ctx; 3046 + int err = 0; 3047 + 3048 + if (link->task && current != link->task) 3049 + return 0; 3050 + 3051 + if (sleepable) 3052 + rcu_read_lock_trace(); 3053 + else 3054 + rcu_read_lock(); 3055 + 3056 + migrate_disable(); 3057 + 3058 + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); 3059 + err = bpf_prog_run(link->link.prog, regs); 3060 + bpf_reset_run_ctx(old_run_ctx); 3061 + 3062 + migrate_enable(); 3063 + 3064 + if (sleepable) 3065 + rcu_read_unlock_trace(); 3066 + else 3067 + rcu_read_unlock(); 3068 + return err; 3069 + } 3070 + 3071 + static bool 3072 + uprobe_multi_link_filter(struct uprobe_consumer *con, enum uprobe_filter_ctx ctx, 3073 + struct mm_struct *mm) 3074 + { 3075 + struct bpf_uprobe *uprobe; 3076 + 3077 + uprobe = container_of(con, struct 
bpf_uprobe, consumer); 3078 + return uprobe->link->task->mm == mm; 3079 + } 3080 + 3081 + static int 3082 + uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs *regs) 3083 + { 3084 + struct bpf_uprobe *uprobe; 3085 + 3086 + uprobe = container_of(con, struct bpf_uprobe, consumer); 3087 + return uprobe_prog_run(uprobe, instruction_pointer(regs), regs); 3088 + } 3089 + 3090 + static int 3091 + uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) 3092 + { 3093 + struct bpf_uprobe *uprobe; 3094 + 3095 + uprobe = container_of(con, struct bpf_uprobe, consumer); 3096 + return uprobe_prog_run(uprobe, func, regs); 3097 + } 3098 + 3099 + static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 3100 + { 3101 + struct bpf_uprobe_multi_run_ctx *run_ctx; 3102 + 3103 + run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx); 3104 + return run_ctx->entry_ip; 3105 + } 3106 + 3107 + static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) 3108 + { 3109 + struct bpf_uprobe_multi_run_ctx *run_ctx; 3110 + 3111 + run_ctx = container_of(current->bpf_ctx, struct bpf_uprobe_multi_run_ctx, run_ctx); 3112 + return run_ctx->uprobe->cookie; 3113 + } 3114 + 3115 + int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3116 + { 3117 + struct bpf_uprobe_multi_link *link = NULL; 3118 + unsigned long __user *uref_ctr_offsets; 3119 + unsigned long *ref_ctr_offsets = NULL; 3120 + struct bpf_link_primer link_primer; 3121 + struct bpf_uprobe *uprobes = NULL; 3122 + struct task_struct *task = NULL; 3123 + unsigned long __user *uoffsets; 3124 + u64 __user *ucookies; 3125 + void __user *upath; 3126 + u32 flags, cnt, i; 3127 + struct path path; 3128 + char *name; 3129 + pid_t pid; 3130 + int err; 3131 + 3132 + /* no support for 32bit archs yet */ 3133 + if (sizeof(u64) != sizeof(void *)) 3134 + return -EOPNOTSUPP; 3135 + 3136 + if (prog->expected_attach_type != 
BPF_TRACE_UPROBE_MULTI) 3137 + return -EINVAL; 3138 + 3139 + flags = attr->link_create.uprobe_multi.flags; 3140 + if (flags & ~BPF_F_UPROBE_MULTI_RETURN) 3141 + return -EINVAL; 3142 + 3143 + /* 3144 + * path, offsets and cnt are mandatory, 3145 + * ref_ctr_offsets and cookies are optional 3146 + */ 3147 + upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); 3148 + uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); 3149 + cnt = attr->link_create.uprobe_multi.cnt; 3150 + 3151 + if (!upath || !uoffsets || !cnt) 3152 + return -EINVAL; 3153 + 3154 + uref_ctr_offsets = u64_to_user_ptr(attr->link_create.uprobe_multi.ref_ctr_offsets); 3155 + ucookies = u64_to_user_ptr(attr->link_create.uprobe_multi.cookies); 3156 + 3157 + name = strndup_user(upath, PATH_MAX); 3158 + if (IS_ERR(name)) { 3159 + err = PTR_ERR(name); 3160 + return err; 3161 + } 3162 + 3163 + err = kern_path(name, LOOKUP_FOLLOW, &path); 3164 + kfree(name); 3165 + if (err) 3166 + return err; 3167 + 3168 + if (!d_is_reg(path.dentry)) { 3169 + err = -EBADF; 3170 + goto error_path_put; 3171 + } 3172 + 3173 + pid = attr->link_create.uprobe_multi.pid; 3174 + if (pid) { 3175 + rcu_read_lock(); 3176 + task = get_pid_task(find_vpid(pid), PIDTYPE_PID); 3177 + rcu_read_unlock(); 3178 + if (!task) 3179 + goto error_path_put; 3180 + } 3181 + 3182 + err = -ENOMEM; 3183 + 3184 + link = kzalloc(sizeof(*link), GFP_KERNEL); 3185 + uprobes = kvcalloc(cnt, sizeof(*uprobes), GFP_KERNEL); 3186 + 3187 + if (!uprobes || !link) 3188 + goto error_free; 3189 + 3190 + if (uref_ctr_offsets) { 3191 + ref_ctr_offsets = kvcalloc(cnt, sizeof(*ref_ctr_offsets), GFP_KERNEL); 3192 + if (!ref_ctr_offsets) 3193 + goto error_free; 3194 + } 3195 + 3196 + for (i = 0; i < cnt; i++) { 3197 + if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) { 3198 + err = -EFAULT; 3199 + goto error_free; 3200 + } 3201 + if (uref_ctr_offsets && __get_user(ref_ctr_offsets[i], uref_ctr_offsets + i)) { 3202 + err = -EFAULT; 3203 + goto 
error_free; 3204 + } 3205 + if (__get_user(uprobes[i].offset, uoffsets + i)) { 3206 + err = -EFAULT; 3207 + goto error_free; 3208 + } 3209 + 3210 + uprobes[i].link = link; 3211 + 3212 + if (flags & BPF_F_UPROBE_MULTI_RETURN) 3213 + uprobes[i].consumer.ret_handler = uprobe_multi_link_ret_handler; 3214 + else 3215 + uprobes[i].consumer.handler = uprobe_multi_link_handler; 3216 + 3217 + if (pid) 3218 + uprobes[i].consumer.filter = uprobe_multi_link_filter; 3219 + } 3220 + 3221 + link->cnt = cnt; 3222 + link->uprobes = uprobes; 3223 + link->path = path; 3224 + link->task = task; 3225 + 3226 + bpf_link_init(&link->link, BPF_LINK_TYPE_UPROBE_MULTI, 3227 + &bpf_uprobe_multi_link_lops, prog); 3228 + 3229 + for (i = 0; i < cnt; i++) { 3230 + err = uprobe_register_refctr(d_real_inode(link->path.dentry), 3231 + uprobes[i].offset, 3232 + ref_ctr_offsets ? ref_ctr_offsets[i] : 0, 3233 + &uprobes[i].consumer); 3234 + if (err) { 3235 + bpf_uprobe_unregister(&path, uprobes, i); 3236 + goto error_free; 3237 + } 3238 + } 3239 + 3240 + err = bpf_link_prime(&link->link, &link_primer); 3241 + if (err) 3242 + goto error_free; 3243 + 3244 + kvfree(ref_ctr_offsets); 3245 + return bpf_link_settle(&link_primer); 3246 + 3247 + error_free: 3248 + kvfree(ref_ctr_offsets); 3249 + kvfree(uprobes); 3250 + kfree(link); 3251 + if (task) 3252 + put_task_struct(task); 3253 + error_path_put: 3254 + path_put(&path); 3255 + return err; 3256 + } 3257 + #else /* !CONFIG_UPROBES */ 3258 + int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) 3259 + { 3260 + return -EOPNOTSUPP; 3261 + } 3262 + static u64 bpf_uprobe_multi_cookie(struct bpf_run_ctx *ctx) 3263 + { 3264 + return 0; 3265 + } 3266 + static u64 bpf_uprobe_multi_entry_ip(struct bpf_run_ctx *ctx) 3267 + { 3268 + return 0; 3269 + } 3270 + #endif /* CONFIG_UPROBES */
+7 -5
lib/test_bpf.c
··· 596 596 { 597 597 static const s64 regs[] = { 598 598 0x0123456789abcdefLL, /* dword > 0, word < 0 */ 599 - 0xfedcba9876543210LL, /* dowrd < 0, word > 0 */ 600 - 0xfedcba0198765432LL, /* dowrd < 0, word < 0 */ 599 + 0xfedcba9876543210LL, /* dword < 0, word > 0 */ 600 + 0xfedcba0198765432LL, /* dword < 0, word < 0 */ 601 601 0x0123458967abcdefLL, /* dword > 0, word > 0 */ 602 602 }; 603 603 int bits = alu32 ? 32 : 64; ··· 14567 14567 if (ret == test->test[i].result) { 14568 14568 pr_cont("%lld ", duration); 14569 14569 } else { 14570 - pr_cont("ret %d != %d ", ret, 14571 - test->test[i].result); 14570 + s32 res = test->test[i].result; 14571 + 14572 + pr_cont("ret %d != %d (%#x != %#x)", 14573 + ret, res, ret, res); 14572 14574 err_cnt++; 14573 14575 } 14574 14576 } ··· 15047 15045 struct bpf_array *progs; 15048 15046 int which, err; 15049 15047 15050 - /* Allocate the table of programs to be used for tall calls */ 15048 + /* Allocate the table of programs to be used for tail calls */ 15051 15049 progs = kzalloc(struct_size(progs, ptrs, ntests + 1), GFP_KERNEL); 15052 15050 if (!progs) 15053 15051 goto out_nomem;
+3 -4
net/core/lwt_bpf.c
··· 60 60 ret = BPF_OK; 61 61 } else { 62 62 skb_reset_mac_header(skb); 63 - ret = skb_do_redirect(skb); 64 - if (ret == 0) 65 - ret = BPF_REDIRECT; 63 + skb_do_redirect(skb); 64 + ret = BPF_REDIRECT; 66 65 } 67 66 break; 68 67 ··· 254 255 255 256 err = dst_output(dev_net(skb_dst(skb)->dev), skb->sk, skb); 256 257 if (unlikely(err)) 257 - return err; 258 + return net_xmit_errno(err); 258 259 259 260 /* ip[6]_finish_output2 understand LWTUNNEL_XMIT_DONE */ 260 261 return LWTUNNEL_XMIT_DONE;
+1 -1
net/ipv4/ip_output.c
··· 216 216 if (lwtunnel_xmit_redirect(dst->lwtstate)) { 217 217 int res = lwtunnel_xmit(skb); 218 218 219 - if (res < 0 || res == LWTUNNEL_XMIT_DONE) 219 + if (res != LWTUNNEL_XMIT_CONTINUE) 220 220 return res; 221 221 } 222 222
+1 -1
net/ipv6/ip6_output.c
··· 113 113 if (lwtunnel_xmit_redirect(dst->lwtstate)) { 114 114 int res = lwtunnel_xmit(skb); 115 115 116 - if (res < 0 || res == LWTUNNEL_XMIT_DONE) 116 + if (res != LWTUNNEL_XMIT_CONTINUE) 117 117 return res; 118 118 } 119 119
-12
samples/bpf/.gitignore
··· 37 37 tracex5 38 38 tracex6 39 39 tracex7 40 - xdp1 41 - xdp2 42 40 xdp_adjust_tail 43 41 xdp_fwd 44 - xdp_monitor 45 - xdp_redirect 46 - xdp_redirect_cpu 47 - xdp_redirect_map 48 - xdp_redirect_map_multi 49 42 xdp_router_ipv4 50 - xdp_rxq_info 51 - xdp_sample_pkts 52 43 xdp_tx_iptunnel 53 - xdpsock 54 - xdpsock_ctrl_proc 55 - xsk_fwd 56 44 testfile.img 57 45 hbm_out.log 58 46 iperf.*
+11 -57
samples/bpf/Makefile
··· 30 30 tprogs-y += test_cgrp2_attach 31 31 tprogs-y += test_cgrp2_sock 32 32 tprogs-y += test_cgrp2_sock2 33 - tprogs-y += xdp1 34 - tprogs-y += xdp2 35 33 tprogs-y += xdp_router_ipv4 36 34 tprogs-y += test_current_task_under_cgroup 37 35 tprogs-y += trace_event ··· 39 41 tprogs-y += xdp_tx_iptunnel 40 42 tprogs-y += test_map_in_map 41 43 tprogs-y += per_socket_stats_example 42 - tprogs-y += xdp_rxq_info 43 44 tprogs-y += syscall_tp 44 45 tprogs-y += cpustat 45 46 tprogs-y += xdp_adjust_tail 46 47 tprogs-y += xdp_fwd 47 48 tprogs-y += task_fd_query 48 - tprogs-y += xdp_sample_pkts 49 49 tprogs-y += ibumad 50 50 tprogs-y += hbm 51 - 52 - tprogs-y += xdp_redirect_cpu 53 - tprogs-y += xdp_redirect_map_multi 54 - tprogs-y += xdp_redirect_map 55 - tprogs-y += xdp_redirect 56 - tprogs-y += xdp_monitor 57 51 58 52 # Libbpf dependencies 59 53 LIBBPF_SRC = $(TOOLS_PATH)/lib/bpf ··· 80 90 test_cgrp2_attach-objs := test_cgrp2_attach.o 81 91 test_cgrp2_sock-objs := test_cgrp2_sock.o 82 92 test_cgrp2_sock2-objs := test_cgrp2_sock2.o 83 - xdp1-objs := xdp1_user.o 84 - # reuse xdp1 source intentionally 85 - xdp2-objs := xdp1_user.o 86 93 test_current_task_under_cgroup-objs := $(CGROUP_HELPERS) \ 87 94 test_current_task_under_cgroup_user.o 88 95 trace_event-objs := trace_event_user.o $(TRACE_HELPERS) ··· 89 102 xdp_tx_iptunnel-objs := xdp_tx_iptunnel_user.o 90 103 test_map_in_map-objs := test_map_in_map_user.o 91 104 per_socket_stats_example-objs := cookie_uid_helper_example.o 92 - xdp_rxq_info-objs := xdp_rxq_info_user.o 93 105 syscall_tp-objs := syscall_tp_user.o 94 106 cpustat-objs := cpustat_user.o 95 107 xdp_adjust_tail-objs := xdp_adjust_tail_user.o 96 108 xdp_fwd-objs := xdp_fwd_user.o 97 109 task_fd_query-objs := task_fd_query_user.o $(TRACE_HELPERS) 98 - xdp_sample_pkts-objs := xdp_sample_pkts_user.o 99 110 ibumad-objs := ibumad_user.o 100 111 hbm-objs := hbm.o $(CGROUP_HELPERS) 101 112 102 - xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o $(XDP_SAMPLE) 
103 - xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o $(XDP_SAMPLE) 104 - xdp_redirect_map-objs := xdp_redirect_map_user.o $(XDP_SAMPLE) 105 - xdp_redirect-objs := xdp_redirect_user.o $(XDP_SAMPLE) 106 - xdp_monitor-objs := xdp_monitor_user.o $(XDP_SAMPLE) 107 113 xdp_router_ipv4-objs := xdp_router_ipv4_user.o $(XDP_SAMPLE) 108 114 109 115 # Tell kbuild to always build the programs ··· 104 124 always-y += sockex1_kern.o 105 125 always-y += sockex2_kern.o 106 126 always-y += sockex3_kern.o 107 - always-y += tracex1_kern.o 127 + always-y += tracex1.bpf.o 108 128 always-y += tracex2.bpf.o 109 - always-y += tracex3_kern.o 110 - always-y += tracex4_kern.o 111 - always-y += tracex5_kern.o 112 - always-y += tracex6_kern.o 113 - always-y += tracex7_kern.o 129 + always-y += tracex3.bpf.o 130 + always-y += tracex4.bpf.o 131 + always-y += tracex5.bpf.o 132 + always-y += tracex6.bpf.o 133 + always-y += tracex7.bpf.o 114 134 always-y += sock_flags.bpf.o 115 135 always-y += test_probe_write_user.bpf.o 116 136 always-y += trace_output.bpf.o 117 137 always-y += tcbpf1_kern.o 118 138 always-y += tc_l2_redirect_kern.o 119 139 always-y += lathist_kern.o 120 - always-y += offwaketime_kern.o 121 - always-y += spintest_kern.o 140 + always-y += offwaketime.bpf.o 141 + always-y += spintest.bpf.o 122 142 always-y += map_perf_test.bpf.o 123 143 always-y += test_overhead_tp.bpf.o 124 144 always-y += test_overhead_raw_tp.bpf.o 125 145 always-y += test_overhead_kprobe.bpf.o 126 146 always-y += parse_varlen.o parse_simple.o parse_ldabs.o 127 147 always-y += test_cgrp2_tc.bpf.o 128 - always-y += xdp1_kern.o 129 - always-y += xdp2_kern.o 130 148 always-y += test_current_task_under_cgroup.bpf.o 131 149 always-y += trace_event_kern.o 132 150 always-y += sampleip_kern.o ··· 140 162 always-y += tcp_basertt_kern.o 141 163 always-y += tcp_tos_reflect_kern.o 142 164 always-y += tcp_dumpstats_kern.o 143 - always-y += xdp_rxq_info_kern.o 144 165 always-y += xdp2skb_meta_kern.o 145 166 always-y += 
syscall_tp_kern.o 146 167 always-y += cpustat_kern.o 147 168 always-y += xdp_adjust_tail_kern.o 148 169 always-y += xdp_fwd_kern.o 149 170 always-y += task_fd_query_kern.o 150 - always-y += xdp_sample_pkts_kern.o 151 171 always-y += ibumad_kern.o 152 172 always-y += hbm_out_kern.o 153 173 always-y += hbm_edt_kern.o ··· 183 207 endif 184 208 185 209 TPROGS_LDLIBS += $(LIBBPF) -lelf -lz 186 - TPROGLDLIBS_xdp_monitor += -lm 187 - TPROGLDLIBS_xdp_redirect += -lm 188 - TPROGLDLIBS_xdp_redirect_cpu += -lm 189 - TPROGLDLIBS_xdp_redirect_map += -lm 190 - TPROGLDLIBS_xdp_redirect_map_multi += -lm 191 210 TPROGLDLIBS_xdp_router_ipv4 += -lm -pthread 192 211 TPROGLDLIBS_tracex4 += -lrt 193 212 TPROGLDLIBS_trace_output += -lrt ··· 297 326 298 327 .PHONY: libbpf_hdrs 299 328 300 - $(obj)/xdp_redirect_cpu_user.o: $(obj)/xdp_redirect_cpu.skel.h 301 - $(obj)/xdp_redirect_map_multi_user.o: $(obj)/xdp_redirect_map_multi.skel.h 302 - $(obj)/xdp_redirect_map_user.o: $(obj)/xdp_redirect_map.skel.h 303 - $(obj)/xdp_redirect_user.o: $(obj)/xdp_redirect.skel.h 304 - $(obj)/xdp_monitor_user.o: $(obj)/xdp_monitor.skel.h 305 329 $(obj)/xdp_router_ipv4_user.o: $(obj)/xdp_router_ipv4.skel.h 306 330 307 - $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h 331 + $(obj)/tracex5.bpf.o: $(obj)/syscall_nrs.h 308 332 $(obj)/hbm_out_kern.o: $(src)/hbm.h $(src)/hbm_kern.h 309 333 $(obj)/hbm.o: $(src)/hbm.h 310 334 $(obj)/hbm_edt_kern.o: $(src)/hbm.h $(src)/hbm_kern.h ··· 349 383 350 384 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) 351 385 352 - $(obj)/xdp_redirect_cpu.bpf.o: $(obj)/xdp_sample.bpf.o 353 - $(obj)/xdp_redirect_map_multi.bpf.o: $(obj)/xdp_sample.bpf.o 354 - $(obj)/xdp_redirect_map.bpf.o: $(obj)/xdp_sample.bpf.o 355 - $(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o 356 - $(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o 357 386 $(obj)/xdp_router_ipv4.bpf.o: $(obj)/xdp_sample.bpf.o 358 387 359 388 $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h 
$(src)/xdp_sample_shared.h ··· 359 398 -I$(LIBBPF_INCLUDE) $(CLANG_SYS_INCLUDES) \ 360 399 -c $(filter %.bpf.c,$^) -o $@ 361 400 362 - LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect_map_multi.skel.h \ 363 - xdp_redirect_map.skel.h xdp_redirect.skel.h xdp_monitor.skel.h \ 364 - xdp_router_ipv4.skel.h 401 + LINKED_SKELS := xdp_router_ipv4.skel.h 365 402 clean-files += $(LINKED_SKELS) 366 403 367 - xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o 368 - xdp_redirect_map_multi.skel.h-deps := xdp_redirect_map_multi.bpf.o xdp_sample.bpf.o 369 - xdp_redirect_map.skel.h-deps := xdp_redirect_map.bpf.o xdp_sample.bpf.o 370 - xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o 371 - xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o 372 404 xdp_router_ipv4.skel.h-deps := xdp_router_ipv4.bpf.o xdp_sample.bpf.o 373 405 374 406 LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.bpf.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps))) ··· 394 440 -Wno-gnu-variable-sized-type-not-at-end \ 395 441 -Wno-address-of-packed-member -Wno-tautological-compare \ 396 442 -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \ 397 - -fno-asynchronous-unwind-tables \ 443 + -fno-asynchronous-unwind-tables -fcf-protection \ 398 444 -I$(srctree)/samples/bpf/ -include asm_goto_workaround.h \ 399 445 -O2 -emit-llvm -Xclang -disable-llvm-passes -c $< -o - | \ 400 446 $(OPT) -O2 -mtriple=bpf-pc-linux | $(LLVM_DIS) | \
+6
samples/bpf/README.rst
··· 4 4 This directory contains a test stubs, verifier test-suite and examples 5 5 for using eBPF. The examples use libbpf from tools/lib/bpf. 6 6 7 + Note that the XDP-specific samples have been removed from this directory and 8 + moved to the xdp-tools repository: https://github.com/xdp-project/xdp-tools 9 + See the commit messages removing each tool from this directory for how to 10 + convert specific command invocations between the old samples and the utilities 11 + in xdp-tools. 12 + 7 13 Build dependencies 8 14 ================== 9 15
+2
samples/bpf/net_shared.h
··· 17 17 #define TC_ACT_OK 0 18 18 #define TC_ACT_SHOT 2 19 19 20 + #define IFNAMSIZ 16 21 + 20 22 #if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ 21 23 __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 22 24 #define bpf_ntohs(x) __builtin_bswap16(x)
+11 -28
samples/bpf/offwaketime_kern.c samples/bpf/offwaketime.bpf.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <uapi/linux/bpf.h> 8 - #include <uapi/linux/ptrace.h> 9 - #include <uapi/linux/perf_event.h> 7 + #include "vmlinux.h" 10 8 #include <linux/version.h> 11 - #include <linux/sched.h> 12 9 #include <bpf/bpf_helpers.h> 13 10 #include <bpf/bpf_tracing.h> 11 + #include <bpf/bpf_core_read.h> 14 12 15 - #define _(P) \ 16 - ({ \ 17 - typeof(P) val; \ 18 - bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 19 - val; \ 20 - }) 13 + #ifndef PERF_MAX_STACK_DEPTH 14 + #define PERF_MAX_STACK_DEPTH 127 15 + #endif 21 16 22 17 #define MINBLOCK_US 1 23 18 #define MAX_ENTRIES 10000 ··· 62 67 SEC("kprobe/try_to_wake_up") 63 68 int waker(struct pt_regs *ctx) 64 69 { 65 - struct task_struct *p = (void *) PT_REGS_PARM1(ctx); 70 + struct task_struct *p = (void *)PT_REGS_PARM1_CORE(ctx); 71 + u32 pid = BPF_CORE_READ(p, pid); 66 72 struct wokeby_t woke; 67 - u32 pid; 68 - 69 - pid = _(p->pid); 70 73 71 74 bpf_get_current_comm(&woke.name, sizeof(woke.name)); 72 75 woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS); ··· 104 111 105 112 #if 1 106 113 /* taken from /sys/kernel/tracing/events/sched/sched_switch/format */ 107 - struct sched_switch_args { 108 - unsigned long long pad; 109 - char prev_comm[TASK_COMM_LEN]; 110 - int prev_pid; 111 - int prev_prio; 112 - long long prev_state; 113 - char next_comm[TASK_COMM_LEN]; 114 - int next_pid; 115 - int next_prio; 116 - }; 117 114 SEC("tracepoint/sched/sched_switch") 118 - int oncpu(struct sched_switch_args *ctx) 115 + int oncpu(struct trace_event_raw_sched_switch *ctx) 119 116 { 120 117 /* record previous thread sleep time */ 121 118 u32 pid = ctx->prev_pid; 122 119 #else 123 - SEC("kprobe/finish_task_switch") 120 + SEC("kprobe.multi/finish_task_switch*") 124 121 int oncpu(struct pt_regs *ctx) 125 122 { 126 - struct task_struct *p = (void *) PT_REGS_PARM1(ctx); 123 + struct task_struct *p 
= (void *)PT_REGS_PARM1_CORE(ctx); 127 124 /* record previous thread sleep time */ 128 - u32 pid = _(p->pid); 125 + u32 pid = BPF_CORE_READ(p, pid); 129 126 #endif 130 127 u64 delta, ts, *tsp; 131 128
+1 -1
samples/bpf/offwaketime_user.c
··· 105 105 return 2; 106 106 } 107 107 108 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 108 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 109 109 obj = bpf_object__open_file(filename, NULL); 110 110 if (libbpf_get_error(obj)) { 111 111 fprintf(stderr, "ERROR: opening BPF object file failed\n");
+9 -18
samples/bpf/spintest_kern.c samples/bpf/spintest.bpf.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <linux/skbuff.h> 8 - #include <linux/netdevice.h> 7 + #include "vmlinux.h" 9 8 #include <linux/version.h> 10 - #include <uapi/linux/bpf.h> 11 - #include <uapi/linux/perf_event.h> 12 9 #include <bpf/bpf_helpers.h> 13 10 #include <bpf/bpf_tracing.h> 11 + 12 + #ifndef PERF_MAX_STACK_DEPTH 13 + #define PERF_MAX_STACK_DEPTH 127 14 + #endif 14 15 15 16 struct { 16 17 __uint(type, BPF_MAP_TYPE_HASH); ··· 47 46 } 48 47 49 48 /* add kprobes to all possible *spin* functions */ 50 - SEC("kprobe/spin_unlock")PROG(p1) 51 - SEC("kprobe/spin_lock")PROG(p2) 52 - SEC("kprobe/mutex_spin_on_owner")PROG(p3) 53 - SEC("kprobe/rwsem_spin_on_owner")PROG(p4) 54 - SEC("kprobe/spin_unlock_irqrestore")PROG(p5) 55 - SEC("kprobe/_raw_spin_unlock_irqrestore")PROG(p6) 56 - SEC("kprobe/_raw_spin_unlock_bh")PROG(p7) 57 - SEC("kprobe/_raw_spin_unlock")PROG(p8) 58 - SEC("kprobe/_raw_spin_lock_irqsave")PROG(p9) 59 - SEC("kprobe/_raw_spin_trylock_bh")PROG(p10) 60 - SEC("kprobe/_raw_spin_lock_irq")PROG(p11) 61 - SEC("kprobe/_raw_spin_trylock")PROG(p12) 62 - SEC("kprobe/_raw_spin_lock")PROG(p13) 63 - SEC("kprobe/_raw_spin_lock_bh")PROG(p14) 49 + SEC("kprobe.multi/spin_*lock*")PROG(spin_lock) 50 + SEC("kprobe.multi/*_spin_on_owner")PROG(spin_on_owner) 51 + SEC("kprobe.multi/_raw_spin_*lock*")PROG(raw_spin_lock) 52 + 64 53 /* and to inner bpf helpers */ 65 54 SEC("kprobe/htab_map_update_elem")PROG(p15) 66 55 SEC("kprobe/__htab_percpu_map_update_elem")PROG(p16)
+8 -16
samples/bpf/spintest_user.c
··· 9 9 10 10 int main(int ac, char **argv) 11 11 { 12 - char filename[256], symbol[256]; 13 12 struct bpf_object *obj = NULL; 14 13 struct bpf_link *links[20]; 15 14 long key, next_key, value; 16 15 struct bpf_program *prog; 17 16 int map_fd, i, j = 0; 18 - const char *section; 17 + char filename[256]; 19 18 struct ksym *sym; 20 19 21 20 if (load_kallsyms()) { ··· 22 23 return 2; 23 24 } 24 25 25 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 26 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 26 27 obj = bpf_object__open_file(filename, NULL); 27 28 if (libbpf_get_error(obj)) { 28 29 fprintf(stderr, "ERROR: opening BPF object file failed\n"); ··· 43 44 } 44 45 45 46 bpf_object__for_each_program(prog, obj) { 46 - section = bpf_program__section_name(prog); 47 - if (sscanf(section, "kprobe/%s", symbol) != 1) 48 - continue; 49 - 50 - /* Attach prog only when symbol exists */ 51 - if (ksym_get_addr(symbol)) { 52 - links[j] = bpf_program__attach(prog); 53 - if (libbpf_get_error(links[j])) { 54 - fprintf(stderr, "bpf_program__attach failed\n"); 55 - links[j] = NULL; 56 - goto cleanup; 57 - } 58 - j++; 47 + links[j] = bpf_program__attach(prog); 48 + if (libbpf_get_error(links[j])) { 49 + fprintf(stderr, "bpf_program__attach failed\n"); 50 + links[j] = NULL; 51 + goto cleanup; 59 52 } 53 + j++; 60 54 } 61 55 62 56 for (i = 0; i < 5; i++) {
+3 -7
samples/bpf/test_map_in_map.bpf.c
··· 103 103 return result ? *result : -ENOENT; 104 104 } 105 105 106 - SEC("kprobe/__sys_connect") 107 - int trace_sys_connect(struct pt_regs *ctx) 106 + SEC("ksyscall/connect") 107 + int BPF_KSYSCALL(trace_sys_connect, unsigned int fd, struct sockaddr_in6 *in6, int addrlen) 108 108 { 109 - struct sockaddr_in6 *in6; 110 109 u16 test_case, port, dst6[8]; 111 - int addrlen, ret, inline_ret, ret_key = 0; 110 + int ret, inline_ret, ret_key = 0; 112 111 u32 port_key; 113 112 void *outer_map, *inner_map; 114 113 bool inline_hash = false; 115 - 116 - in6 = (struct sockaddr_in6 *)PT_REGS_PARM2_CORE(ctx); 117 - addrlen = (int)PT_REGS_PARM3_CORE(ctx); 118 114 119 115 if (addrlen != sizeof(*in6)) 120 116 return 0;
+7 -13
samples/bpf/test_overhead_kprobe.bpf.c
··· 8 8 #include <linux/version.h> 9 9 #include <bpf/bpf_helpers.h> 10 10 #include <bpf/bpf_tracing.h> 11 - 12 - #define _(P) \ 13 - ({ \ 14 - typeof(P) val = 0; \ 15 - bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 16 - val; \ 17 - }) 11 + #include <bpf/bpf_core_read.h> 18 12 19 13 SEC("kprobe/__set_task_comm") 20 14 int prog(struct pt_regs *ctx) ··· 20 26 u16 oom_score_adj; 21 27 u32 pid; 22 28 23 - tsk = (void *)PT_REGS_PARM1(ctx); 29 + tsk = (void *)PT_REGS_PARM1_CORE(ctx); 24 30 25 - pid = _(tsk->pid); 26 - bpf_probe_read_kernel_str(oldcomm, sizeof(oldcomm), &tsk->comm); 27 - bpf_probe_read_kernel_str(newcomm, sizeof(newcomm), 31 + pid = BPF_CORE_READ(tsk, pid); 32 + bpf_core_read_str(oldcomm, sizeof(oldcomm), &tsk->comm); 33 + bpf_core_read_str(newcomm, sizeof(newcomm), 28 34 (void *)PT_REGS_PARM2(ctx)); 29 - signal = _(tsk->signal); 30 - oom_score_adj = _(signal->oom_score_adj); 35 + signal = BPF_CORE_READ(tsk, signal); 36 + oom_score_adj = BPF_CORE_READ(signal, oom_score_adj); 31 37 return 0; 32 38 } 33 39
+2 -27
samples/bpf/test_overhead_tp.bpf.c
··· 8 8 #include <bpf/bpf_helpers.h> 9 9 10 10 /* from /sys/kernel/tracing/events/task/task_rename/format */ 11 - struct task_rename { 12 - __u64 pad; 13 - __u32 pid; 14 - char oldcomm[TASK_COMM_LEN]; 15 - char newcomm[TASK_COMM_LEN]; 16 - __u16 oom_score_adj; 17 - }; 18 11 SEC("tracepoint/task/task_rename") 19 - int prog(struct task_rename *ctx) 12 + int prog(struct trace_event_raw_task_rename *ctx) 20 13 { 21 14 return 0; 22 15 } 23 16 24 17 /* from /sys/kernel/tracing/events/fib/fib_table_lookup/format */ 25 - struct fib_table_lookup { 26 - __u64 pad; 27 - __u32 tb_id; 28 - int err; 29 - int oif; 30 - int iif; 31 - __u8 proto; 32 - __u8 tos; 33 - __u8 scope; 34 - __u8 flags; 35 - __u8 src[4]; 36 - __u8 dst[4]; 37 - __u8 gw4[4]; 38 - __u8 gw6[16]; 39 - __u16 sport; 40 - __u16 dport; 41 - char name[16]; 42 - }; 43 18 SEC("tracepoint/fib/fib_table_lookup") 44 - int prog2(struct fib_table_lookup *ctx) 19 + int prog2(struct trace_event_raw_fib_table_lookup *ctx) 45 20 { 46 21 return 0; 47 22 }
+9 -16
samples/bpf/tracex1_kern.c samples/bpf/tracex1.bpf.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <linux/skbuff.h> 8 - #include <linux/netdevice.h> 9 - #include <uapi/linux/bpf.h> 7 + #include "vmlinux.h" 8 + #include "net_shared.h" 10 9 #include <linux/version.h> 11 10 #include <bpf/bpf_helpers.h> 11 + #include <bpf/bpf_core_read.h> 12 12 #include <bpf/bpf_tracing.h> 13 - 14 - #define _(P) \ 15 - ({ \ 16 - typeof(P) val = 0; \ 17 - bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 18 - val; \ 19 - }) 20 13 21 14 /* kprobe is NOT a stable ABI 22 15 * kernel functions can be removed, renamed or completely change semantics. 23 16 * Number of arguments and their positions can change, etc. 24 17 * In such case this bpf+kprobe example will no longer be meaningful 25 18 */ 26 - SEC("kprobe/__netif_receive_skb_core") 19 + SEC("kprobe.multi/__netif_receive_skb_core*") 27 20 int bpf_prog1(struct pt_regs *ctx) 28 21 { 29 22 /* attaches to kprobe __netif_receive_skb_core, 30 23 * looks for packets on loobpack device and prints them 24 + * (wildcard is used for avoiding symbol mismatch due to optimization) 31 25 */ 32 26 char devname[IFNAMSIZ]; 33 27 struct net_device *dev; 34 28 struct sk_buff *skb; 35 29 int len; 36 30 37 - /* non-portable! works for the given kernel only */ 38 - bpf_probe_read_kernel(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx)); 39 - dev = _(skb->dev); 40 - len = _(skb->len); 31 + bpf_core_read(&skb, sizeof(skb), (void *)PT_REGS_PARM1(ctx)); 32 + dev = BPF_CORE_READ(skb, dev); 33 + len = BPF_CORE_READ(skb, len); 41 34 42 - bpf_probe_read_kernel(devname, sizeof(devname), dev->name); 35 + BPF_CORE_READ_STR_INTO(&devname, dev, name); 43 36 44 37 if (devname[0] == 'l' && devname[1] == 'o') { 45 38 char fmt[] = "skb %p len %d\n";
+1 -1
samples/bpf/tracex1_user.c
··· 12 12 char filename[256]; 13 13 FILE *f; 14 14 15 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 15 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 16 16 obj = bpf_object__open_file(filename, NULL); 17 17 if (libbpf_get_error(obj)) { 18 18 fprintf(stderr, "ERROR: opening BPF object file failed\n");
+25 -15
samples/bpf/tracex3_kern.c samples/bpf/tracex3.bpf.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <linux/skbuff.h> 8 - #include <linux/netdevice.h> 7 + #include "vmlinux.h" 9 8 #include <linux/version.h> 10 - #include <uapi/linux/bpf.h> 11 9 #include <bpf/bpf_helpers.h> 12 10 #include <bpf/bpf_tracing.h> 11 + 12 + struct start_key { 13 + dev_t dev; 14 + u32 _pad; 15 + sector_t sector; 16 + }; 13 17 14 18 struct { 15 19 __uint(type, BPF_MAP_TYPE_HASH); ··· 22 18 __uint(max_entries, 4096); 23 19 } my_map SEC(".maps"); 24 20 25 - /* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe 26 - * example will no longer be meaningful 27 - */ 28 - SEC("kprobe/blk_mq_start_request") 29 - int bpf_prog1(struct pt_regs *ctx) 21 + /* from /sys/kernel/tracing/events/block/block_io_start/format */ 22 + SEC("tracepoint/block/block_io_start") 23 + int bpf_prog1(struct trace_event_raw_block_rq *ctx) 30 24 { 31 - long rq = PT_REGS_PARM1(ctx); 32 25 u64 val = bpf_ktime_get_ns(); 26 + struct start_key key = { 27 + .dev = ctx->dev, 28 + .sector = ctx->sector 29 + }; 33 30 34 - bpf_map_update_elem(&my_map, &rq, &val, BPF_ANY); 31 + bpf_map_update_elem(&my_map, &key, &val, BPF_ANY); 35 32 return 0; 36 33 } 37 34 ··· 54 49 __uint(max_entries, SLOTS); 55 50 } lat_map SEC(".maps"); 56 51 57 - SEC("kprobe/__blk_account_io_done") 58 - int bpf_prog2(struct pt_regs *ctx) 52 + /* from /sys/kernel/tracing/events/block/block_io_done/format */ 53 + SEC("tracepoint/block/block_io_done") 54 + int bpf_prog2(struct trace_event_raw_block_rq *ctx) 59 55 { 60 - long rq = PT_REGS_PARM1(ctx); 56 + struct start_key key = { 57 + .dev = ctx->dev, 58 + .sector = ctx->sector 59 + }; 60 + 61 61 u64 *value, l, base; 62 62 u32 index; 63 63 64 - value = bpf_map_lookup_elem(&my_map, &rq); 64 + value = bpf_map_lookup_elem(&my_map, &key); 65 65 if (!value) 66 66 return 0; 67 67 68 68 u64 cur_time = bpf_ktime_get_ns(); 69 69 u64 delta = cur_time - 
*value; 70 70 71 - bpf_map_delete_elem(&my_map, &rq); 71 + bpf_map_delete_elem(&my_map, &key); 72 72 73 73 /* the lines below are computing index = log10(delta)*10 74 74 * using integer arithmetic
+1 -1
samples/bpf/tracex3_user.c
··· 125 125 } 126 126 } 127 127 128 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 128 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 129 129 obj = bpf_object__open_file(filename, NULL); 130 130 if (libbpf_get_error(obj)) { 131 131 fprintf(stderr, "ERROR: opening BPF object file failed\n");
+1 -2
samples/bpf/tracex4_kern.c samples/bpf/tracex4.bpf.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <linux/ptrace.h> 7 + #include "vmlinux.h" 8 8 #include <linux/version.h> 9 - #include <uapi/linux/bpf.h> 10 9 #include <bpf/bpf_helpers.h> 11 10 #include <bpf/bpf_tracing.h> 12 11
+1 -1
samples/bpf/tracex4_user.c
··· 53 53 char filename[256]; 54 54 int map_fd, j = 0; 55 55 56 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 56 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 57 57 obj = bpf_object__open_file(filename, NULL); 58 58 if (libbpf_get_error(obj)) { 59 59 fprintf(stderr, "ERROR: opening BPF object file failed\n");
+7 -7
samples/bpf/tracex5_kern.c samples/bpf/tracex5.bpf.c
··· 4 4 * modify it under the terms of version 2 of the GNU General Public 5 5 * License as published by the Free Software Foundation. 6 6 */ 7 - #include <linux/ptrace.h> 8 - #include <linux/version.h> 9 - #include <uapi/linux/bpf.h> 10 - #include <uapi/linux/seccomp.h> 11 - #include <uapi/linux/unistd.h> 7 + #include "vmlinux.h" 12 8 #include "syscall_nrs.h" 9 + #include <linux/version.h> 10 + #include <uapi/linux/unistd.h> 13 11 #include <bpf/bpf_helpers.h> 14 12 #include <bpf/bpf_tracing.h> 13 + #include <bpf/bpf_core_read.h> 15 14 15 + #define __stringify(x) #x 16 16 #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F 17 17 18 18 struct { ··· 47 47 { 48 48 struct seccomp_data sd; 49 49 50 - bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 50 + bpf_core_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 51 51 if (sd.args[2] == 512) { 52 52 char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; 53 53 bpf_trace_printk(fmt, sizeof(fmt), ··· 60 60 { 61 61 struct seccomp_data sd; 62 62 63 - bpf_probe_read_kernel(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 63 + bpf_core_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); 64 64 if (sd.args[2] > 128 && sd.args[2] <= 1024) { 65 65 char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; 66 66 bpf_trace_printk(fmt, sizeof(fmt),
+1 -1
samples/bpf/tracex5_user.c
··· 42 42 char filename[256]; 43 43 FILE *f; 44 44 45 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 45 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 46 46 obj = bpf_object__open_file(filename, NULL); 47 47 if (libbpf_get_error(obj)) { 48 48 fprintf(stderr, "ERROR: opening BPF object file failed\n");
+16 -4
samples/bpf/tracex6_kern.c samples/bpf/tracex6.bpf.c
··· 1 - #include <linux/ptrace.h> 1 + #include "vmlinux.h" 2 2 #include <linux/version.h> 3 - #include <uapi/linux/bpf.h> 4 3 #include <bpf/bpf_helpers.h> 4 + #include <bpf/bpf_tracing.h> 5 + #include <bpf/bpf_core_read.h> 5 6 6 7 struct { 7 8 __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); ··· 46 45 return 0; 47 46 } 48 47 49 - SEC("kprobe/htab_map_lookup_elem") 50 - int bpf_prog2(struct pt_regs *ctx) 48 + /* 49 + * Since *_map_lookup_elem can't be expected to trigger bpf programs 50 + * due to potential deadlocks (bpf_disable_instrumentation), this bpf 51 + * program will be attached to bpf_map_copy_value (which is called 52 + * from map_lookup_elem) and will only filter the hashtable type. 53 + */ 54 + SEC("kprobe/bpf_map_copy_value") 55 + int BPF_KPROBE(bpf_prog2, struct bpf_map *map) 51 56 { 52 57 u32 key = bpf_get_smp_processor_id(); 53 58 struct bpf_perf_event_value *val, buf; 59 + enum bpf_map_type type; 54 60 int error; 61 + 62 + type = BPF_CORE_READ(map, map_type); 63 + if (type != BPF_MAP_TYPE_HASH) 64 + return 0; 55 65 56 66 error = bpf_perf_event_read_value(&counters, key, &buf, sizeof(buf)); 57 67 if (error)
+1 -1
samples/bpf/tracex6_user.c
··· 180 180 char filename[256]; 181 181 int i = 0; 182 182 183 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 183 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 184 184 obj = bpf_object__open_file(filename, NULL); 185 185 if (libbpf_get_error(obj)) { 186 186 fprintf(stderr, "ERROR: opening BPF object file failed\n");
+1 -2
samples/bpf/tracex7_kern.c samples/bpf/tracex7.bpf.c
··· 1 - #include <uapi/linux/ptrace.h> 2 - #include <uapi/linux/bpf.h> 1 + #include "vmlinux.h" 3 2 #include <linux/version.h> 4 3 #include <bpf/bpf_helpers.h> 5 4
+1 -1
samples/bpf/tracex7_user.c
··· 19 19 return 0; 20 20 } 21 21 22 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 22 + snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]); 23 23 obj = bpf_object__open_file(filename, NULL); 24 24 if (libbpf_get_error(obj)) { 25 25 fprintf(stderr, "ERROR: opening BPF object file failed\n");
-100
samples/bpf/xdp1_kern.c
··· 1 - /* Copyright (c) 2016 PLUMgrid 2 - * 3 - * This program is free software; you can redistribute it and/or 4 - * modify it under the terms of version 2 of the GNU General Public 5 - * License as published by the Free Software Foundation. 6 - */ 7 - #define KBUILD_MODNAME "foo" 8 - #include <uapi/linux/bpf.h> 9 - #include <linux/in.h> 10 - #include <linux/if_ether.h> 11 - #include <linux/if_packet.h> 12 - #include <linux/if_vlan.h> 13 - #include <linux/ip.h> 14 - #include <linux/ipv6.h> 15 - #include <bpf/bpf_helpers.h> 16 - 17 - struct { 18 - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 19 - __type(key, u32); 20 - __type(value, long); 21 - __uint(max_entries, 256); 22 - } rxcnt SEC(".maps"); 23 - 24 - static int parse_ipv4(void *data, u64 nh_off, void *data_end) 25 - { 26 - struct iphdr *iph = data + nh_off; 27 - 28 - if (iph + 1 > data_end) 29 - return 0; 30 - return iph->protocol; 31 - } 32 - 33 - static int parse_ipv6(void *data, u64 nh_off, void *data_end) 34 - { 35 - struct ipv6hdr *ip6h = data + nh_off; 36 - 37 - if (ip6h + 1 > data_end) 38 - return 0; 39 - return ip6h->nexthdr; 40 - } 41 - 42 - #define XDPBUFSIZE 60 43 - SEC("xdp.frags") 44 - int xdp_prog1(struct xdp_md *ctx) 45 - { 46 - __u8 pkt[XDPBUFSIZE] = {}; 47 - void *data_end = &pkt[XDPBUFSIZE-1]; 48 - void *data = pkt; 49 - struct ethhdr *eth = data; 50 - int rc = XDP_DROP; 51 - long *value; 52 - u16 h_proto; 53 - u64 nh_off; 54 - u32 ipproto; 55 - 56 - if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt))) 57 - return rc; 58 - 59 - nh_off = sizeof(*eth); 60 - if (data + nh_off > data_end) 61 - return rc; 62 - 63 - h_proto = eth->h_proto; 64 - 65 - /* Handle VLAN tagged packet */ 66 - if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { 67 - struct vlan_hdr *vhdr; 68 - 69 - vhdr = data + nh_off; 70 - nh_off += sizeof(struct vlan_hdr); 71 - if (data + nh_off > data_end) 72 - return rc; 73 - h_proto = vhdr->h_vlan_encapsulated_proto; 74 - } 75 - /* Handle double VLAN tagged packet */ 76 
- if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { 77 - struct vlan_hdr *vhdr; 78 - 79 - vhdr = data + nh_off; 80 - nh_off += sizeof(struct vlan_hdr); 81 - if (data + nh_off > data_end) 82 - return rc; 83 - h_proto = vhdr->h_vlan_encapsulated_proto; 84 - } 85 - 86 - if (h_proto == htons(ETH_P_IP)) 87 - ipproto = parse_ipv4(data, nh_off, data_end); 88 - else if (h_proto == htons(ETH_P_IPV6)) 89 - ipproto = parse_ipv6(data, nh_off, data_end); 90 - else 91 - ipproto = 0; 92 - 93 - value = bpf_map_lookup_elem(&rxcnt, &ipproto); 94 - if (value) 95 - *value += 1; 96 - 97 - return rc; 98 - } 99 - 100 - char _license[] SEC("license") = "GPL";
-166
samples/bpf/xdp1_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* Copyright (c) 2016 PLUMgrid 3 - */ 4 - #include <linux/bpf.h> 5 - #include <linux/if_link.h> 6 - #include <assert.h> 7 - #include <errno.h> 8 - #include <signal.h> 9 - #include <stdio.h> 10 - #include <stdlib.h> 11 - #include <string.h> 12 - #include <unistd.h> 13 - #include <libgen.h> 14 - #include <net/if.h> 15 - 16 - #include "bpf_util.h" 17 - #include <bpf/bpf.h> 18 - #include <bpf/libbpf.h> 19 - 20 - static int ifindex; 21 - static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; 22 - static __u32 prog_id; 23 - 24 - static void int_exit(int sig) 25 - { 26 - __u32 curr_prog_id = 0; 27 - 28 - if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { 29 - printf("bpf_xdp_query_id failed\n"); 30 - exit(1); 31 - } 32 - if (prog_id == curr_prog_id) 33 - bpf_xdp_detach(ifindex, xdp_flags, NULL); 34 - else if (!curr_prog_id) 35 - printf("couldn't find a prog id on a given interface\n"); 36 - else 37 - printf("program on interface changed, not removing\n"); 38 - exit(0); 39 - } 40 - 41 - /* simple per-protocol drop counter 42 - */ 43 - static void poll_stats(int map_fd, int interval) 44 - { 45 - unsigned int nr_cpus = bpf_num_possible_cpus(); 46 - __u64 values[nr_cpus], prev[UINT8_MAX] = { 0 }; 47 - int i; 48 - 49 - while (1) { 50 - __u32 key = UINT32_MAX; 51 - 52 - sleep(interval); 53 - 54 - while (bpf_map_get_next_key(map_fd, &key, &key) == 0) { 55 - __u64 sum = 0; 56 - 57 - assert(bpf_map_lookup_elem(map_fd, &key, values) == 0); 58 - for (i = 0; i < nr_cpus; i++) 59 - sum += values[i]; 60 - if (sum > prev[key]) 61 - printf("proto %u: %10llu pkt/s\n", 62 - key, (sum - prev[key]) / interval); 63 - prev[key] = sum; 64 - } 65 - } 66 - } 67 - 68 - static void usage(const char *prog) 69 - { 70 - fprintf(stderr, 71 - "usage: %s [OPTS] IFACE\n\n" 72 - "OPTS:\n" 73 - " -S use skb-mode\n" 74 - " -N enforce native mode\n" 75 - " -F force loading prog\n", 76 - prog); 77 - } 78 - 79 - int main(int argc, char **argv) 80 - { 
81 - struct bpf_prog_info info = {}; 82 - __u32 info_len = sizeof(info); 83 - const char *optstr = "FSN"; 84 - int prog_fd, map_fd, opt; 85 - struct bpf_program *prog; 86 - struct bpf_object *obj; 87 - struct bpf_map *map; 88 - char filename[256]; 89 - int err; 90 - 91 - while ((opt = getopt(argc, argv, optstr)) != -1) { 92 - switch (opt) { 93 - case 'S': 94 - xdp_flags |= XDP_FLAGS_SKB_MODE; 95 - break; 96 - case 'N': 97 - /* default, set below */ 98 - break; 99 - case 'F': 100 - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; 101 - break; 102 - default: 103 - usage(basename(argv[0])); 104 - return 1; 105 - } 106 - } 107 - 108 - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) 109 - xdp_flags |= XDP_FLAGS_DRV_MODE; 110 - 111 - if (optind == argc) { 112 - usage(basename(argv[0])); 113 - return 1; 114 - } 115 - 116 - ifindex = if_nametoindex(argv[optind]); 117 - if (!ifindex) { 118 - perror("if_nametoindex"); 119 - return 1; 120 - } 121 - 122 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 123 - obj = bpf_object__open_file(filename, NULL); 124 - if (libbpf_get_error(obj)) 125 - return 1; 126 - 127 - prog = bpf_object__next_program(obj, NULL); 128 - bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); 129 - 130 - err = bpf_object__load(obj); 131 - if (err) 132 - return 1; 133 - 134 - prog_fd = bpf_program__fd(prog); 135 - 136 - map = bpf_object__next_map(obj, NULL); 137 - if (!map) { 138 - printf("finding a map in obj file failed\n"); 139 - return 1; 140 - } 141 - map_fd = bpf_map__fd(map); 142 - 143 - if (!prog_fd) { 144 - printf("bpf_prog_load_xattr: %s\n", strerror(errno)); 145 - return 1; 146 - } 147 - 148 - signal(SIGINT, int_exit); 149 - signal(SIGTERM, int_exit); 150 - 151 - if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { 152 - printf("link set xdp fd failed\n"); 153 - return 1; 154 - } 155 - 156 - err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); 157 - if (err) { 158 - printf("can't get prog info - %s\n", strerror(errno)); 159 - return err; 160 
- } 161 - prog_id = info.id; 162 - 163 - poll_stats(map_fd, 1); 164 - 165 - return 0; 166 - }
-125
samples/bpf/xdp2_kern.c
··· 1 - /* Copyright (c) 2016 PLUMgrid 2 - * 3 - * This program is free software; you can redistribute it and/or 4 - * modify it under the terms of version 2 of the GNU General Public 5 - * License as published by the Free Software Foundation. 6 - */ 7 - #define KBUILD_MODNAME "foo" 8 - #include <uapi/linux/bpf.h> 9 - #include <linux/in.h> 10 - #include <linux/if_ether.h> 11 - #include <linux/if_packet.h> 12 - #include <linux/if_vlan.h> 13 - #include <linux/ip.h> 14 - #include <linux/ipv6.h> 15 - #include <bpf/bpf_helpers.h> 16 - 17 - struct { 18 - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 19 - __type(key, u32); 20 - __type(value, long); 21 - __uint(max_entries, 256); 22 - } rxcnt SEC(".maps"); 23 - 24 - static void swap_src_dst_mac(void *data) 25 - { 26 - unsigned short *p = data; 27 - unsigned short dst[3]; 28 - 29 - dst[0] = p[0]; 30 - dst[1] = p[1]; 31 - dst[2] = p[2]; 32 - p[0] = p[3]; 33 - p[1] = p[4]; 34 - p[2] = p[5]; 35 - p[3] = dst[0]; 36 - p[4] = dst[1]; 37 - p[5] = dst[2]; 38 - } 39 - 40 - static int parse_ipv4(void *data, u64 nh_off, void *data_end) 41 - { 42 - struct iphdr *iph = data + nh_off; 43 - 44 - if (iph + 1 > data_end) 45 - return 0; 46 - return iph->protocol; 47 - } 48 - 49 - static int parse_ipv6(void *data, u64 nh_off, void *data_end) 50 - { 51 - struct ipv6hdr *ip6h = data + nh_off; 52 - 53 - if (ip6h + 1 > data_end) 54 - return 0; 55 - return ip6h->nexthdr; 56 - } 57 - 58 - #define XDPBUFSIZE 60 59 - SEC("xdp.frags") 60 - int xdp_prog1(struct xdp_md *ctx) 61 - { 62 - __u8 pkt[XDPBUFSIZE] = {}; 63 - void *data_end = &pkt[XDPBUFSIZE-1]; 64 - void *data = pkt; 65 - struct ethhdr *eth = data; 66 - int rc = XDP_DROP; 67 - long *value; 68 - u16 h_proto; 69 - u64 nh_off; 70 - u32 ipproto; 71 - 72 - if (bpf_xdp_load_bytes(ctx, 0, pkt, sizeof(pkt))) 73 - return rc; 74 - 75 - nh_off = sizeof(*eth); 76 - if (data + nh_off > data_end) 77 - return rc; 78 - 79 - h_proto = eth->h_proto; 80 - 81 - /* Handle VLAN tagged packet */ 82 - if (h_proto == 
htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { 83 - struct vlan_hdr *vhdr; 84 - 85 - vhdr = data + nh_off; 86 - nh_off += sizeof(struct vlan_hdr); 87 - if (data + nh_off > data_end) 88 - return rc; 89 - h_proto = vhdr->h_vlan_encapsulated_proto; 90 - } 91 - /* Handle double VLAN tagged packet */ 92 - if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { 93 - struct vlan_hdr *vhdr; 94 - 95 - vhdr = data + nh_off; 96 - nh_off += sizeof(struct vlan_hdr); 97 - if (data + nh_off > data_end) 98 - return rc; 99 - h_proto = vhdr->h_vlan_encapsulated_proto; 100 - } 101 - 102 - if (h_proto == htons(ETH_P_IP)) 103 - ipproto = parse_ipv4(data, nh_off, data_end); 104 - else if (h_proto == htons(ETH_P_IPV6)) 105 - ipproto = parse_ipv6(data, nh_off, data_end); 106 - else 107 - ipproto = 0; 108 - 109 - value = bpf_map_lookup_elem(&rxcnt, &ipproto); 110 - if (value) 111 - *value += 1; 112 - 113 - if (ipproto == IPPROTO_UDP) { 114 - swap_src_dst_mac(data); 115 - 116 - if (bpf_xdp_store_bytes(ctx, 0, pkt, sizeof(pkt))) 117 - return rc; 118 - 119 - rc = XDP_TX; 120 - } 121 - 122 - return rc; 123 - } 124 - 125 - char _license[] SEC("license") = "GPL";
-8
samples/bpf/xdp_monitor.bpf.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* Copyright(c) 2017-2018 Jesper Dangaard Brouer, Red Hat Inc. 3 - * 4 - * XDP monitor tool, based on tracepoints 5 - */ 6 - #include "xdp_sample.bpf.h" 7 - 8 - char _license[] SEC("license") = "GPL";
-118
samples/bpf/xdp_monitor_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. */ 3 - static const char *__doc__= 4 - "XDP monitor tool, based on tracepoints\n"; 5 - 6 - static const char *__doc_err_only__= 7 - " NOTICE: Only tracking XDP redirect errors\n" 8 - " Enable redirect success stats via '-s/--stats'\n" 9 - " (which comes with a per packet processing overhead)\n"; 10 - 11 - #include <errno.h> 12 - #include <stdio.h> 13 - #include <stdlib.h> 14 - #include <stdbool.h> 15 - #include <stdint.h> 16 - #include <string.h> 17 - #include <ctype.h> 18 - #include <unistd.h> 19 - #include <locale.h> 20 - #include <getopt.h> 21 - #include <net/if.h> 22 - #include <time.h> 23 - #include <signal.h> 24 - #include <bpf/bpf.h> 25 - #include <bpf/libbpf.h> 26 - #include "bpf_util.h" 27 - #include "xdp_sample_user.h" 28 - #include "xdp_monitor.skel.h" 29 - 30 - static int mask = SAMPLE_REDIRECT_ERR_CNT | SAMPLE_CPUMAP_ENQUEUE_CNT | 31 - SAMPLE_CPUMAP_KTHREAD_CNT | SAMPLE_EXCEPTION_CNT | 32 - SAMPLE_DEVMAP_XMIT_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; 33 - 34 - DEFINE_SAMPLE_INIT(xdp_monitor); 35 - 36 - static const struct option long_options[] = { 37 - { "help", no_argument, NULL, 'h' }, 38 - { "stats", no_argument, NULL, 's' }, 39 - { "interval", required_argument, NULL, 'i' }, 40 - { "verbose", no_argument, NULL, 'v' }, 41 - {} 42 - }; 43 - 44 - int main(int argc, char **argv) 45 - { 46 - unsigned long interval = 2; 47 - int ret = EXIT_FAIL_OPTION; 48 - struct xdp_monitor *skel; 49 - bool errors_only = true; 50 - int longindex = 0, opt; 51 - bool error = true; 52 - 53 - /* Parse commands line args */ 54 - while ((opt = getopt_long(argc, argv, "si:vh", 55 - long_options, &longindex)) != -1) { 56 - switch (opt) { 57 - case 's': 58 - errors_only = false; 59 - mask |= SAMPLE_REDIRECT_CNT; 60 - break; 61 - case 'i': 62 - interval = strtoul(optarg, NULL, 0); 63 - break; 64 - case 'v': 65 - sample_switch_mode(); 66 - break; 67 - case 'h': 68 - error = false; 
69 - default: 70 - sample_usage(argv, long_options, __doc__, mask, error); 71 - return ret; 72 - } 73 - } 74 - 75 - skel = xdp_monitor__open(); 76 - if (!skel) { 77 - fprintf(stderr, "Failed to xdp_monitor__open: %s\n", 78 - strerror(errno)); 79 - ret = EXIT_FAIL_BPF; 80 - goto end; 81 - } 82 - 83 - ret = sample_init_pre_load(skel); 84 - if (ret < 0) { 85 - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); 86 - ret = EXIT_FAIL_BPF; 87 - goto end_destroy; 88 - } 89 - 90 - ret = xdp_monitor__load(skel); 91 - if (ret < 0) { 92 - fprintf(stderr, "Failed to xdp_monitor__load: %s\n", strerror(errno)); 93 - ret = EXIT_FAIL_BPF; 94 - goto end_destroy; 95 - } 96 - 97 - ret = sample_init(skel, mask); 98 - if (ret < 0) { 99 - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); 100 - ret = EXIT_FAIL_BPF; 101 - goto end_destroy; 102 - } 103 - 104 - if (errors_only) 105 - printf("%s", __doc_err_only__); 106 - 107 - ret = sample_run(interval, NULL, NULL); 108 - if (ret < 0) { 109 - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); 110 - ret = EXIT_FAIL; 111 - goto end_destroy; 112 - } 113 - ret = EXIT_OK; 114 - end_destroy: 115 - xdp_monitor__destroy(skel); 116 - end: 117 - sample_exit(ret); 118 - }
-49
samples/bpf/xdp_redirect.bpf.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - /* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> 3 - * 4 - * This program is free software; you can redistribute it and/or 5 - * modify it under the terms of version 2 of the GNU General Public 6 - * License as published by the Free Software Foundation. 7 - * 8 - * This program is distributed in the hope that it will be useful, but 9 - * WITHOUT ANY WARRANTY; without even the implied warranty of 10 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 - * General Public License for more details. 12 - */ 13 - #include "vmlinux.h" 14 - #include "xdp_sample.bpf.h" 15 - #include "xdp_sample_shared.h" 16 - 17 - const volatile int ifindex_out; 18 - 19 - SEC("xdp") 20 - int xdp_redirect_prog(struct xdp_md *ctx) 21 - { 22 - void *data_end = (void *)(long)ctx->data_end; 23 - void *data = (void *)(long)ctx->data; 24 - u32 key = bpf_get_smp_processor_id(); 25 - struct ethhdr *eth = data; 26 - struct datarec *rec; 27 - u64 nh_off; 28 - 29 - nh_off = sizeof(*eth); 30 - if (data + nh_off > data_end) 31 - return XDP_DROP; 32 - 33 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 34 - if (!rec) 35 - return XDP_PASS; 36 - NO_TEAR_INC(rec->processed); 37 - 38 - swap_src_dst_mac(data); 39 - return bpf_redirect(ifindex_out, 0); 40 - } 41 - 42 - /* Redirect require an XDP bpf_prog loaded on the TX device */ 43 - SEC("xdp") 44 - int xdp_redirect_dummy_prog(struct xdp_md *ctx) 45 - { 46 - return XDP_PASS; 47 - } 48 - 49 - char _license[] SEC("license") = "GPL";
-539
samples/bpf/xdp_redirect_cpu.bpf.c
··· 1 - /* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP) 2 - * 3 - * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. 4 - */ 5 - #include "vmlinux.h" 6 - #include "xdp_sample.bpf.h" 7 - #include "xdp_sample_shared.h" 8 - #include "hash_func01.h" 9 - 10 - /* Special map type that can XDP_REDIRECT frames to another CPU */ 11 - struct { 12 - __uint(type, BPF_MAP_TYPE_CPUMAP); 13 - __uint(key_size, sizeof(u32)); 14 - __uint(value_size, sizeof(struct bpf_cpumap_val)); 15 - } cpu_map SEC(".maps"); 16 - 17 - /* Set of maps controlling available CPU, and for iterating through 18 - * selectable redirect CPUs. 19 - */ 20 - struct { 21 - __uint(type, BPF_MAP_TYPE_ARRAY); 22 - __type(key, u32); 23 - __type(value, u32); 24 - } cpus_available SEC(".maps"); 25 - 26 - struct { 27 - __uint(type, BPF_MAP_TYPE_ARRAY); 28 - __type(key, u32); 29 - __type(value, u32); 30 - __uint(max_entries, 1); 31 - } cpus_count SEC(".maps"); 32 - 33 - struct { 34 - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 35 - __type(key, u32); 36 - __type(value, u32); 37 - __uint(max_entries, 1); 38 - } cpus_iterator SEC(".maps"); 39 - 40 - struct { 41 - __uint(type, BPF_MAP_TYPE_DEVMAP); 42 - __uint(key_size, sizeof(int)); 43 - __uint(value_size, sizeof(struct bpf_devmap_val)); 44 - __uint(max_entries, 1); 45 - } tx_port SEC(".maps"); 46 - 47 - char tx_mac_addr[ETH_ALEN]; 48 - 49 - /* Helper parse functions */ 50 - 51 - static __always_inline 52 - bool parse_eth(struct ethhdr *eth, void *data_end, 53 - u16 *eth_proto, u64 *l3_offset) 54 - { 55 - u16 eth_type; 56 - u64 offset; 57 - 58 - offset = sizeof(*eth); 59 - if ((void *)eth + offset > data_end) 60 - return false; 61 - 62 - eth_type = eth->h_proto; 63 - 64 - /* Skip non 802.3 Ethertypes */ 65 - if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0)) 66 - return false; 67 - 68 - /* Handle VLAN tagged packet */ 69 - if (eth_type == bpf_htons(ETH_P_8021Q) || 70 - eth_type == bpf_htons(ETH_P_8021AD)) { 71 - struct vlan_hdr *vlan_hdr; 
72 - 73 - vlan_hdr = (void *)eth + offset; 74 - offset += sizeof(*vlan_hdr); 75 - if ((void *)eth + offset > data_end) 76 - return false; 77 - eth_type = vlan_hdr->h_vlan_encapsulated_proto; 78 - } 79 - /* Handle double VLAN tagged packet */ 80 - if (eth_type == bpf_htons(ETH_P_8021Q) || 81 - eth_type == bpf_htons(ETH_P_8021AD)) { 82 - struct vlan_hdr *vlan_hdr; 83 - 84 - vlan_hdr = (void *)eth + offset; 85 - offset += sizeof(*vlan_hdr); 86 - if ((void *)eth + offset > data_end) 87 - return false; 88 - eth_type = vlan_hdr->h_vlan_encapsulated_proto; 89 - } 90 - 91 - *eth_proto = bpf_ntohs(eth_type); 92 - *l3_offset = offset; 93 - return true; 94 - } 95 - 96 - static __always_inline 97 - u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off) 98 - { 99 - void *data_end = (void *)(long)ctx->data_end; 100 - void *data = (void *)(long)ctx->data; 101 - struct iphdr *iph = data + nh_off; 102 - struct udphdr *udph; 103 - 104 - if (iph + 1 > data_end) 105 - return 0; 106 - if (!(iph->protocol == IPPROTO_UDP)) 107 - return 0; 108 - 109 - udph = (void *)(iph + 1); 110 - if (udph + 1 > data_end) 111 - return 0; 112 - 113 - return bpf_ntohs(udph->dest); 114 - } 115 - 116 - static __always_inline 117 - int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off) 118 - { 119 - void *data_end = (void *)(long)ctx->data_end; 120 - void *data = (void *)(long)ctx->data; 121 - struct iphdr *iph = data + nh_off; 122 - 123 - if (iph + 1 > data_end) 124 - return 0; 125 - return iph->protocol; 126 - } 127 - 128 - static __always_inline 129 - int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off) 130 - { 131 - void *data_end = (void *)(long)ctx->data_end; 132 - void *data = (void *)(long)ctx->data; 133 - struct ipv6hdr *ip6h = data + nh_off; 134 - 135 - if (ip6h + 1 > data_end) 136 - return 0; 137 - return ip6h->nexthdr; 138 - } 139 - 140 - SEC("xdp") 141 - int xdp_prognum0_no_touch(struct xdp_md *ctx) 142 - { 143 - u32 key = bpf_get_smp_processor_id(); 144 - struct datarec *rec; 145 - u32 
*cpu_selected; 146 - u32 cpu_dest = 0; 147 - u32 key0 = 0; 148 - 149 - /* Only use first entry in cpus_available */ 150 - cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0); 151 - if (!cpu_selected) 152 - return XDP_ABORTED; 153 - cpu_dest = *cpu_selected; 154 - 155 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 156 - if (!rec) 157 - return XDP_PASS; 158 - NO_TEAR_INC(rec->processed); 159 - 160 - if (cpu_dest >= nr_cpus) { 161 - NO_TEAR_INC(rec->issue); 162 - return XDP_ABORTED; 163 - } 164 - return bpf_redirect_map(&cpu_map, cpu_dest, 0); 165 - } 166 - 167 - SEC("xdp") 168 - int xdp_prognum1_touch_data(struct xdp_md *ctx) 169 - { 170 - void *data_end = (void *)(long)ctx->data_end; 171 - void *data = (void *)(long)ctx->data; 172 - u32 key = bpf_get_smp_processor_id(); 173 - struct ethhdr *eth = data; 174 - struct datarec *rec; 175 - u32 *cpu_selected; 176 - u32 cpu_dest = 0; 177 - u32 key0 = 0; 178 - u16 eth_type; 179 - 180 - /* Only use first entry in cpus_available */ 181 - cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0); 182 - if (!cpu_selected) 183 - return XDP_ABORTED; 184 - cpu_dest = *cpu_selected; 185 - 186 - /* Validate packet length is minimum Eth header size */ 187 - if (eth + 1 > data_end) 188 - return XDP_ABORTED; 189 - 190 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 191 - if (!rec) 192 - return XDP_PASS; 193 - NO_TEAR_INC(rec->processed); 194 - 195 - /* Read packet data, and use it (drop non 802.3 Ethertypes) */ 196 - eth_type = eth->h_proto; 197 - if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) { 198 - NO_TEAR_INC(rec->dropped); 199 - return XDP_DROP; 200 - } 201 - 202 - if (cpu_dest >= nr_cpus) { 203 - NO_TEAR_INC(rec->issue); 204 - return XDP_ABORTED; 205 - } 206 - return bpf_redirect_map(&cpu_map, cpu_dest, 0); 207 - } 208 - 209 - SEC("xdp") 210 - int xdp_prognum2_round_robin(struct xdp_md *ctx) 211 - { 212 - void *data_end = (void *)(long)ctx->data_end; 213 - void *data = (void *)(long)ctx->data; 214 - u32 key = 
bpf_get_smp_processor_id(); 215 - struct datarec *rec; 216 - u32 cpu_dest = 0; 217 - u32 key0 = 0; 218 - 219 - u32 *cpu_selected; 220 - u32 *cpu_iterator; 221 - u32 *cpu_max; 222 - u32 cpu_idx; 223 - 224 - cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); 225 - if (!cpu_max) 226 - return XDP_ABORTED; 227 - 228 - cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0); 229 - if (!cpu_iterator) 230 - return XDP_ABORTED; 231 - cpu_idx = *cpu_iterator; 232 - 233 - *cpu_iterator += 1; 234 - if (*cpu_iterator == *cpu_max) 235 - *cpu_iterator = 0; 236 - 237 - cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx); 238 - if (!cpu_selected) 239 - return XDP_ABORTED; 240 - cpu_dest = *cpu_selected; 241 - 242 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 243 - if (!rec) 244 - return XDP_PASS; 245 - NO_TEAR_INC(rec->processed); 246 - 247 - if (cpu_dest >= nr_cpus) { 248 - NO_TEAR_INC(rec->issue); 249 - return XDP_ABORTED; 250 - } 251 - return bpf_redirect_map(&cpu_map, cpu_dest, 0); 252 - } 253 - 254 - SEC("xdp") 255 - int xdp_prognum3_proto_separate(struct xdp_md *ctx) 256 - { 257 - void *data_end = (void *)(long)ctx->data_end; 258 - void *data = (void *)(long)ctx->data; 259 - u32 key = bpf_get_smp_processor_id(); 260 - struct ethhdr *eth = data; 261 - u8 ip_proto = IPPROTO_UDP; 262 - struct datarec *rec; 263 - u16 eth_proto = 0; 264 - u64 l3_offset = 0; 265 - u32 cpu_dest = 0; 266 - u32 *cpu_lookup; 267 - u32 cpu_idx = 0; 268 - 269 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 270 - if (!rec) 271 - return XDP_PASS; 272 - NO_TEAR_INC(rec->processed); 273 - 274 - if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset))) 275 - return XDP_PASS; /* Just skip */ 276 - 277 - /* Extract L4 protocol */ 278 - switch (eth_proto) { 279 - case ETH_P_IP: 280 - ip_proto = get_proto_ipv4(ctx, l3_offset); 281 - break; 282 - case ETH_P_IPV6: 283 - ip_proto = get_proto_ipv6(ctx, l3_offset); 284 - break; 285 - case ETH_P_ARP: 286 - cpu_idx = 0; /* ARP packet handled on separate CPU */ 287 
- break; 288 - default: 289 - cpu_idx = 0; 290 - } 291 - 292 - /* Choose CPU based on L4 protocol */ 293 - switch (ip_proto) { 294 - case IPPROTO_ICMP: 295 - case IPPROTO_ICMPV6: 296 - cpu_idx = 2; 297 - break; 298 - case IPPROTO_TCP: 299 - cpu_idx = 0; 300 - break; 301 - case IPPROTO_UDP: 302 - cpu_idx = 1; 303 - break; 304 - default: 305 - cpu_idx = 0; 306 - } 307 - 308 - cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); 309 - if (!cpu_lookup) 310 - return XDP_ABORTED; 311 - cpu_dest = *cpu_lookup; 312 - 313 - if (cpu_dest >= nr_cpus) { 314 - NO_TEAR_INC(rec->issue); 315 - return XDP_ABORTED; 316 - } 317 - return bpf_redirect_map(&cpu_map, cpu_dest, 0); 318 - } 319 - 320 - SEC("xdp") 321 - int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx) 322 - { 323 - void *data_end = (void *)(long)ctx->data_end; 324 - void *data = (void *)(long)ctx->data; 325 - u32 key = bpf_get_smp_processor_id(); 326 - struct ethhdr *eth = data; 327 - u8 ip_proto = IPPROTO_UDP; 328 - struct datarec *rec; 329 - u16 eth_proto = 0; 330 - u64 l3_offset = 0; 331 - u32 cpu_dest = 0; 332 - u32 *cpu_lookup; 333 - u32 cpu_idx = 0; 334 - u16 dest_port; 335 - 336 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 337 - if (!rec) 338 - return XDP_PASS; 339 - NO_TEAR_INC(rec->processed); 340 - 341 - if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset))) 342 - return XDP_PASS; /* Just skip */ 343 - 344 - /* Extract L4 protocol */ 345 - switch (eth_proto) { 346 - case ETH_P_IP: 347 - ip_proto = get_proto_ipv4(ctx, l3_offset); 348 - break; 349 - case ETH_P_IPV6: 350 - ip_proto = get_proto_ipv6(ctx, l3_offset); 351 - break; 352 - case ETH_P_ARP: 353 - cpu_idx = 0; /* ARP packet handled on separate CPU */ 354 - break; 355 - default: 356 - cpu_idx = 0; 357 - } 358 - 359 - /* Choose CPU based on L4 protocol */ 360 - switch (ip_proto) { 361 - case IPPROTO_ICMP: 362 - case IPPROTO_ICMPV6: 363 - cpu_idx = 2; 364 - break; 365 - case IPPROTO_TCP: 366 - cpu_idx = 0; 367 - break; 368 - case IPPROTO_UDP: 369 
- cpu_idx = 1; 370 - /* DDoS filter UDP port 9 (pktgen) */ 371 - dest_port = get_dest_port_ipv4_udp(ctx, l3_offset); 372 - if (dest_port == 9) { 373 - NO_TEAR_INC(rec->dropped); 374 - return XDP_DROP; 375 - } 376 - break; 377 - default: 378 - cpu_idx = 0; 379 - } 380 - 381 - cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); 382 - if (!cpu_lookup) 383 - return XDP_ABORTED; 384 - cpu_dest = *cpu_lookup; 385 - 386 - if (cpu_dest >= nr_cpus) { 387 - NO_TEAR_INC(rec->issue); 388 - return XDP_ABORTED; 389 - } 390 - return bpf_redirect_map(&cpu_map, cpu_dest, 0); 391 - } 392 - 393 - /* Hashing initval */ 394 - #define INITVAL 15485863 395 - 396 - static __always_inline 397 - u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off) 398 - { 399 - void *data_end = (void *)(long)ctx->data_end; 400 - void *data = (void *)(long)ctx->data; 401 - struct iphdr *iph = data + nh_off; 402 - u32 cpu_hash; 403 - 404 - if (iph + 1 > data_end) 405 - return 0; 406 - 407 - cpu_hash = iph->saddr + iph->daddr; 408 - cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol); 409 - 410 - return cpu_hash; 411 - } 412 - 413 - static __always_inline 414 - u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off) 415 - { 416 - void *data_end = (void *)(long)ctx->data_end; 417 - void *data = (void *)(long)ctx->data; 418 - struct ipv6hdr *ip6h = data + nh_off; 419 - u32 cpu_hash; 420 - 421 - if (ip6h + 1 > data_end) 422 - return 0; 423 - 424 - cpu_hash = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0]; 425 - cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1]; 426 - cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2]; 427 - cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3]; 428 - cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr); 429 - 430 - return cpu_hash; 431 - } 432 - 433 - /* Load-Balance traffic based on hashing IP-addrs + L4-proto. 
The 434 - * hashing scheme is symmetric, meaning swapping IP src/dest still hit 435 - * same CPU. 436 - */ 437 - SEC("xdp") 438 - int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx) 439 - { 440 - void *data_end = (void *)(long)ctx->data_end; 441 - void *data = (void *)(long)ctx->data; 442 - u32 key = bpf_get_smp_processor_id(); 443 - struct ethhdr *eth = data; 444 - struct datarec *rec; 445 - u16 eth_proto = 0; 446 - u64 l3_offset = 0; 447 - u32 cpu_dest = 0; 448 - u32 cpu_idx = 0; 449 - u32 *cpu_lookup; 450 - u32 key0 = 0; 451 - u32 *cpu_max; 452 - u32 cpu_hash; 453 - 454 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 455 - if (!rec) 456 - return XDP_PASS; 457 - NO_TEAR_INC(rec->processed); 458 - 459 - cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); 460 - if (!cpu_max) 461 - return XDP_ABORTED; 462 - 463 - if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset))) 464 - return XDP_PASS; /* Just skip */ 465 - 466 - /* Hash for IPv4 and IPv6 */ 467 - switch (eth_proto) { 468 - case ETH_P_IP: 469 - cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset); 470 - break; 471 - case ETH_P_IPV6: 472 - cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset); 473 - break; 474 - case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */ 475 - default: 476 - cpu_hash = 0; 477 - } 478 - 479 - /* Choose CPU based on hash */ 480 - cpu_idx = cpu_hash % *cpu_max; 481 - 482 - cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); 483 - if (!cpu_lookup) 484 - return XDP_ABORTED; 485 - cpu_dest = *cpu_lookup; 486 - 487 - if (cpu_dest >= nr_cpus) { 488 - NO_TEAR_INC(rec->issue); 489 - return XDP_ABORTED; 490 - } 491 - return bpf_redirect_map(&cpu_map, cpu_dest, 0); 492 - } 493 - 494 - SEC("xdp/cpumap") 495 - int xdp_redirect_cpu_devmap(struct xdp_md *ctx) 496 - { 497 - void *data_end = (void *)(long)ctx->data_end; 498 - void *data = (void *)(long)ctx->data; 499 - struct ethhdr *eth = data; 500 - u64 nh_off; 501 - 502 - nh_off = sizeof(*eth); 503 - if (data + nh_off > data_end) 504 - return 
XDP_DROP; 505 - 506 - swap_src_dst_mac(data); 507 - return bpf_redirect_map(&tx_port, 0, 0); 508 - } 509 - 510 - SEC("xdp/cpumap") 511 - int xdp_redirect_cpu_pass(struct xdp_md *ctx) 512 - { 513 - return XDP_PASS; 514 - } 515 - 516 - SEC("xdp/cpumap") 517 - int xdp_redirect_cpu_drop(struct xdp_md *ctx) 518 - { 519 - return XDP_DROP; 520 - } 521 - 522 - SEC("xdp/devmap") 523 - int xdp_redirect_egress_prog(struct xdp_md *ctx) 524 - { 525 - void *data_end = (void *)(long)ctx->data_end; 526 - void *data = (void *)(long)ctx->data; 527 - struct ethhdr *eth = data; 528 - u64 nh_off; 529 - 530 - nh_off = sizeof(*eth); 531 - if (data + nh_off > data_end) 532 - return XDP_DROP; 533 - 534 - __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN); 535 - 536 - return XDP_PASS; 537 - } 538 - 539 - char _license[] SEC("license") = "GPL";
-559
samples/bpf/xdp_redirect_cpu_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc. 3 - */ 4 - static const char *__doc__ = 5 - "XDP CPU redirect tool, using BPF_MAP_TYPE_CPUMAP\n" 6 - "Usage: xdp_redirect_cpu -d <IFINDEX|IFNAME> -c 0 ... -c N\n" 7 - "Valid specification for CPUMAP BPF program:\n" 8 - " --mprog-name/-e pass (use built-in XDP_PASS program)\n" 9 - " --mprog-name/-e drop (use built-in XDP_DROP program)\n" 10 - " --redirect-device/-r <ifindex|ifname> (use built-in DEVMAP redirect program)\n" 11 - " Custom CPUMAP BPF program:\n" 12 - " --mprog-filename/-f <filename> --mprog-name/-e <program>\n" 13 - " Optionally, also pass --redirect-map/-m and --redirect-device/-r together\n" 14 - " to configure DEVMAP in BPF object <filename>\n"; 15 - 16 - #include <errno.h> 17 - #include <signal.h> 18 - #include <stdio.h> 19 - #include <stdlib.h> 20 - #include <stdbool.h> 21 - #include <string.h> 22 - #include <unistd.h> 23 - #include <locale.h> 24 - #include <sys/sysinfo.h> 25 - #include <getopt.h> 26 - #include <net/if.h> 27 - #include <time.h> 28 - #include <linux/limits.h> 29 - #include <arpa/inet.h> 30 - #include <linux/if_link.h> 31 - #include <bpf/bpf.h> 32 - #include <bpf/libbpf.h> 33 - #include "bpf_util.h" 34 - #include "xdp_sample_user.h" 35 - #include "xdp_redirect_cpu.skel.h" 36 - 37 - static int map_fd; 38 - static int avail_fd; 39 - static int count_fd; 40 - 41 - static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | 42 - SAMPLE_CPUMAP_ENQUEUE_CNT | SAMPLE_CPUMAP_KTHREAD_CNT | 43 - SAMPLE_EXCEPTION_CNT; 44 - 45 - DEFINE_SAMPLE_INIT(xdp_redirect_cpu); 46 - 47 - static const struct option long_options[] = { 48 - { "help", no_argument, NULL, 'h' }, 49 - { "dev", required_argument, NULL, 'd' }, 50 - { "skb-mode", no_argument, NULL, 'S' }, 51 - { "progname", required_argument, NULL, 'p' }, 52 - { "qsize", required_argument, NULL, 'q' }, 53 - { "cpu", required_argument, NULL, 'c' }, 54 - { "stress-mode", no_argument, 
NULL, 'x' }, 55 - { "force", no_argument, NULL, 'F' }, 56 - { "interval", required_argument, NULL, 'i' }, 57 - { "verbose", no_argument, NULL, 'v' }, 58 - { "stats", no_argument, NULL, 's' }, 59 - { "mprog-name", required_argument, NULL, 'e' }, 60 - { "mprog-filename", required_argument, NULL, 'f' }, 61 - { "redirect-device", required_argument, NULL, 'r' }, 62 - { "redirect-map", required_argument, NULL, 'm' }, 63 - {} 64 - }; 65 - 66 - static void print_avail_progs(struct bpf_object *obj) 67 - { 68 - struct bpf_program *pos; 69 - 70 - printf(" Programs to be used for -p/--progname:\n"); 71 - bpf_object__for_each_program(pos, obj) { 72 - if (bpf_program__type(pos) == BPF_PROG_TYPE_XDP) { 73 - if (!strncmp(bpf_program__name(pos), "xdp_prognum", 74 - sizeof("xdp_prognum") - 1)) 75 - printf(" %s\n", bpf_program__name(pos)); 76 - } 77 - } 78 - } 79 - 80 - static void usage(char *argv[], const struct option *long_options, 81 - const char *doc, int mask, bool error, struct bpf_object *obj) 82 - { 83 - sample_usage(argv, long_options, doc, mask, error); 84 - print_avail_progs(obj); 85 - } 86 - 87 - static int create_cpu_entry(__u32 cpu, struct bpf_cpumap_val *value, 88 - __u32 avail_idx, bool new) 89 - { 90 - __u32 curr_cpus_count = 0; 91 - __u32 key = 0; 92 - int ret; 93 - 94 - /* Add a CPU entry to cpumap, as this allocate a cpu entry in 95 - * the kernel for the cpu. 96 - */ 97 - ret = bpf_map_update_elem(map_fd, &cpu, value, 0); 98 - if (ret < 0) { 99 - fprintf(stderr, "Create CPU entry failed: %s\n", strerror(errno)); 100 - return ret; 101 - } 102 - 103 - /* Inform bpf_prog's that a new CPU is available to select 104 - * from via some control maps. 
105 - */ 106 - ret = bpf_map_update_elem(avail_fd, &avail_idx, &cpu, 0); 107 - if (ret < 0) { 108 - fprintf(stderr, "Add to avail CPUs failed: %s\n", strerror(errno)); 109 - return ret; 110 - } 111 - 112 - /* When not replacing/updating existing entry, bump the count */ 113 - ret = bpf_map_lookup_elem(count_fd, &key, &curr_cpus_count); 114 - if (ret < 0) { 115 - fprintf(stderr, "Failed reading curr cpus_count: %s\n", 116 - strerror(errno)); 117 - return ret; 118 - } 119 - if (new) { 120 - curr_cpus_count++; 121 - ret = bpf_map_update_elem(count_fd, &key, 122 - &curr_cpus_count, 0); 123 - if (ret < 0) { 124 - fprintf(stderr, "Failed write curr cpus_count: %s\n", 125 - strerror(errno)); 126 - return ret; 127 - } 128 - } 129 - 130 - printf("%s CPU: %u as idx: %u qsize: %d cpumap_prog_fd: %d (cpus_count: %u)\n", 131 - new ? "Add new" : "Replace", cpu, avail_idx, 132 - value->qsize, value->bpf_prog.fd, curr_cpus_count); 133 - 134 - return 0; 135 - } 136 - 137 - /* CPUs are zero-indexed. Thus, add a special sentinel default value 138 - * in map cpus_available to mark CPU index'es not configured 139 - */ 140 - static int mark_cpus_unavailable(void) 141 - { 142 - int ret, i, n_cpus = libbpf_num_possible_cpus(); 143 - __u32 invalid_cpu = n_cpus; 144 - 145 - for (i = 0; i < n_cpus; i++) { 146 - ret = bpf_map_update_elem(avail_fd, &i, 147 - &invalid_cpu, 0); 148 - if (ret < 0) { 149 - fprintf(stderr, "Failed marking CPU unavailable: %s\n", 150 - strerror(errno)); 151 - return ret; 152 - } 153 - } 154 - 155 - return 0; 156 - } 157 - 158 - /* Stress cpumap management code by concurrently changing underlying cpumap */ 159 - static void stress_cpumap(void *ctx) 160 - { 161 - struct bpf_cpumap_val *value = ctx; 162 - 163 - /* Changing qsize will cause kernel to free and alloc a new 164 - * bpf_cpu_map_entry, with an associated/complicated tear-down 165 - * procedure. 
166 - */ 167 - value->qsize = 1024; 168 - create_cpu_entry(1, value, 0, false); 169 - value->qsize = 8; 170 - create_cpu_entry(1, value, 0, false); 171 - value->qsize = 16000; 172 - create_cpu_entry(1, value, 0, false); 173 - } 174 - 175 - static int set_cpumap_prog(struct xdp_redirect_cpu *skel, 176 - const char *redir_interface, const char *redir_map, 177 - const char *mprog_filename, const char *mprog_name) 178 - { 179 - if (mprog_filename) { 180 - struct bpf_program *prog; 181 - struct bpf_object *obj; 182 - int ret; 183 - 184 - if (!mprog_name) { 185 - fprintf(stderr, "BPF program not specified for file %s\n", 186 - mprog_filename); 187 - goto end; 188 - } 189 - if ((redir_interface && !redir_map) || (!redir_interface && redir_map)) { 190 - fprintf(stderr, "--redirect-%s specified but --redirect-%s not specified\n", 191 - redir_interface ? "device" : "map", redir_interface ? "map" : "device"); 192 - goto end; 193 - } 194 - 195 - /* Custom BPF program */ 196 - obj = bpf_object__open_file(mprog_filename, NULL); 197 - if (!obj) { 198 - ret = -errno; 199 - fprintf(stderr, "Failed to bpf_prog_load_xattr: %s\n", 200 - strerror(errno)); 201 - return ret; 202 - } 203 - 204 - ret = bpf_object__load(obj); 205 - if (ret < 0) { 206 - ret = -errno; 207 - fprintf(stderr, "Failed to bpf_object__load: %s\n", 208 - strerror(errno)); 209 - return ret; 210 - } 211 - 212 - if (redir_map) { 213 - int err, redir_map_fd, ifindex_out, key = 0; 214 - 215 - redir_map_fd = bpf_object__find_map_fd_by_name(obj, redir_map); 216 - if (redir_map_fd < 0) { 217 - fprintf(stderr, "Failed to bpf_object__find_map_fd_by_name: %s\n", 218 - strerror(errno)); 219 - return redir_map_fd; 220 - } 221 - 222 - ifindex_out = if_nametoindex(redir_interface); 223 - if (!ifindex_out) 224 - ifindex_out = strtoul(redir_interface, NULL, 0); 225 - if (!ifindex_out) { 226 - fprintf(stderr, "Bad interface name or index\n"); 227 - return -EINVAL; 228 - } 229 - 230 - err = bpf_map_update_elem(redir_map_fd, &key, 
&ifindex_out, 0); 231 - if (err < 0) 232 - return err; 233 - } 234 - 235 - prog = bpf_object__find_program_by_name(obj, mprog_name); 236 - if (!prog) { 237 - ret = -errno; 238 - fprintf(stderr, "Failed to bpf_object__find_program_by_name: %s\n", 239 - strerror(errno)); 240 - return ret; 241 - } 242 - 243 - return bpf_program__fd(prog); 244 - } else { 245 - if (mprog_name) { 246 - if (redir_interface || redir_map) { 247 - fprintf(stderr, "Need to specify --mprog-filename/-f\n"); 248 - goto end; 249 - } 250 - if (!strcmp(mprog_name, "pass") || !strcmp(mprog_name, "drop")) { 251 - /* Use built-in pass/drop programs */ 252 - return *mprog_name == 'p' ? bpf_program__fd(skel->progs.xdp_redirect_cpu_pass) 253 - : bpf_program__fd(skel->progs.xdp_redirect_cpu_drop); 254 - } else { 255 - fprintf(stderr, "Unknown name \"%s\" for built-in BPF program\n", 256 - mprog_name); 257 - goto end; 258 - } 259 - } else { 260 - if (redir_map) { 261 - fprintf(stderr, "Need to specify --mprog-filename, --mprog-name and" 262 - " --redirect-device with --redirect-map\n"); 263 - goto end; 264 - } 265 - if (redir_interface) { 266 - /* Use built-in devmap redirect */ 267 - struct bpf_devmap_val val = {}; 268 - int ifindex_out, err; 269 - __u32 key = 0; 270 - 271 - if (!redir_interface) 272 - return 0; 273 - 274 - ifindex_out = if_nametoindex(redir_interface); 275 - if (!ifindex_out) 276 - ifindex_out = strtoul(redir_interface, NULL, 0); 277 - if (!ifindex_out) { 278 - fprintf(stderr, "Bad interface name or index\n"); 279 - return -EINVAL; 280 - } 281 - 282 - if (get_mac_addr(ifindex_out, skel->bss->tx_mac_addr) < 0) { 283 - printf("Get interface %d mac failed\n", ifindex_out); 284 - return -EINVAL; 285 - } 286 - 287 - val.ifindex = ifindex_out; 288 - val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_egress_prog); 289 - err = bpf_map_update_elem(bpf_map__fd(skel->maps.tx_port), &key, &val, 0); 290 - if (err < 0) 291 - return -errno; 292 - 293 - return 
bpf_program__fd(skel->progs.xdp_redirect_cpu_devmap); 294 - } 295 - } 296 - } 297 - 298 - /* Disabled */ 299 - return 0; 300 - end: 301 - fprintf(stderr, "Invalid options for CPUMAP BPF program\n"); 302 - return -EINVAL; 303 - } 304 - 305 - int main(int argc, char **argv) 306 - { 307 - const char *redir_interface = NULL, *redir_map = NULL; 308 - const char *mprog_filename = NULL, *mprog_name = NULL; 309 - struct xdp_redirect_cpu *skel; 310 - struct bpf_map_info info = {}; 311 - struct bpf_cpumap_val value; 312 - __u32 infosz = sizeof(info); 313 - int ret = EXIT_FAIL_OPTION; 314 - unsigned long interval = 2; 315 - bool stress_mode = false; 316 - struct bpf_program *prog; 317 - const char *prog_name; 318 - bool generic = false; 319 - bool force = false; 320 - int added_cpus = 0; 321 - bool error = true; 322 - int longindex = 0; 323 - int add_cpu = -1; 324 - int ifindex = -1; 325 - int *cpu, i, opt; 326 - __u32 qsize; 327 - int n_cpus; 328 - 329 - n_cpus = libbpf_num_possible_cpus(); 330 - 331 - /* Notice: Choosing the queue size is very important when CPU is 332 - * configured with power-saving states. 333 - * 334 - * If deepest state take 133 usec to wakeup from (133/10^6). When link 335 - * speed is 10Gbit/s ((10*10^9/8) in bytes/sec). How many bytes can 336 - * arrive with in 133 usec at this speed: (10*10^9/8)*(133/10^6) = 337 - * 166250 bytes. With MTU size packets this is 110 packets, and with 338 - * minimum Ethernet (MAC-preamble + intergap) 84 bytes is 1979 packets. 339 - * 340 - * Setting default cpumap queue to 2048 as worst-case (small packet) 341 - * should be +64 packet due kthread wakeup call (due to xdp_do_flush) 342 - * worst-case is 2043 packets. 
343 - * 344 - * Sysadm can configured system to avoid deep-sleep via: 345 - * tuned-adm profile network-latency 346 - */ 347 - qsize = 2048; 348 - 349 - skel = xdp_redirect_cpu__open(); 350 - if (!skel) { 351 - fprintf(stderr, "Failed to xdp_redirect_cpu__open: %s\n", 352 - strerror(errno)); 353 - ret = EXIT_FAIL_BPF; 354 - goto end; 355 - } 356 - 357 - ret = sample_init_pre_load(skel); 358 - if (ret < 0) { 359 - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); 360 - ret = EXIT_FAIL_BPF; 361 - goto end_destroy; 362 - } 363 - 364 - if (bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus) < 0) { 365 - fprintf(stderr, "Failed to set max entries for cpu_map map: %s", 366 - strerror(errno)); 367 - ret = EXIT_FAIL_BPF; 368 - goto end_destroy; 369 - } 370 - 371 - if (bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus) < 0) { 372 - fprintf(stderr, "Failed to set max entries for cpus_available map: %s", 373 - strerror(errno)); 374 - ret = EXIT_FAIL_BPF; 375 - goto end_destroy; 376 - } 377 - 378 - cpu = calloc(n_cpus, sizeof(int)); 379 - if (!cpu) { 380 - fprintf(stderr, "Failed to allocate cpu array\n"); 381 - goto end_destroy; 382 - } 383 - 384 - prog = skel->progs.xdp_prognum5_lb_hash_ip_pairs; 385 - while ((opt = getopt_long(argc, argv, "d:si:Sxp:f:e:r:m:c:q:Fvh", 386 - long_options, &longindex)) != -1) { 387 - switch (opt) { 388 - case 'd': 389 - if (strlen(optarg) >= IF_NAMESIZE) { 390 - fprintf(stderr, "-d/--dev name too long\n"); 391 - usage(argv, long_options, __doc__, mask, true, skel->obj); 392 - goto end_cpu; 393 - } 394 - ifindex = if_nametoindex(optarg); 395 - if (!ifindex) 396 - ifindex = strtoul(optarg, NULL, 0); 397 - if (!ifindex) { 398 - fprintf(stderr, "Bad interface index or name (%d): %s\n", 399 - errno, strerror(errno)); 400 - usage(argv, long_options, __doc__, mask, true, skel->obj); 401 - goto end_cpu; 402 - } 403 - break; 404 - case 's': 405 - mask |= SAMPLE_REDIRECT_MAP_CNT; 406 - break; 407 - case 'i': 408 - 
interval = strtoul(optarg, NULL, 0); 409 - break; 410 - case 'S': 411 - generic = true; 412 - break; 413 - case 'x': 414 - stress_mode = true; 415 - break; 416 - case 'p': 417 - /* Selecting eBPF prog to load */ 418 - prog_name = optarg; 419 - prog = bpf_object__find_program_by_name(skel->obj, 420 - prog_name); 421 - if (!prog) { 422 - fprintf(stderr, 423 - "Failed to find program %s specified by" 424 - " option -p/--progname\n", 425 - prog_name); 426 - print_avail_progs(skel->obj); 427 - goto end_cpu; 428 - } 429 - break; 430 - case 'f': 431 - mprog_filename = optarg; 432 - break; 433 - case 'e': 434 - mprog_name = optarg; 435 - break; 436 - case 'r': 437 - redir_interface = optarg; 438 - mask |= SAMPLE_DEVMAP_XMIT_CNT_MULTI; 439 - break; 440 - case 'm': 441 - redir_map = optarg; 442 - break; 443 - case 'c': 444 - /* Add multiple CPUs */ 445 - add_cpu = strtoul(optarg, NULL, 0); 446 - if (add_cpu >= n_cpus) { 447 - fprintf(stderr, 448 - "--cpu nr too large for cpumap err (%d):%s\n", 449 - errno, strerror(errno)); 450 - usage(argv, long_options, __doc__, mask, true, skel->obj); 451 - goto end_cpu; 452 - } 453 - cpu[added_cpus++] = add_cpu; 454 - break; 455 - case 'q': 456 - qsize = strtoul(optarg, NULL, 0); 457 - break; 458 - case 'F': 459 - force = true; 460 - break; 461 - case 'v': 462 - sample_switch_mode(); 463 - break; 464 - case 'h': 465 - error = false; 466 - default: 467 - usage(argv, long_options, __doc__, mask, error, skel->obj); 468 - goto end_cpu; 469 - } 470 - } 471 - 472 - ret = EXIT_FAIL_OPTION; 473 - if (ifindex == -1) { 474 - fprintf(stderr, "Required option --dev missing\n"); 475 - usage(argv, long_options, __doc__, mask, true, skel->obj); 476 - goto end_cpu; 477 - } 478 - 479 - if (add_cpu == -1) { 480 - fprintf(stderr, "Required option --cpu missing\n" 481 - "Specify multiple --cpu option to add more\n"); 482 - usage(argv, long_options, __doc__, mask, true, skel->obj); 483 - goto end_cpu; 484 - } 485 - 486 - skel->rodata->from_match[0] = 
ifindex; 487 - if (redir_interface) 488 - skel->rodata->to_match[0] = if_nametoindex(redir_interface); 489 - 490 - ret = xdp_redirect_cpu__load(skel); 491 - if (ret < 0) { 492 - fprintf(stderr, "Failed to xdp_redirect_cpu__load: %s\n", 493 - strerror(errno)); 494 - goto end_cpu; 495 - } 496 - 497 - ret = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.cpu_map), &info, &infosz); 498 - if (ret < 0) { 499 - fprintf(stderr, "Failed bpf_map_get_info_by_fd for cpumap: %s\n", 500 - strerror(errno)); 501 - goto end_cpu; 502 - } 503 - 504 - skel->bss->cpumap_map_id = info.id; 505 - 506 - map_fd = bpf_map__fd(skel->maps.cpu_map); 507 - avail_fd = bpf_map__fd(skel->maps.cpus_available); 508 - count_fd = bpf_map__fd(skel->maps.cpus_count); 509 - 510 - ret = mark_cpus_unavailable(); 511 - if (ret < 0) { 512 - fprintf(stderr, "Unable to mark CPUs as unavailable\n"); 513 - goto end_cpu; 514 - } 515 - 516 - ret = sample_init(skel, mask); 517 - if (ret < 0) { 518 - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); 519 - ret = EXIT_FAIL; 520 - goto end_cpu; 521 - } 522 - 523 - value.bpf_prog.fd = set_cpumap_prog(skel, redir_interface, redir_map, 524 - mprog_filename, mprog_name); 525 - if (value.bpf_prog.fd < 0) { 526 - fprintf(stderr, "Failed to set CPUMAP BPF program: %s\n", 527 - strerror(-value.bpf_prog.fd)); 528 - usage(argv, long_options, __doc__, mask, true, skel->obj); 529 - ret = EXIT_FAIL_BPF; 530 - goto end_cpu; 531 - } 532 - value.qsize = qsize; 533 - 534 - for (i = 0; i < added_cpus; i++) { 535 - if (create_cpu_entry(cpu[i], &value, i, true) < 0) { 536 - fprintf(stderr, "Cannot proceed, exiting\n"); 537 - usage(argv, long_options, __doc__, mask, true, skel->obj); 538 - goto end_cpu; 539 - } 540 - } 541 - 542 - ret = EXIT_FAIL_XDP; 543 - if (sample_install_xdp(prog, ifindex, generic, force) < 0) 544 - goto end_cpu; 545 - 546 - ret = sample_run(interval, stress_mode ? 
stress_cpumap : NULL, &value); 547 - if (ret < 0) { 548 - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); 549 - ret = EXIT_FAIL; 550 - goto end_cpu; 551 - } 552 - ret = EXIT_OK; 553 - end_cpu: 554 - free(cpu); 555 - end_destroy: 556 - xdp_redirect_cpu__destroy(skel); 557 - end: 558 - sample_exit(ret); 559 - }
-97
samples/bpf/xdp_redirect_map.bpf.c
··· 1 - /* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io 2 - * 3 - * This program is free software; you can redistribute it and/or 4 - * modify it under the terms of version 2 of the GNU General Public 5 - * License as published by the Free Software Foundation. 6 - * 7 - * This program is distributed in the hope that it will be useful, but 8 - * WITHOUT ANY WARRANTY; without even the implied warranty of 9 - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 10 - * General Public License for more details. 11 - */ 12 - #define KBUILD_MODNAME "foo" 13 - 14 - #include "vmlinux.h" 15 - #include "xdp_sample.bpf.h" 16 - #include "xdp_sample_shared.h" 17 - 18 - /* The 2nd xdp prog on egress does not support skb mode, so we define two 19 - * maps, tx_port_general and tx_port_native. 20 - */ 21 - struct { 22 - __uint(type, BPF_MAP_TYPE_DEVMAP); 23 - __uint(key_size, sizeof(int)); 24 - __uint(value_size, sizeof(int)); 25 - __uint(max_entries, 1); 26 - } tx_port_general SEC(".maps"); 27 - 28 - struct { 29 - __uint(type, BPF_MAP_TYPE_DEVMAP); 30 - __uint(key_size, sizeof(int)); 31 - __uint(value_size, sizeof(struct bpf_devmap_val)); 32 - __uint(max_entries, 1); 33 - } tx_port_native SEC(".maps"); 34 - 35 - /* store egress interface mac address */ 36 - const volatile __u8 tx_mac_addr[ETH_ALEN]; 37 - 38 - static __always_inline int xdp_redirect_map(struct xdp_md *ctx, void *redirect_map) 39 - { 40 - void *data_end = (void *)(long)ctx->data_end; 41 - void *data = (void *)(long)ctx->data; 42 - u32 key = bpf_get_smp_processor_id(); 43 - struct ethhdr *eth = data; 44 - struct datarec *rec; 45 - u64 nh_off; 46 - 47 - nh_off = sizeof(*eth); 48 - if (data + nh_off > data_end) 49 - return XDP_DROP; 50 - 51 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 52 - if (!rec) 53 - return XDP_PASS; 54 - NO_TEAR_INC(rec->processed); 55 - swap_src_dst_mac(data); 56 - return bpf_redirect_map(redirect_map, 0, 0); 57 - } 58 - 59 - SEC("xdp") 60 - int 
xdp_redirect_map_general(struct xdp_md *ctx) 61 - { 62 - return xdp_redirect_map(ctx, &tx_port_general); 63 - } 64 - 65 - SEC("xdp") 66 - int xdp_redirect_map_native(struct xdp_md *ctx) 67 - { 68 - return xdp_redirect_map(ctx, &tx_port_native); 69 - } 70 - 71 - SEC("xdp/devmap") 72 - int xdp_redirect_map_egress(struct xdp_md *ctx) 73 - { 74 - void *data_end = (void *)(long)ctx->data_end; 75 - void *data = (void *)(long)ctx->data; 76 - u8 *mac_addr = (u8 *) tx_mac_addr; 77 - struct ethhdr *eth = data; 78 - u64 nh_off; 79 - 80 - nh_off = sizeof(*eth); 81 - if (data + nh_off > data_end) 82 - return XDP_DROP; 83 - 84 - barrier_var(mac_addr); /* prevent optimizing out memcpy */ 85 - __builtin_memcpy(eth->h_source, mac_addr, ETH_ALEN); 86 - 87 - return XDP_PASS; 88 - } 89 - 90 - /* Redirect require an XDP bpf_prog loaded on the TX device */ 91 - SEC("xdp") 92 - int xdp_redirect_dummy_prog(struct xdp_md *ctx) 93 - { 94 - return XDP_PASS; 95 - } 96 - 97 - char _license[] SEC("license") = "GPL";
-77
samples/bpf/xdp_redirect_map_multi.bpf.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - #define KBUILD_MODNAME "foo" 3 - 4 - #include "vmlinux.h" 5 - #include "xdp_sample.bpf.h" 6 - #include "xdp_sample_shared.h" 7 - 8 - struct { 9 - __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); 10 - __uint(key_size, sizeof(int)); 11 - __uint(value_size, sizeof(int)); 12 - __uint(max_entries, 32); 13 - } forward_map_general SEC(".maps"); 14 - 15 - struct { 16 - __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); 17 - __uint(key_size, sizeof(int)); 18 - __uint(value_size, sizeof(struct bpf_devmap_val)); 19 - __uint(max_entries, 32); 20 - } forward_map_native SEC(".maps"); 21 - 22 - /* map to store egress interfaces mac addresses */ 23 - struct { 24 - __uint(type, BPF_MAP_TYPE_HASH); 25 - __type(key, u32); 26 - __type(value, __be64); 27 - __uint(max_entries, 32); 28 - } mac_map SEC(".maps"); 29 - 30 - static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map) 31 - { 32 - u32 key = bpf_get_smp_processor_id(); 33 - struct datarec *rec; 34 - 35 - rec = bpf_map_lookup_elem(&rx_cnt, &key); 36 - if (!rec) 37 - return XDP_PASS; 38 - NO_TEAR_INC(rec->processed); 39 - 40 - return bpf_redirect_map(forward_map, 0, 41 - BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS); 42 - } 43 - 44 - SEC("xdp") 45 - int xdp_redirect_map_general(struct xdp_md *ctx) 46 - { 47 - return xdp_redirect_map(ctx, &forward_map_general); 48 - } 49 - 50 - SEC("xdp") 51 - int xdp_redirect_map_native(struct xdp_md *ctx) 52 - { 53 - return xdp_redirect_map(ctx, &forward_map_native); 54 - } 55 - 56 - SEC("xdp/devmap") 57 - int xdp_devmap_prog(struct xdp_md *ctx) 58 - { 59 - void *data_end = (void *)(long)ctx->data_end; 60 - void *data = (void *)(long)ctx->data; 61 - u32 key = ctx->egress_ifindex; 62 - struct ethhdr *eth = data; 63 - __be64 *mac; 64 - u64 nh_off; 65 - 66 - nh_off = sizeof(*eth); 67 - if (data + nh_off > data_end) 68 - return XDP_DROP; 69 - 70 - mac = bpf_map_lookup_elem(&mac_map, &key); 71 - if (mac) 72 - __builtin_memcpy(eth->h_source, mac, ETH_ALEN); 73 - 74 - 
return XDP_PASS; 75 - } 76 - 77 - char _license[] SEC("license") = "GPL";
-232
samples/bpf/xdp_redirect_map_multi_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - static const char *__doc__ = 3 - "XDP multi redirect tool, using BPF_MAP_TYPE_DEVMAP and BPF_F_BROADCAST flag for bpf_redirect_map\n" 4 - "Usage: xdp_redirect_map_multi <IFINDEX|IFNAME> <IFINDEX|IFNAME> ... <IFINDEX|IFNAME>\n"; 5 - 6 - #include <linux/bpf.h> 7 - #include <linux/if_link.h> 8 - #include <assert.h> 9 - #include <getopt.h> 10 - #include <errno.h> 11 - #include <signal.h> 12 - #include <stdio.h> 13 - #include <stdlib.h> 14 - #include <string.h> 15 - #include <net/if.h> 16 - #include <unistd.h> 17 - #include <libgen.h> 18 - #include <sys/ioctl.h> 19 - #include <sys/types.h> 20 - #include <sys/socket.h> 21 - #include <netinet/in.h> 22 - #include <linux/if_ether.h> 23 - #include <bpf/bpf.h> 24 - #include <bpf/libbpf.h> 25 - #include "bpf_util.h" 26 - #include "xdp_sample_user.h" 27 - #include "xdp_redirect_map_multi.skel.h" 28 - 29 - #define MAX_IFACE_NUM 32 30 - static int ifaces[MAX_IFACE_NUM] = {}; 31 - 32 - static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | 33 - SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT | 34 - SAMPLE_DEVMAP_XMIT_CNT_MULTI | SAMPLE_SKIP_HEADING; 35 - 36 - DEFINE_SAMPLE_INIT(xdp_redirect_map_multi); 37 - 38 - static const struct option long_options[] = { 39 - { "help", no_argument, NULL, 'h' }, 40 - { "skb-mode", no_argument, NULL, 'S' }, 41 - { "force", no_argument, NULL, 'F' }, 42 - { "load-egress", no_argument, NULL, 'X' }, 43 - { "stats", no_argument, NULL, 's' }, 44 - { "interval", required_argument, NULL, 'i' }, 45 - { "verbose", no_argument, NULL, 'v' }, 46 - {} 47 - }; 48 - 49 - static int update_mac_map(struct bpf_map *map) 50 - { 51 - int mac_map_fd = bpf_map__fd(map); 52 - unsigned char mac_addr[6]; 53 - unsigned int ifindex; 54 - int i, ret = -1; 55 - 56 - for (i = 0; ifaces[i] > 0; i++) { 57 - ifindex = ifaces[i]; 58 - 59 - ret = get_mac_addr(ifindex, mac_addr); 60 - if (ret < 0) { 61 - fprintf(stderr, "get interface %d mac failed\n", 62 - ifindex); 63 - return 
ret; 64 - } 65 - 66 - ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0); 67 - if (ret < 0) { 68 - fprintf(stderr, "Failed to update mac address for ifindex %d\n", 69 - ifindex); 70 - return ret; 71 - } 72 - } 73 - 74 - return 0; 75 - } 76 - 77 - int main(int argc, char **argv) 78 - { 79 - struct bpf_devmap_val devmap_val = {}; 80 - struct xdp_redirect_map_multi *skel; 81 - struct bpf_program *ingress_prog; 82 - bool xdp_devmap_attached = false; 83 - struct bpf_map *forward_map; 84 - int ret = EXIT_FAIL_OPTION; 85 - unsigned long interval = 2; 86 - char ifname[IF_NAMESIZE]; 87 - unsigned int ifindex; 88 - bool generic = false; 89 - bool force = false; 90 - bool tried = false; 91 - bool error = true; 92 - int i, opt; 93 - 94 - while ((opt = getopt_long(argc, argv, "hSFXi:vs", 95 - long_options, NULL)) != -1) { 96 - switch (opt) { 97 - case 'S': 98 - generic = true; 99 - /* devmap_xmit tracepoint not available */ 100 - mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | 101 - SAMPLE_DEVMAP_XMIT_CNT_MULTI); 102 - break; 103 - case 'F': 104 - force = true; 105 - break; 106 - case 'X': 107 - xdp_devmap_attached = true; 108 - break; 109 - case 'i': 110 - interval = strtoul(optarg, NULL, 0); 111 - break; 112 - case 'v': 113 - sample_switch_mode(); 114 - break; 115 - case 's': 116 - mask |= SAMPLE_REDIRECT_MAP_CNT; 117 - break; 118 - case 'h': 119 - error = false; 120 - default: 121 - sample_usage(argv, long_options, __doc__, mask, error); 122 - return ret; 123 - } 124 - } 125 - 126 - if (argc <= optind + 1) { 127 - sample_usage(argv, long_options, __doc__, mask, error); 128 - return ret; 129 - } 130 - 131 - skel = xdp_redirect_map_multi__open(); 132 - if (!skel) { 133 - fprintf(stderr, "Failed to xdp_redirect_map_multi__open: %s\n", 134 - strerror(errno)); 135 - ret = EXIT_FAIL_BPF; 136 - goto end; 137 - } 138 - 139 - ret = sample_init_pre_load(skel); 140 - if (ret < 0) { 141 - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); 142 - ret = EXIT_FAIL_BPF; 
143 - goto end_destroy; 144 - } 145 - 146 - ret = EXIT_FAIL_OPTION; 147 - for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) { 148 - ifaces[i] = if_nametoindex(argv[optind + i]); 149 - if (!ifaces[i]) 150 - ifaces[i] = strtoul(argv[optind + i], NULL, 0); 151 - if (!if_indextoname(ifaces[i], ifname)) { 152 - fprintf(stderr, "Bad interface index or name\n"); 153 - sample_usage(argv, long_options, __doc__, mask, true); 154 - goto end_destroy; 155 - } 156 - 157 - skel->rodata->from_match[i] = ifaces[i]; 158 - skel->rodata->to_match[i] = ifaces[i]; 159 - } 160 - 161 - ret = xdp_redirect_map_multi__load(skel); 162 - if (ret < 0) { 163 - fprintf(stderr, "Failed to xdp_redirect_map_multi__load: %s\n", 164 - strerror(errno)); 165 - ret = EXIT_FAIL_BPF; 166 - goto end_destroy; 167 - } 168 - 169 - if (xdp_devmap_attached) { 170 - /* Update mac_map with all egress interfaces' mac addr */ 171 - if (update_mac_map(skel->maps.mac_map) < 0) { 172 - fprintf(stderr, "Updating mac address failed\n"); 173 - ret = EXIT_FAIL; 174 - goto end_destroy; 175 - } 176 - } 177 - 178 - ret = sample_init(skel, mask); 179 - if (ret < 0) { 180 - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); 181 - ret = EXIT_FAIL; 182 - goto end_destroy; 183 - } 184 - 185 - ingress_prog = skel->progs.xdp_redirect_map_native; 186 - forward_map = skel->maps.forward_map_native; 187 - 188 - for (i = 0; ifaces[i] > 0; i++) { 189 - ifindex = ifaces[i]; 190 - 191 - ret = EXIT_FAIL_XDP; 192 - restart: 193 - /* bind prog_fd to each interface */ 194 - if (sample_install_xdp(ingress_prog, ifindex, generic, force) < 0) { 195 - if (generic && !tried) { 196 - fprintf(stderr, 197 - "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n"); 198 - ingress_prog = skel->progs.xdp_redirect_map_general; 199 - forward_map = skel->maps.forward_map_general; 200 - tried = true; 201 - goto restart; 202 - } 203 - goto end_destroy; 204 - } 205 - 206 - /* Add all the interfaces to forward group 
and attach 207 - * egress devmap program if exist 208 - */ 209 - devmap_val.ifindex = ifindex; 210 - if (xdp_devmap_attached) 211 - devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_devmap_prog); 212 - ret = bpf_map_update_elem(bpf_map__fd(forward_map), &ifindex, &devmap_val, 0); 213 - if (ret < 0) { 214 - fprintf(stderr, "Failed to update devmap value: %s\n", 215 - strerror(errno)); 216 - ret = EXIT_FAIL_BPF; 217 - goto end_destroy; 218 - } 219 - } 220 - 221 - ret = sample_run(interval, NULL, NULL); 222 - if (ret < 0) { 223 - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); 224 - ret = EXIT_FAIL; 225 - goto end_destroy; 226 - } 227 - ret = EXIT_OK; 228 - end_destroy: 229 - xdp_redirect_map_multi__destroy(skel); 230 - end: 231 - sample_exit(ret); 232 - }
-228
samples/bpf/xdp_redirect_map_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io 3 - */ 4 - static const char *__doc__ = 5 - "XDP redirect tool, using BPF_MAP_TYPE_DEVMAP\n" 6 - "Usage: xdp_redirect_map <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n"; 7 - 8 - #include <linux/bpf.h> 9 - #include <linux/if_link.h> 10 - #include <assert.h> 11 - #include <errno.h> 12 - #include <signal.h> 13 - #include <stdio.h> 14 - #include <stdlib.h> 15 - #include <stdbool.h> 16 - #include <string.h> 17 - #include <net/if.h> 18 - #include <unistd.h> 19 - #include <libgen.h> 20 - #include <getopt.h> 21 - #include <bpf/bpf.h> 22 - #include <bpf/libbpf.h> 23 - #include "bpf_util.h" 24 - #include "xdp_sample_user.h" 25 - #include "xdp_redirect_map.skel.h" 26 - 27 - static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_MAP_CNT | 28 - SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; 29 - 30 - DEFINE_SAMPLE_INIT(xdp_redirect_map); 31 - 32 - static const struct option long_options[] = { 33 - { "help", no_argument, NULL, 'h' }, 34 - { "skb-mode", no_argument, NULL, 'S' }, 35 - { "force", no_argument, NULL, 'F' }, 36 - { "load-egress", no_argument, NULL, 'X' }, 37 - { "stats", no_argument, NULL, 's' }, 38 - { "interval", required_argument, NULL, 'i' }, 39 - { "verbose", no_argument, NULL, 'v' }, 40 - {} 41 - }; 42 - 43 - static int verbose = 0; 44 - 45 - int main(int argc, char **argv) 46 - { 47 - struct bpf_devmap_val devmap_val = {}; 48 - bool xdp_devmap_attached = false; 49 - struct xdp_redirect_map *skel; 50 - char str[2 * IF_NAMESIZE + 1]; 51 - char ifname_out[IF_NAMESIZE]; 52 - struct bpf_map *tx_port_map; 53 - char ifname_in[IF_NAMESIZE]; 54 - int ifindex_in, ifindex_out; 55 - unsigned long interval = 2; 56 - int ret = EXIT_FAIL_OPTION; 57 - struct bpf_program *prog; 58 - bool generic = false; 59 - bool force = false; 60 - bool tried = false; 61 - bool error = true; 62 - int opt, key = 0; 63 - 64 - while ((opt = getopt_long(argc, argv, "hSFXi:vs", 65 - 
long_options, NULL)) != -1) { 66 - switch (opt) { 67 - case 'S': 68 - generic = true; 69 - /* devmap_xmit tracepoint not available */ 70 - mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | 71 - SAMPLE_DEVMAP_XMIT_CNT_MULTI); 72 - break; 73 - case 'F': 74 - force = true; 75 - break; 76 - case 'X': 77 - xdp_devmap_attached = true; 78 - break; 79 - case 'i': 80 - interval = strtoul(optarg, NULL, 0); 81 - break; 82 - case 'v': 83 - sample_switch_mode(); 84 - verbose = 1; 85 - break; 86 - case 's': 87 - mask |= SAMPLE_REDIRECT_MAP_CNT; 88 - break; 89 - case 'h': 90 - error = false; 91 - default: 92 - sample_usage(argv, long_options, __doc__, mask, error); 93 - return ret; 94 - } 95 - } 96 - 97 - if (argc <= optind + 1) { 98 - sample_usage(argv, long_options, __doc__, mask, true); 99 - goto end; 100 - } 101 - 102 - ifindex_in = if_nametoindex(argv[optind]); 103 - if (!ifindex_in) 104 - ifindex_in = strtoul(argv[optind], NULL, 0); 105 - 106 - ifindex_out = if_nametoindex(argv[optind + 1]); 107 - if (!ifindex_out) 108 - ifindex_out = strtoul(argv[optind + 1], NULL, 0); 109 - 110 - if (!ifindex_in || !ifindex_out) { 111 - fprintf(stderr, "Bad interface index or name\n"); 112 - sample_usage(argv, long_options, __doc__, mask, true); 113 - goto end; 114 - } 115 - 116 - skel = xdp_redirect_map__open(); 117 - if (!skel) { 118 - fprintf(stderr, "Failed to xdp_redirect_map__open: %s\n", 119 - strerror(errno)); 120 - ret = EXIT_FAIL_BPF; 121 - goto end; 122 - } 123 - 124 - ret = sample_init_pre_load(skel); 125 - if (ret < 0) { 126 - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); 127 - ret = EXIT_FAIL_BPF; 128 - goto end_destroy; 129 - } 130 - 131 - /* Load 2nd xdp prog on egress. 
*/ 132 - if (xdp_devmap_attached) { 133 - ret = get_mac_addr(ifindex_out, skel->rodata->tx_mac_addr); 134 - if (ret < 0) { 135 - fprintf(stderr, "Failed to get interface %d mac address: %s\n", 136 - ifindex_out, strerror(-ret)); 137 - ret = EXIT_FAIL; 138 - goto end_destroy; 139 - } 140 - if (verbose) 141 - printf("Egress ifindex:%d using src MAC %02x:%02x:%02x:%02x:%02x:%02x\n", 142 - ifindex_out, 143 - skel->rodata->tx_mac_addr[0], skel->rodata->tx_mac_addr[1], 144 - skel->rodata->tx_mac_addr[2], skel->rodata->tx_mac_addr[3], 145 - skel->rodata->tx_mac_addr[4], skel->rodata->tx_mac_addr[5]); 146 - } 147 - 148 - skel->rodata->from_match[0] = ifindex_in; 149 - skel->rodata->to_match[0] = ifindex_out; 150 - 151 - ret = xdp_redirect_map__load(skel); 152 - if (ret < 0) { 153 - fprintf(stderr, "Failed to xdp_redirect_map__load: %s\n", 154 - strerror(errno)); 155 - ret = EXIT_FAIL_BPF; 156 - goto end_destroy; 157 - } 158 - 159 - ret = sample_init(skel, mask); 160 - if (ret < 0) { 161 - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); 162 - ret = EXIT_FAIL; 163 - goto end_destroy; 164 - } 165 - 166 - prog = skel->progs.xdp_redirect_map_native; 167 - tx_port_map = skel->maps.tx_port_native; 168 - restart: 169 - if (sample_install_xdp(prog, ifindex_in, generic, force) < 0) { 170 - /* First try with struct bpf_devmap_val as value for generic 171 - * mode, then fallback to sizeof(int) for older kernels. 
172 - */ 173 - fprintf(stderr, 174 - "Trying fallback to sizeof(int) as value_size for devmap in generic mode\n"); 175 - if (generic && !tried) { 176 - prog = skel->progs.xdp_redirect_map_general; 177 - tx_port_map = skel->maps.tx_port_general; 178 - tried = true; 179 - goto restart; 180 - } 181 - ret = EXIT_FAIL_XDP; 182 - goto end_destroy; 183 - } 184 - 185 - /* Loading dummy XDP prog on out-device */ 186 - sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, generic, force); 187 - 188 - devmap_val.ifindex = ifindex_out; 189 - if (xdp_devmap_attached) 190 - devmap_val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_redirect_map_egress); 191 - ret = bpf_map_update_elem(bpf_map__fd(tx_port_map), &key, &devmap_val, 0); 192 - if (ret < 0) { 193 - fprintf(stderr, "Failed to update devmap value: %s\n", 194 - strerror(errno)); 195 - ret = EXIT_FAIL_BPF; 196 - goto end_destroy; 197 - } 198 - 199 - ret = EXIT_FAIL; 200 - if (!if_indextoname(ifindex_in, ifname_in)) { 201 - fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in, 202 - strerror(errno)); 203 - goto end_destroy; 204 - } 205 - 206 - if (!if_indextoname(ifindex_out, ifname_out)) { 207 - fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out, 208 - strerror(errno)); 209 - goto end_destroy; 210 - } 211 - 212 - safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str)); 213 - printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n", 214 - ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out)); 215 - snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out); 216 - 217 - ret = sample_run(interval, NULL, NULL); 218 - if (ret < 0) { 219 - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); 220 - ret = EXIT_FAIL; 221 - goto end_destroy; 222 - } 223 - ret = EXIT_OK; 224 - end_destroy: 225 - xdp_redirect_map__destroy(skel); 226 - end: 227 - sample_exit(ret); 228 - }
-172
samples/bpf/xdp_redirect_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0-only 2 - /* Copyright (c) 2016 John Fastabend <john.r.fastabend@intel.com> 3 - */ 4 - static const char *__doc__ = 5 - "XDP redirect tool, using bpf_redirect helper\n" 6 - "Usage: xdp_redirect <IFINDEX|IFNAME>_IN <IFINDEX|IFNAME>_OUT\n"; 7 - 8 - #include <linux/bpf.h> 9 - #include <linux/if_link.h> 10 - #include <assert.h> 11 - #include <errno.h> 12 - #include <signal.h> 13 - #include <stdio.h> 14 - #include <stdlib.h> 15 - #include <stdbool.h> 16 - #include <string.h> 17 - #include <net/if.h> 18 - #include <unistd.h> 19 - #include <libgen.h> 20 - #include <getopt.h> 21 - #include <bpf/bpf.h> 22 - #include <bpf/libbpf.h> 23 - #include "bpf_util.h" 24 - #include "xdp_sample_user.h" 25 - #include "xdp_redirect.skel.h" 26 - 27 - static int mask = SAMPLE_RX_CNT | SAMPLE_REDIRECT_ERR_CNT | 28 - SAMPLE_EXCEPTION_CNT | SAMPLE_DEVMAP_XMIT_CNT_MULTI; 29 - 30 - DEFINE_SAMPLE_INIT(xdp_redirect); 31 - 32 - static const struct option long_options[] = { 33 - {"help", no_argument, NULL, 'h' }, 34 - {"skb-mode", no_argument, NULL, 'S' }, 35 - {"force", no_argument, NULL, 'F' }, 36 - {"stats", no_argument, NULL, 's' }, 37 - {"interval", required_argument, NULL, 'i' }, 38 - {"verbose", no_argument, NULL, 'v' }, 39 - {} 40 - }; 41 - 42 - int main(int argc, char **argv) 43 - { 44 - int ifindex_in, ifindex_out, opt; 45 - char str[2 * IF_NAMESIZE + 1]; 46 - char ifname_out[IF_NAMESIZE]; 47 - char ifname_in[IF_NAMESIZE]; 48 - int ret = EXIT_FAIL_OPTION; 49 - unsigned long interval = 2; 50 - struct xdp_redirect *skel; 51 - bool generic = false; 52 - bool force = false; 53 - bool error = true; 54 - 55 - while ((opt = getopt_long(argc, argv, "hSFi:vs", 56 - long_options, NULL)) != -1) { 57 - switch (opt) { 58 - case 'S': 59 - generic = true; 60 - mask &= ~(SAMPLE_DEVMAP_XMIT_CNT | 61 - SAMPLE_DEVMAP_XMIT_CNT_MULTI); 62 - break; 63 - case 'F': 64 - force = true; 65 - break; 66 - case 'i': 67 - interval = strtoul(optarg, NULL, 0); 68 - break; 69 - 
case 'v': 70 - sample_switch_mode(); 71 - break; 72 - case 's': 73 - mask |= SAMPLE_REDIRECT_CNT; 74 - break; 75 - case 'h': 76 - error = false; 77 - default: 78 - sample_usage(argv, long_options, __doc__, mask, error); 79 - return ret; 80 - } 81 - } 82 - 83 - if (argc <= optind + 1) { 84 - sample_usage(argv, long_options, __doc__, mask, true); 85 - return ret; 86 - } 87 - 88 - ifindex_in = if_nametoindex(argv[optind]); 89 - if (!ifindex_in) 90 - ifindex_in = strtoul(argv[optind], NULL, 0); 91 - 92 - ifindex_out = if_nametoindex(argv[optind + 1]); 93 - if (!ifindex_out) 94 - ifindex_out = strtoul(argv[optind + 1], NULL, 0); 95 - 96 - if (!ifindex_in || !ifindex_out) { 97 - fprintf(stderr, "Bad interface index or name\n"); 98 - sample_usage(argv, long_options, __doc__, mask, true); 99 - goto end; 100 - } 101 - 102 - skel = xdp_redirect__open(); 103 - if (!skel) { 104 - fprintf(stderr, "Failed to xdp_redirect__open: %s\n", strerror(errno)); 105 - ret = EXIT_FAIL_BPF; 106 - goto end; 107 - } 108 - 109 - ret = sample_init_pre_load(skel); 110 - if (ret < 0) { 111 - fprintf(stderr, "Failed to sample_init_pre_load: %s\n", strerror(-ret)); 112 - ret = EXIT_FAIL_BPF; 113 - goto end_destroy; 114 - } 115 - 116 - skel->rodata->from_match[0] = ifindex_in; 117 - skel->rodata->to_match[0] = ifindex_out; 118 - skel->rodata->ifindex_out = ifindex_out; 119 - 120 - ret = xdp_redirect__load(skel); 121 - if (ret < 0) { 122 - fprintf(stderr, "Failed to xdp_redirect__load: %s\n", strerror(errno)); 123 - ret = EXIT_FAIL_BPF; 124 - goto end_destroy; 125 - } 126 - 127 - ret = sample_init(skel, mask); 128 - if (ret < 0) { 129 - fprintf(stderr, "Failed to initialize sample: %s\n", strerror(-ret)); 130 - ret = EXIT_FAIL; 131 - goto end_destroy; 132 - } 133 - 134 - ret = EXIT_FAIL_XDP; 135 - if (sample_install_xdp(skel->progs.xdp_redirect_prog, ifindex_in, 136 - generic, force) < 0) 137 - goto end_destroy; 138 - 139 - /* Loading dummy XDP prog on out-device */ 140 - 
sample_install_xdp(skel->progs.xdp_redirect_dummy_prog, ifindex_out, 141 - generic, force); 142 - 143 - ret = EXIT_FAIL; 144 - if (!if_indextoname(ifindex_in, ifname_in)) { 145 - fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_in, 146 - strerror(errno)); 147 - goto end_destroy; 148 - } 149 - 150 - if (!if_indextoname(ifindex_out, ifname_out)) { 151 - fprintf(stderr, "Failed to if_indextoname for %d: %s\n", ifindex_out, 152 - strerror(errno)); 153 - goto end_destroy; 154 - } 155 - 156 - safe_strncpy(str, get_driver_name(ifindex_in), sizeof(str)); 157 - printf("Redirecting from %s (ifindex %d; driver %s) to %s (ifindex %d; driver %s)\n", 158 - ifname_in, ifindex_in, str, ifname_out, ifindex_out, get_driver_name(ifindex_out)); 159 - snprintf(str, sizeof(str), "%s->%s", ifname_in, ifname_out); 160 - 161 - ret = sample_run(interval, NULL, NULL); 162 - if (ret < 0) { 163 - fprintf(stderr, "Failed during sample run: %s\n", strerror(-ret)); 164 - ret = EXIT_FAIL; 165 - goto end_destroy; 166 - } 167 - ret = EXIT_OK; 168 - end_destroy: 169 - xdp_redirect__destroy(skel); 170 - end: 171 - sample_exit(ret); 172 - }
-140
samples/bpf/xdp_rxq_info_kern.c
··· 1 - /* SPDX-License-Identifier: GPL-2.0 2 - * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. 3 - * 4 - * Example howto extract XDP RX-queue info 5 - */ 6 - #include <uapi/linux/bpf.h> 7 - #include <uapi/linux/if_ether.h> 8 - #include <uapi/linux/in.h> 9 - #include <bpf/bpf_helpers.h> 10 - 11 - /* Config setup from with userspace 12 - * 13 - * User-side setup ifindex in config_map, to verify that 14 - * ctx->ingress_ifindex is correct (against configured ifindex) 15 - */ 16 - struct config { 17 - __u32 action; 18 - int ifindex; 19 - __u32 options; 20 - }; 21 - enum cfg_options_flags { 22 - NO_TOUCH = 0x0U, 23 - READ_MEM = 0x1U, 24 - SWAP_MAC = 0x2U, 25 - }; 26 - 27 - struct { 28 - __uint(type, BPF_MAP_TYPE_ARRAY); 29 - __type(key, int); 30 - __type(value, struct config); 31 - __uint(max_entries, 1); 32 - } config_map SEC(".maps"); 33 - 34 - /* Common stats data record (shared with userspace) */ 35 - struct datarec { 36 - __u64 processed; 37 - __u64 issue; 38 - }; 39 - 40 - struct { 41 - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 42 - __type(key, u32); 43 - __type(value, struct datarec); 44 - __uint(max_entries, 1); 45 - } stats_global_map SEC(".maps"); 46 - 47 - #define MAX_RXQs 64 48 - 49 - /* Stats per rx_queue_index (per CPU) */ 50 - struct { 51 - __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 52 - __type(key, u32); 53 - __type(value, struct datarec); 54 - __uint(max_entries, MAX_RXQs + 1); 55 - } rx_queue_index_map SEC(".maps"); 56 - 57 - static __always_inline 58 - void swap_src_dst_mac(void *data) 59 - { 60 - unsigned short *p = data; 61 - unsigned short dst[3]; 62 - 63 - dst[0] = p[0]; 64 - dst[1] = p[1]; 65 - dst[2] = p[2]; 66 - p[0] = p[3]; 67 - p[1] = p[4]; 68 - p[2] = p[5]; 69 - p[3] = dst[0]; 70 - p[4] = dst[1]; 71 - p[5] = dst[2]; 72 - } 73 - 74 - SEC("xdp_prog0") 75 - int xdp_prognum0(struct xdp_md *ctx) 76 - { 77 - void *data_end = (void *)(long)ctx->data_end; 78 - void *data = (void *)(long)ctx->data; 79 - struct datarec *rec, *rxq_rec; 80 - int 
ingress_ifindex; 81 - struct config *config; 82 - u32 key = 0; 83 - 84 - /* Global stats record */ 85 - rec = bpf_map_lookup_elem(&stats_global_map, &key); 86 - if (!rec) 87 - return XDP_ABORTED; 88 - rec->processed++; 89 - 90 - /* Accessing ctx->ingress_ifindex, cause BPF to rewrite BPF 91 - * instructions inside kernel to access xdp_rxq->dev->ifindex 92 - */ 93 - ingress_ifindex = ctx->ingress_ifindex; 94 - 95 - config = bpf_map_lookup_elem(&config_map, &key); 96 - if (!config) 97 - return XDP_ABORTED; 98 - 99 - /* Simple test: check ctx provided ifindex is as expected */ 100 - if (ingress_ifindex != config->ifindex) { 101 - /* count this error case */ 102 - rec->issue++; 103 - return XDP_ABORTED; 104 - } 105 - 106 - /* Update stats per rx_queue_index. Handle if rx_queue_index 107 - * is larger than stats map can contain info for. 108 - */ 109 - key = ctx->rx_queue_index; 110 - if (key >= MAX_RXQs) 111 - key = MAX_RXQs; 112 - rxq_rec = bpf_map_lookup_elem(&rx_queue_index_map, &key); 113 - if (!rxq_rec) 114 - return XDP_ABORTED; 115 - rxq_rec->processed++; 116 - if (key == MAX_RXQs) 117 - rxq_rec->issue++; 118 - 119 - /* Default: Don't touch packet data, only count packets */ 120 - if (unlikely(config->options & (READ_MEM|SWAP_MAC))) { 121 - struct ethhdr *eth = data; 122 - 123 - if (eth + 1 > data_end) 124 - return XDP_ABORTED; 125 - 126 - /* Avoid compiler removing this: Drop non 802.3 Ethertypes */ 127 - if (ntohs(eth->h_proto) < ETH_P_802_3_MIN) 128 - return XDP_ABORTED; 129 - 130 - /* XDP_TX requires changing MAC-addrs, else HW may drop. 131 - * Can also be enabled with --swapmac (for test purposes) 132 - */ 133 - if (unlikely(config->options & SWAP_MAC)) 134 - swap_src_dst_mac(data); 135 - } 136 - 137 - return config->action; 138 - } 139 - 140 - char _license[] SEC("license") = "GPL";
-614
samples/bpf/xdp_rxq_info_user.c
··· 1 - /* SPDX-License-Identifier: GPL-2.0 2 - * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc. 3 - */ 4 - static const char *__doc__ = " XDP RX-queue info extract example\n\n" 5 - "Monitor how many packets per sec (pps) are received\n" 6 - "per NIC RX queue index and which CPU processed the packet\n" 7 - ; 8 - 9 - #include <errno.h> 10 - #include <signal.h> 11 - #include <stdio.h> 12 - #include <stdlib.h> 13 - #include <stdbool.h> 14 - #include <string.h> 15 - #include <unistd.h> 16 - #include <locale.h> 17 - #include <getopt.h> 18 - #include <net/if.h> 19 - #include <time.h> 20 - #include <limits.h> 21 - #include <arpa/inet.h> 22 - #include <linux/if_link.h> 23 - 24 - #include <bpf/bpf.h> 25 - #include <bpf/libbpf.h> 26 - #include "bpf_util.h" 27 - 28 - static int ifindex = -1; 29 - static char ifname_buf[IF_NAMESIZE]; 30 - static char *ifname; 31 - static __u32 prog_id; 32 - 33 - static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; 34 - 35 - static struct bpf_map *stats_global_map; 36 - static struct bpf_map *rx_queue_index_map; 37 - 38 - /* Exit return codes */ 39 - #define EXIT_OK 0 40 - #define EXIT_FAIL 1 41 - #define EXIT_FAIL_OPTION 2 42 - #define EXIT_FAIL_XDP 3 43 - #define EXIT_FAIL_BPF 4 44 - #define EXIT_FAIL_MEM 5 45 - 46 - #define FAIL_MEM_SIG INT_MAX 47 - #define FAIL_STAT_SIG (INT_MAX - 1) 48 - 49 - static const struct option long_options[] = { 50 - {"help", no_argument, NULL, 'h' }, 51 - {"dev", required_argument, NULL, 'd' }, 52 - {"skb-mode", no_argument, NULL, 'S' }, 53 - {"sec", required_argument, NULL, 's' }, 54 - {"no-separators", no_argument, NULL, 'z' }, 55 - {"action", required_argument, NULL, 'a' }, 56 - {"readmem", no_argument, NULL, 'r' }, 57 - {"swapmac", no_argument, NULL, 'm' }, 58 - {"force", no_argument, NULL, 'F' }, 59 - {0, 0, NULL, 0 } 60 - }; 61 - 62 - static void int_exit(int sig) 63 - { 64 - __u32 curr_prog_id = 0; 65 - 66 - if (ifindex > -1) { 67 - if (bpf_xdp_query_id(ifindex, xdp_flags, &curr_prog_id)) { 68 - 
printf("bpf_xdp_query_id failed\n"); 69 - exit(EXIT_FAIL); 70 - } 71 - if (prog_id == curr_prog_id) { 72 - fprintf(stderr, 73 - "Interrupted: Removing XDP program on ifindex:%d device:%s\n", 74 - ifindex, ifname); 75 - bpf_xdp_detach(ifindex, xdp_flags, NULL); 76 - } else if (!curr_prog_id) { 77 - printf("couldn't find a prog id on a given iface\n"); 78 - } else { 79 - printf("program on interface changed, not removing\n"); 80 - } 81 - } 82 - 83 - if (sig == FAIL_MEM_SIG) 84 - exit(EXIT_FAIL_MEM); 85 - else if (sig == FAIL_STAT_SIG) 86 - exit(EXIT_FAIL); 87 - 88 - exit(EXIT_OK); 89 - } 90 - 91 - struct config { 92 - __u32 action; 93 - int ifindex; 94 - __u32 options; 95 - }; 96 - enum cfg_options_flags { 97 - NO_TOUCH = 0x0U, 98 - READ_MEM = 0x1U, 99 - SWAP_MAC = 0x2U, 100 - }; 101 - #define XDP_ACTION_MAX (XDP_TX + 1) 102 - #define XDP_ACTION_MAX_STRLEN 11 103 - static const char *xdp_action_names[XDP_ACTION_MAX] = { 104 - [XDP_ABORTED] = "XDP_ABORTED", 105 - [XDP_DROP] = "XDP_DROP", 106 - [XDP_PASS] = "XDP_PASS", 107 - [XDP_TX] = "XDP_TX", 108 - }; 109 - 110 - static const char *action2str(int action) 111 - { 112 - if (action < XDP_ACTION_MAX) 113 - return xdp_action_names[action]; 114 - return NULL; 115 - } 116 - 117 - static int parse_xdp_action(char *action_str) 118 - { 119 - size_t maxlen; 120 - __u64 action = -1; 121 - int i; 122 - 123 - for (i = 0; i < XDP_ACTION_MAX; i++) { 124 - maxlen = XDP_ACTION_MAX_STRLEN; 125 - if (strncmp(xdp_action_names[i], action_str, maxlen) == 0) { 126 - action = i; 127 - break; 128 - } 129 - } 130 - return action; 131 - } 132 - 133 - static void list_xdp_actions(void) 134 - { 135 - int i; 136 - 137 - printf("Available XDP --action <options>\n"); 138 - for (i = 0; i < XDP_ACTION_MAX; i++) 139 - printf("\t%s\n", xdp_action_names[i]); 140 - printf("\n"); 141 - } 142 - 143 - static char* options2str(enum cfg_options_flags flag) 144 - { 145 - if (flag == NO_TOUCH) 146 - return "no_touch"; 147 - if (flag & SWAP_MAC) 148 - return 
"swapmac"; 149 - if (flag & READ_MEM) 150 - return "read"; 151 - fprintf(stderr, "ERR: Unknown config option flags"); 152 - int_exit(FAIL_STAT_SIG); 153 - return "unknown"; 154 - } 155 - 156 - static void usage(char *argv[]) 157 - { 158 - int i; 159 - 160 - printf("\nDOCUMENTATION:\n%s\n", __doc__); 161 - printf(" Usage: %s (options-see-below)\n", argv[0]); 162 - printf(" Listing options:\n"); 163 - for (i = 0; long_options[i].name != 0; i++) { 164 - printf(" --%-12s", long_options[i].name); 165 - if (long_options[i].flag != NULL) 166 - printf(" flag (internal value:%d)", 167 - *long_options[i].flag); 168 - else 169 - printf(" short-option: -%c", 170 - long_options[i].val); 171 - printf("\n"); 172 - } 173 - printf("\n"); 174 - list_xdp_actions(); 175 - } 176 - 177 - #define NANOSEC_PER_SEC 1000000000 /* 10^9 */ 178 - static __u64 gettime(void) 179 - { 180 - struct timespec t; 181 - int res; 182 - 183 - res = clock_gettime(CLOCK_MONOTONIC, &t); 184 - if (res < 0) { 185 - fprintf(stderr, "Error with gettimeofday! 
(%i)\n", res); 186 - int_exit(FAIL_STAT_SIG); 187 - } 188 - return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; 189 - } 190 - 191 - /* Common stats data record shared with _kern.c */ 192 - struct datarec { 193 - __u64 processed; 194 - __u64 issue; 195 - }; 196 - struct record { 197 - __u64 timestamp; 198 - struct datarec total; 199 - struct datarec *cpu; 200 - }; 201 - struct stats_record { 202 - struct record stats; 203 - struct record *rxq; 204 - }; 205 - 206 - static struct datarec *alloc_record_per_cpu(void) 207 - { 208 - unsigned int nr_cpus = bpf_num_possible_cpus(); 209 - struct datarec *array; 210 - 211 - array = calloc(nr_cpus, sizeof(struct datarec)); 212 - if (!array) { 213 - fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); 214 - int_exit(FAIL_MEM_SIG); 215 - } 216 - return array; 217 - } 218 - 219 - static struct record *alloc_record_per_rxq(void) 220 - { 221 - unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); 222 - struct record *array; 223 - 224 - array = calloc(nr_rxqs, sizeof(struct record)); 225 - if (!array) { 226 - fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs); 227 - int_exit(FAIL_MEM_SIG); 228 - } 229 - return array; 230 - } 231 - 232 - static struct stats_record *alloc_stats_record(void) 233 - { 234 - unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); 235 - struct stats_record *rec; 236 - int i; 237 - 238 - rec = calloc(1, sizeof(struct stats_record)); 239 - if (!rec) { 240 - fprintf(stderr, "Mem alloc error\n"); 241 - int_exit(FAIL_MEM_SIG); 242 - } 243 - rec->rxq = alloc_record_per_rxq(); 244 - for (i = 0; i < nr_rxqs; i++) 245 - rec->rxq[i].cpu = alloc_record_per_cpu(); 246 - 247 - rec->stats.cpu = alloc_record_per_cpu(); 248 - return rec; 249 - } 250 - 251 - static void free_stats_record(struct stats_record *r) 252 - { 253 - unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); 254 - int i; 255 - 256 - for (i = 0; i < nr_rxqs; i++) 257 - free(r->rxq[i].cpu); 258 - 259 - 
free(r->rxq); 260 - free(r->stats.cpu); 261 - free(r); 262 - } 263 - 264 - static bool map_collect_percpu(int fd, __u32 key, struct record *rec) 265 - { 266 - /* For percpu maps, userspace gets a value per possible CPU */ 267 - unsigned int nr_cpus = bpf_num_possible_cpus(); 268 - struct datarec values[nr_cpus]; 269 - __u64 sum_processed = 0; 270 - __u64 sum_issue = 0; 271 - int i; 272 - 273 - if ((bpf_map_lookup_elem(fd, &key, values)) != 0) { 274 - fprintf(stderr, 275 - "ERR: bpf_map_lookup_elem failed key:0x%X\n", key); 276 - return false; 277 - } 278 - /* Get time as close as possible to reading map contents */ 279 - rec->timestamp = gettime(); 280 - 281 - /* Record and sum values from each CPU */ 282 - for (i = 0; i < nr_cpus; i++) { 283 - rec->cpu[i].processed = values[i].processed; 284 - sum_processed += values[i].processed; 285 - rec->cpu[i].issue = values[i].issue; 286 - sum_issue += values[i].issue; 287 - } 288 - rec->total.processed = sum_processed; 289 - rec->total.issue = sum_issue; 290 - return true; 291 - } 292 - 293 - static void stats_collect(struct stats_record *rec) 294 - { 295 - int fd, i, max_rxqs; 296 - 297 - fd = bpf_map__fd(stats_global_map); 298 - map_collect_percpu(fd, 0, &rec->stats); 299 - 300 - fd = bpf_map__fd(rx_queue_index_map); 301 - max_rxqs = bpf_map__max_entries(rx_queue_index_map); 302 - for (i = 0; i < max_rxqs; i++) 303 - map_collect_percpu(fd, i, &rec->rxq[i]); 304 - } 305 - 306 - static double calc_period(struct record *r, struct record *p) 307 - { 308 - double period_ = 0; 309 - __u64 period = 0; 310 - 311 - period = r->timestamp - p->timestamp; 312 - if (period > 0) 313 - period_ = ((double) period / NANOSEC_PER_SEC); 314 - 315 - return period_; 316 - } 317 - 318 - static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_) 319 - { 320 - __u64 packets = 0; 321 - __u64 pps = 0; 322 - 323 - if (period_ > 0) { 324 - packets = r->processed - p->processed; 325 - pps = packets / period_; 326 - } 327 - return pps; 
328 - } 329 - 330 - static __u64 calc_errs_pps(struct datarec *r, 331 - struct datarec *p, double period_) 332 - { 333 - __u64 packets = 0; 334 - __u64 pps = 0; 335 - 336 - if (period_ > 0) { 337 - packets = r->issue - p->issue; 338 - pps = packets / period_; 339 - } 340 - return pps; 341 - } 342 - 343 - static void stats_print(struct stats_record *stats_rec, 344 - struct stats_record *stats_prev, 345 - int action, __u32 cfg_opt) 346 - { 347 - unsigned int nr_rxqs = bpf_map__max_entries(rx_queue_index_map); 348 - unsigned int nr_cpus = bpf_num_possible_cpus(); 349 - double pps = 0, err = 0; 350 - struct record *rec, *prev; 351 - double t; 352 - int rxq; 353 - int i; 354 - 355 - /* Header */ 356 - printf("\nRunning XDP on dev:%s (ifindex:%d) action:%s options:%s\n", 357 - ifname, ifindex, action2str(action), options2str(cfg_opt)); 358 - 359 - /* stats_global_map */ 360 - { 361 - char *fmt_rx = "%-15s %-7d %'-11.0f %'-10.0f %s\n"; 362 - char *fm2_rx = "%-15s %-7s %'-11.0f\n"; 363 - char *errstr = ""; 364 - 365 - printf("%-15s %-7s %-11s %-11s\n", 366 - "XDP stats", "CPU", "pps", "issue-pps"); 367 - 368 - rec = &stats_rec->stats; 369 - prev = &stats_prev->stats; 370 - t = calc_period(rec, prev); 371 - for (i = 0; i < nr_cpus; i++) { 372 - struct datarec *r = &rec->cpu[i]; 373 - struct datarec *p = &prev->cpu[i]; 374 - 375 - pps = calc_pps (r, p, t); 376 - err = calc_errs_pps(r, p, t); 377 - if (err > 0) 378 - errstr = "invalid-ifindex"; 379 - if (pps > 0) 380 - printf(fmt_rx, "XDP-RX CPU", 381 - i, pps, err, errstr); 382 - } 383 - pps = calc_pps (&rec->total, &prev->total, t); 384 - err = calc_errs_pps(&rec->total, &prev->total, t); 385 - printf(fm2_rx, "XDP-RX CPU", "total", pps, err); 386 - } 387 - 388 - /* rx_queue_index_map */ 389 - printf("\n%-15s %-7s %-11s %-11s\n", 390 - "RXQ stats", "RXQ:CPU", "pps", "issue-pps"); 391 - 392 - for (rxq = 0; rxq < nr_rxqs; rxq++) { 393 - char *fmt_rx = "%-15s %3d:%-3d %'-11.0f %'-10.0f %s\n"; 394 - char *fm2_rx = "%-15s 
%3d:%-3s %'-11.0f\n"; 395 - char *errstr = ""; 396 - int rxq_ = rxq; 397 - 398 - /* Last RXQ in map catch overflows */ 399 - if (rxq_ == nr_rxqs - 1) 400 - rxq_ = -1; 401 - 402 - rec = &stats_rec->rxq[rxq]; 403 - prev = &stats_prev->rxq[rxq]; 404 - t = calc_period(rec, prev); 405 - for (i = 0; i < nr_cpus; i++) { 406 - struct datarec *r = &rec->cpu[i]; 407 - struct datarec *p = &prev->cpu[i]; 408 - 409 - pps = calc_pps (r, p, t); 410 - err = calc_errs_pps(r, p, t); 411 - if (err > 0) { 412 - if (rxq_ == -1) 413 - errstr = "map-overflow-RXQ"; 414 - else 415 - errstr = "err"; 416 - } 417 - if (pps > 0) 418 - printf(fmt_rx, "rx_queue_index", 419 - rxq_, i, pps, err, errstr); 420 - } 421 - pps = calc_pps (&rec->total, &prev->total, t); 422 - err = calc_errs_pps(&rec->total, &prev->total, t); 423 - if (pps || err) 424 - printf(fm2_rx, "rx_queue_index", rxq_, "sum", pps, err); 425 - } 426 - } 427 - 428 - 429 - /* Pointer swap trick */ 430 - static inline void swap(struct stats_record **a, struct stats_record **b) 431 - { 432 - struct stats_record *tmp; 433 - 434 - tmp = *a; 435 - *a = *b; 436 - *b = tmp; 437 - } 438 - 439 - static void stats_poll(int interval, int action, __u32 cfg_opt) 440 - { 441 - struct stats_record *record, *prev; 442 - 443 - record = alloc_stats_record(); 444 - prev = alloc_stats_record(); 445 - stats_collect(record); 446 - 447 - while (1) { 448 - swap(&prev, &record); 449 - stats_collect(record); 450 - stats_print(record, prev, action, cfg_opt); 451 - sleep(interval); 452 - } 453 - 454 - free_stats_record(record); 455 - free_stats_record(prev); 456 - } 457 - 458 - 459 - int main(int argc, char **argv) 460 - { 461 - __u32 cfg_options= NO_TOUCH ; /* Default: Don't touch packet memory */ 462 - struct bpf_prog_info info = {}; 463 - __u32 info_len = sizeof(info); 464 - int prog_fd, map_fd, opt, err; 465 - bool use_separators = true; 466 - struct config cfg = { 0 }; 467 - struct bpf_program *prog; 468 - struct bpf_object *obj; 469 - struct bpf_map *map; 
470 - char filename[256]; 471 - int longindex = 0; 472 - int interval = 2; 473 - __u32 key = 0; 474 - 475 - 476 - char action_str_buf[XDP_ACTION_MAX_STRLEN + 1 /* for \0 */] = { 0 }; 477 - int action = XDP_PASS; /* Default action */ 478 - char *action_str = NULL; 479 - 480 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 481 - 482 - obj = bpf_object__open_file(filename, NULL); 483 - if (libbpf_get_error(obj)) 484 - return EXIT_FAIL; 485 - 486 - prog = bpf_object__next_program(obj, NULL); 487 - bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); 488 - 489 - err = bpf_object__load(obj); 490 - if (err) 491 - return EXIT_FAIL; 492 - prog_fd = bpf_program__fd(prog); 493 - 494 - map = bpf_object__find_map_by_name(obj, "config_map"); 495 - stats_global_map = bpf_object__find_map_by_name(obj, "stats_global_map"); 496 - rx_queue_index_map = bpf_object__find_map_by_name(obj, "rx_queue_index_map"); 497 - if (!map || !stats_global_map || !rx_queue_index_map) { 498 - printf("finding a map in obj file failed\n"); 499 - return EXIT_FAIL; 500 - } 501 - map_fd = bpf_map__fd(map); 502 - 503 - if (!prog_fd) { 504 - fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", strerror(errno)); 505 - return EXIT_FAIL; 506 - } 507 - 508 - /* Parse commands line args */ 509 - while ((opt = getopt_long(argc, argv, "FhSrmzd:s:a:", 510 - long_options, &longindex)) != -1) { 511 - switch (opt) { 512 - case 'd': 513 - if (strlen(optarg) >= IF_NAMESIZE) { 514 - fprintf(stderr, "ERR: --dev name too long\n"); 515 - goto error; 516 - } 517 - ifname = (char *)&ifname_buf; 518 - strncpy(ifname, optarg, IF_NAMESIZE); 519 - ifindex = if_nametoindex(ifname); 520 - if (ifindex == 0) { 521 - fprintf(stderr, 522 - "ERR: --dev name unknown err(%d):%s\n", 523 - errno, strerror(errno)); 524 - goto error; 525 - } 526 - break; 527 - case 's': 528 - interval = atoi(optarg); 529 - break; 530 - case 'S': 531 - xdp_flags |= XDP_FLAGS_SKB_MODE; 532 - break; 533 - case 'z': 534 - use_separators = false; 535 - break; 
536 - case 'a': 537 - action_str = (char *)&action_str_buf; 538 - strncpy(action_str, optarg, XDP_ACTION_MAX_STRLEN); 539 - break; 540 - case 'r': 541 - cfg_options |= READ_MEM; 542 - break; 543 - case 'm': 544 - cfg_options |= SWAP_MAC; 545 - break; 546 - case 'F': 547 - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; 548 - break; 549 - case 'h': 550 - error: 551 - default: 552 - usage(argv); 553 - return EXIT_FAIL_OPTION; 554 - } 555 - } 556 - 557 - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) 558 - xdp_flags |= XDP_FLAGS_DRV_MODE; 559 - 560 - /* Required option */ 561 - if (ifindex == -1) { 562 - fprintf(stderr, "ERR: required option --dev missing\n"); 563 - usage(argv); 564 - return EXIT_FAIL_OPTION; 565 - } 566 - cfg.ifindex = ifindex; 567 - 568 - /* Parse action string */ 569 - if (action_str) { 570 - action = parse_xdp_action(action_str); 571 - if (action < 0) { 572 - fprintf(stderr, "ERR: Invalid XDP --action: %s\n", 573 - action_str); 574 - list_xdp_actions(); 575 - return EXIT_FAIL_OPTION; 576 - } 577 - } 578 - cfg.action = action; 579 - 580 - /* XDP_TX requires changing MAC-addrs, else HW may drop */ 581 - if (action == XDP_TX) 582 - cfg_options |= SWAP_MAC; 583 - cfg.options = cfg_options; 584 - 585 - /* Trick to pretty printf with thousands separators use %' */ 586 - if (use_separators) 587 - setlocale(LC_NUMERIC, "en_US"); 588 - 589 - /* User-side setup ifindex in config_map */ 590 - err = bpf_map_update_elem(map_fd, &key, &cfg, 0); 591 - if (err) { 592 - fprintf(stderr, "Store config failed (err:%d)\n", err); 593 - exit(EXIT_FAIL_BPF); 594 - } 595 - 596 - /* Remove XDP program when program is interrupted or killed */ 597 - signal(SIGINT, int_exit); 598 - signal(SIGTERM, int_exit); 599 - 600 - if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) { 601 - fprintf(stderr, "link set xdp fd failed\n"); 602 - return EXIT_FAIL_XDP; 603 - } 604 - 605 - err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len); 606 - if (err) { 607 - printf("can't get prog info - 
%s\n", strerror(errno)); 608 - return err; 609 - } 610 - prog_id = info.id; 611 - 612 - stats_poll(interval, action, cfg_options); 613 - return EXIT_OK; 614 - }
-57
samples/bpf/xdp_sample_pkts_kern.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - #include <linux/ptrace.h> 3 - #include <linux/version.h> 4 - #include <uapi/linux/bpf.h> 5 - #include <bpf/bpf_helpers.h> 6 - 7 - #define SAMPLE_SIZE 64ul 8 - 9 - struct { 10 - __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); 11 - __uint(key_size, sizeof(int)); 12 - __uint(value_size, sizeof(u32)); 13 - } my_map SEC(".maps"); 14 - 15 - SEC("xdp_sample") 16 - int xdp_sample_prog(struct xdp_md *ctx) 17 - { 18 - void *data_end = (void *)(long)ctx->data_end; 19 - void *data = (void *)(long)ctx->data; 20 - 21 - /* Metadata will be in the perf event before the packet data. */ 22 - struct S { 23 - u16 cookie; 24 - u16 pkt_len; 25 - } __packed metadata; 26 - 27 - if (data < data_end) { 28 - /* The XDP perf_event_output handler will use the upper 32 bits 29 - * of the flags argument as a number of bytes to include of the 30 - * packet payload in the event data. If the size is too big, the 31 - * call to bpf_perf_event_output will fail and return -EFAULT. 32 - * 33 - * See bpf_xdp_event_output in net/core/filter.c. 34 - * 35 - * The BPF_F_CURRENT_CPU flag means that the event output fd 36 - * will be indexed by the CPU number in the event map. 37 - */ 38 - u64 flags = BPF_F_CURRENT_CPU; 39 - u16 sample_size; 40 - int ret; 41 - 42 - metadata.cookie = 0xdead; 43 - metadata.pkt_len = (u16)(data_end - data); 44 - sample_size = min(metadata.pkt_len, SAMPLE_SIZE); 45 - flags |= (u64)sample_size << 32; 46 - 47 - ret = bpf_perf_event_output(ctx, &my_map, flags, 48 - &metadata, sizeof(metadata)); 49 - if (ret) 50 - bpf_printk("perf_event_output failed: %d\n", ret); 51 - } 52 - 53 - return XDP_PASS; 54 - } 55 - 56 - char _license[] SEC("license") = "GPL"; 57 - u32 _version SEC("version") = LINUX_VERSION_CODE;
-196
samples/bpf/xdp_sample_pkts_user.c
··· 1 - // SPDX-License-Identifier: GPL-2.0 2 - #include <stdio.h> 3 - #include <stdlib.h> 4 - #include <string.h> 5 - #include <linux/perf_event.h> 6 - #include <linux/bpf.h> 7 - #include <net/if.h> 8 - #include <errno.h> 9 - #include <assert.h> 10 - #include <sys/sysinfo.h> 11 - #include <sys/ioctl.h> 12 - #include <signal.h> 13 - #include <bpf/libbpf.h> 14 - #include <bpf/bpf.h> 15 - #include <libgen.h> 16 - #include <linux/if_link.h> 17 - 18 - #include "perf-sys.h" 19 - 20 - static int if_idx; 21 - static char *if_name; 22 - static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; 23 - static __u32 prog_id; 24 - static struct perf_buffer *pb = NULL; 25 - 26 - static int do_attach(int idx, int fd, const char *name) 27 - { 28 - struct bpf_prog_info info = {}; 29 - __u32 info_len = sizeof(info); 30 - int err; 31 - 32 - err = bpf_xdp_attach(idx, fd, xdp_flags, NULL); 33 - if (err < 0) { 34 - printf("ERROR: failed to attach program to %s\n", name); 35 - return err; 36 - } 37 - 38 - err = bpf_prog_get_info_by_fd(fd, &info, &info_len); 39 - if (err) { 40 - printf("can't get prog info - %s\n", strerror(errno)); 41 - return err; 42 - } 43 - prog_id = info.id; 44 - 45 - return err; 46 - } 47 - 48 - static int do_detach(int idx, const char *name) 49 - { 50 - __u32 curr_prog_id = 0; 51 - int err = 0; 52 - 53 - err = bpf_xdp_query_id(idx, xdp_flags, &curr_prog_id); 54 - if (err) { 55 - printf("bpf_xdp_query_id failed\n"); 56 - return err; 57 - } 58 - if (prog_id == curr_prog_id) { 59 - err = bpf_xdp_detach(idx, xdp_flags, NULL); 60 - if (err < 0) 61 - printf("ERROR: failed to detach prog from %s\n", name); 62 - } else if (!curr_prog_id) { 63 - printf("couldn't find a prog id on a %s\n", name); 64 - } else { 65 - printf("program on interface changed, not removing\n"); 66 - } 67 - 68 - return err; 69 - } 70 - 71 - #define SAMPLE_SIZE 64 72 - 73 - static void print_bpf_output(void *ctx, int cpu, void *data, __u32 size) 74 - { 75 - struct { 76 - __u16 cookie; 77 - __u16 pkt_len; 
78 - __u8 pkt_data[SAMPLE_SIZE]; 79 - } __packed *e = data; 80 - int i; 81 - 82 - if (e->cookie != 0xdead) { 83 - printf("BUG cookie %x sized %d\n", e->cookie, size); 84 - return; 85 - } 86 - 87 - printf("Pkt len: %-5d bytes. Ethernet hdr: ", e->pkt_len); 88 - for (i = 0; i < 14 && i < e->pkt_len; i++) 89 - printf("%02x ", e->pkt_data[i]); 90 - printf("\n"); 91 - } 92 - 93 - static void sig_handler(int signo) 94 - { 95 - do_detach(if_idx, if_name); 96 - perf_buffer__free(pb); 97 - exit(0); 98 - } 99 - 100 - static void usage(const char *prog) 101 - { 102 - fprintf(stderr, 103 - "%s: %s [OPTS] <ifname|ifindex>\n\n" 104 - "OPTS:\n" 105 - " -F force loading prog\n" 106 - " -S use skb-mode\n", 107 - __func__, prog); 108 - } 109 - 110 - int main(int argc, char **argv) 111 - { 112 - const char *optstr = "FS"; 113 - int prog_fd, map_fd, opt; 114 - struct bpf_program *prog; 115 - struct bpf_object *obj; 116 - struct bpf_map *map; 117 - char filename[256]; 118 - int ret, err; 119 - 120 - while ((opt = getopt(argc, argv, optstr)) != -1) { 121 - switch (opt) { 122 - case 'F': 123 - xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; 124 - break; 125 - case 'S': 126 - xdp_flags |= XDP_FLAGS_SKB_MODE; 127 - break; 128 - default: 129 - usage(basename(argv[0])); 130 - return 1; 131 - } 132 - } 133 - 134 - if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) 135 - xdp_flags |= XDP_FLAGS_DRV_MODE; 136 - 137 - if (optind == argc) { 138 - usage(basename(argv[0])); 139 - return 1; 140 - } 141 - 142 - snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); 143 - 144 - obj = bpf_object__open_file(filename, NULL); 145 - if (libbpf_get_error(obj)) 146 - return 1; 147 - 148 - prog = bpf_object__next_program(obj, NULL); 149 - bpf_program__set_type(prog, BPF_PROG_TYPE_XDP); 150 - 151 - err = bpf_object__load(obj); 152 - if (err) 153 - return 1; 154 - 155 - prog_fd = bpf_program__fd(prog); 156 - 157 - map = bpf_object__next_map(obj, NULL); 158 - if (!map) { 159 - printf("finding a map in obj file failed\n"); 
160 - return 1; 161 - } 162 - map_fd = bpf_map__fd(map); 163 - 164 - if_idx = if_nametoindex(argv[optind]); 165 - if (!if_idx) 166 - if_idx = strtoul(argv[optind], NULL, 0); 167 - 168 - if (!if_idx) { 169 - fprintf(stderr, "Invalid ifname\n"); 170 - return 1; 171 - } 172 - if_name = argv[optind]; 173 - err = do_attach(if_idx, prog_fd, if_name); 174 - if (err) 175 - return err; 176 - 177 - if (signal(SIGINT, sig_handler) || 178 - signal(SIGHUP, sig_handler) || 179 - signal(SIGTERM, sig_handler)) { 180 - perror("signal"); 181 - return 1; 182 - } 183 - 184 - pb = perf_buffer__new(map_fd, 8, print_bpf_output, NULL, NULL, NULL); 185 - err = libbpf_get_error(pb); 186 - if (err) { 187 - perror("perf_buffer setup failed"); 188 - return 1; 189 - } 190 - 191 - while ((ret = perf_buffer__poll(pb, 1000)) >= 0) { 192 - } 193 - 194 - kill(0, SIGINT); 195 - return ret; 196 - }
+21 -1
tools/include/uapi/linux/bpf.h
··· 1039 1039 BPF_NETFILTER, 1040 1040 BPF_TCX_INGRESS, 1041 1041 BPF_TCX_EGRESS, 1042 + BPF_TRACE_UPROBE_MULTI, 1042 1043 __MAX_BPF_ATTACH_TYPE 1043 1044 }; 1044 1045 ··· 1058 1057 BPF_LINK_TYPE_STRUCT_OPS = 9, 1059 1058 BPF_LINK_TYPE_NETFILTER = 10, 1060 1059 BPF_LINK_TYPE_TCX = 11, 1060 + BPF_LINK_TYPE_UPROBE_MULTI = 12, 1061 1061 MAX_BPF_LINK_TYPE, 1062 1062 }; 1063 1063 ··· 1188 1186 /* link_create.kprobe_multi.flags used in LINK_CREATE command for 1189 1187 * BPF_TRACE_KPROBE_MULTI attach type to create return probe. 1190 1188 */ 1191 - #define BPF_F_KPROBE_MULTI_RETURN (1U << 0) 1189 + enum { 1190 + BPF_F_KPROBE_MULTI_RETURN = (1U << 0) 1191 + }; 1192 + 1193 + /* link_create.uprobe_multi.flags used in LINK_CREATE command for 1194 + * BPF_TRACE_UPROBE_MULTI attach type to create return probe. 1195 + */ 1196 + enum { 1197 + BPF_F_UPROBE_MULTI_RETURN = (1U << 0) 1198 + }; 1192 1199 1193 1200 /* link_create.netfilter.flags used in LINK_CREATE command for 1194 1201 * BPF_PROG_TYPE_NETFILTER to enable IP packet defragmentation. ··· 1635 1624 }; 1636 1625 __u64 expected_revision; 1637 1626 } tcx; 1627 + struct { 1628 + __aligned_u64 path; 1629 + __aligned_u64 offsets; 1630 + __aligned_u64 ref_ctr_offsets; 1631 + __aligned_u64 cookies; 1632 + __u32 cnt; 1633 + __u32 flags; 1634 + __u32 pid; 1635 + } uprobe_multi; 1638 1636 }; 1639 1637 } link_create; 1640 1638
+1 -1
tools/lib/bpf/Build
··· 1 1 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ 2 2 netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ 3 3 btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ 4 - usdt.o zip.o 4 + usdt.o zip.o elf.o
+11
tools/lib/bpf/bpf.c
··· 767 767 if (!OPTS_ZEROED(opts, kprobe_multi)) 768 768 return libbpf_err(-EINVAL); 769 769 break; 770 + case BPF_TRACE_UPROBE_MULTI: 771 + attr.link_create.uprobe_multi.flags = OPTS_GET(opts, uprobe_multi.flags, 0); 772 + attr.link_create.uprobe_multi.cnt = OPTS_GET(opts, uprobe_multi.cnt, 0); 773 + attr.link_create.uprobe_multi.path = ptr_to_u64(OPTS_GET(opts, uprobe_multi.path, 0)); 774 + attr.link_create.uprobe_multi.offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.offsets, 0)); 775 + attr.link_create.uprobe_multi.ref_ctr_offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.ref_ctr_offsets, 0)); 776 + attr.link_create.uprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, uprobe_multi.cookies, 0)); 777 + attr.link_create.uprobe_multi.pid = OPTS_GET(opts, uprobe_multi.pid, 0); 778 + if (!OPTS_ZEROED(opts, uprobe_multi)) 779 + return libbpf_err(-EINVAL); 780 + break; 770 781 case BPF_TRACE_FENTRY: 771 782 case BPF_TRACE_FEXIT: 772 783 case BPF_MODIFY_RETURN:
+10 -1
tools/lib/bpf/bpf.h
··· 393 393 const __u64 *cookies; 394 394 } kprobe_multi; 395 395 struct { 396 + __u32 flags; 397 + __u32 cnt; 398 + const char *path; 399 + const unsigned long *offsets; 400 + const unsigned long *ref_ctr_offsets; 401 + const __u64 *cookies; 402 + __u32 pid; 403 + } uprobe_multi; 404 + struct { 396 405 __u64 cookie; 397 406 } tracing; 398 407 struct { ··· 418 409 }; 419 410 size_t :0; 420 411 }; 421 - #define bpf_link_create_opts__last_field kprobe_multi.cookies 412 + #define bpf_link_create_opts__last_field uprobe_multi.pid 422 413 423 414 LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, 424 415 enum bpf_attach_type attach_type,
+440
tools/lib/bpf/elf.c
··· 1 + // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 + 3 + #include <libelf.h> 4 + #include <gelf.h> 5 + #include <fcntl.h> 6 + #include <linux/kernel.h> 7 + 8 + #include "libbpf_internal.h" 9 + #include "str_error.h" 10 + 11 + #define STRERR_BUFSIZE 128 12 + 13 + int elf_open(const char *binary_path, struct elf_fd *elf_fd) 14 + { 15 + char errmsg[STRERR_BUFSIZE]; 16 + int fd, ret; 17 + Elf *elf; 18 + 19 + if (elf_version(EV_CURRENT) == EV_NONE) { 20 + pr_warn("elf: failed to init libelf for %s\n", binary_path); 21 + return -LIBBPF_ERRNO__LIBELF; 22 + } 23 + fd = open(binary_path, O_RDONLY | O_CLOEXEC); 24 + if (fd < 0) { 25 + ret = -errno; 26 + pr_warn("elf: failed to open %s: %s\n", binary_path, 27 + libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); 28 + return ret; 29 + } 30 + elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); 31 + if (!elf) { 32 + pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); 33 + close(fd); 34 + return -LIBBPF_ERRNO__FORMAT; 35 + } 36 + elf_fd->fd = fd; 37 + elf_fd->elf = elf; 38 + return 0; 39 + } 40 + 41 + void elf_close(struct elf_fd *elf_fd) 42 + { 43 + if (!elf_fd) 44 + return; 45 + elf_end(elf_fd->elf); 46 + close(elf_fd->fd); 47 + } 48 + 49 + /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. 
 */
static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
{
	while ((scn = elf_nextscn(elf, scn)) != NULL) {
		GElf_Shdr sh;

		if (!gelf_getshdr(scn, &sh))
			continue;
		if (sh.sh_type == sh_type)
			return scn;
	}
	return NULL;
}

/* One resolved symbol, as produced by elf_sym_iter_next() below */
struct elf_sym {
	const char *name;	/* symbol name from the strtab */
	GElf_Sym sym;		/* raw ELF symbol entry */
	GElf_Shdr sh;		/* header of symbol's containing section */
};

/* Iterator state over one symbol table (SHT_SYMTAB or SHT_DYNSYM) section */
struct elf_sym_iter {
	Elf *elf;
	Elf_Data *syms;
	size_t nr_syms;
	size_t strtabidx;
	size_t next_sym_idx;
	struct elf_sym sym;	/* storage returned by elf_sym_iter_next() */
	int st_type;		/* only symbols of this STT_* type are yielded */
};

/* Initialize iterator over the first section of given sh_type in @elf.
 * Returns 0 on success, -ENOENT if no such section exists (callers treat
 * this as non-fatal), or another negative error code.
 */
static int elf_sym_iter_new(struct elf_sym_iter *iter,
			    Elf *elf, const char *binary_path,
			    int sh_type, int st_type)
{
	Elf_Scn *scn = NULL;
	GElf_Ehdr ehdr;
	GElf_Shdr sh;

	memset(iter, 0, sizeof(*iter));

	if (!gelf_getehdr(elf, &ehdr)) {
		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
		return -EINVAL;
	}

	scn = elf_find_next_scn_by_type(elf, sh_type, NULL);
	if (!scn) {
		pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
			 binary_path);
		return -ENOENT;
	}

	if (!gelf_getshdr(scn, &sh))
		return -EINVAL;

	iter->strtabidx = sh.sh_link;
	iter->syms = elf_getdata(scn, 0);
	if (!iter->syms) {
		pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
			binary_path, elf_errmsg(-1));
		return -EINVAL;
	}
	iter->nr_syms = iter->syms->d_size / sh.sh_entsize;
	iter->elf = elf;
	iter->st_type = st_type;
	return 0;
}

/* Advance iterator to the next symbol of the requested st_type that has a
 * resolvable name and containing section; returns NULL when exhausted.
 * The returned pointer aliases iter->sym and is invalidated by the next call.
 */
static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
{
	struct elf_sym *ret = &iter->sym;
	GElf_Sym *sym = &ret->sym;
	const char *name = NULL;
	Elf_Scn *sym_scn;
	size_t idx;

	for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) {
		if (!gelf_getsym(iter->syms, idx, sym))
			continue;
		if (GELF_ST_TYPE(sym->st_info) != iter->st_type)
			continue;
		name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name);
		if (!name)
			continue;
		sym_scn = elf_getscn(iter->elf, sym->st_shndx);
		if (!sym_scn)
			continue;
		if (!gelf_getshdr(sym_scn, &ret->sh))
			continue;

		iter->next_sym_idx = idx + 1;
		ret->name = name;
		return ret;
	}

	return NULL;
}


/* Transform symbol's virtual address (absolute for binaries and relative
 * for shared libs) into file offset, which is what kernel is expecting
 * for uprobe/uretprobe attachment.
 * See Documentation/trace/uprobetracer.rst for more details. This is done
 * by looking up symbol's containing section's header and using its virtual
 * address (sh_addr) and corresponding file offset (sh_offset) to transform
 * sym.st_value (virtual address) into desired final file offset.
 */
static unsigned long elf_sym_offset(struct elf_sym *sym)
{
	return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset;
}

/* Find offset of function name in the provided ELF object. "binary_path" is
 * the path to the ELF binary represented by "elf", and only used for error
 * reporting matters. "name" matches symbol name or name@@LIB for library
 * functions.
 */
long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
{
	int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
	bool is_shared_lib, is_name_qualified;
	long ret = -ENOENT;
	size_t name_len;
	GElf_Ehdr ehdr;

	if (!gelf_getehdr(elf, &ehdr)) {
		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
		ret = -LIBBPF_ERRNO__FORMAT;
		goto out;
	}
	/* for shared lib case, we do not need to calculate relative offset */
	is_shared_lib = ehdr.e_type == ET_DYN;

	name_len = strlen(name);
	/* Does name specify "@@LIB"? */
	is_name_qualified = strstr(name, "@@") != NULL;

	/* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
	 * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
	 * linked binary may not have SHT_DYNSYM, so absence of a section should not be
	 * reported as a warning/error.
	 */
	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
		struct elf_sym_iter iter;
		struct elf_sym *sym;
		int last_bind = -1;
		int cur_bind;

		ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);
		if (ret == -ENOENT)
			continue;
		if (ret)
			goto out;

		while ((sym = elf_sym_iter_next(&iter))) {
			/* User can specify func, func@@LIB or func@@LIB_VERSION. */
			if (strncmp(sym->name, name, name_len) != 0)
				continue;
			/* ...but we don't want a search for "foo" to match 'foo2" also, so any
			 * additional characters in sname should be of the form "@@LIB".
			 */
			if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@')
				continue;

			cur_bind = GELF_ST_BIND(sym->sym.st_info);

			if (ret > 0) {
				/* handle multiple matches */
				if (last_bind != STB_WEAK && cur_bind != STB_WEAK) {
					/* Only accept one non-weak bind. */
					pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
						sym->name, name, binary_path);
					ret = -LIBBPF_ERRNO__FORMAT;
					goto out;
				} else if (cur_bind == STB_WEAK) {
					/* already have a non-weak bind, and
					 * this is a weak bind, so ignore.
					 */
					continue;
				}
			}

			ret = elf_sym_offset(sym);
			last_bind = cur_bind;
		}
		if (ret > 0)
			break;
	}

	if (ret > 0) {
		pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
			 ret);
	} else {
		if (ret == 0) {
			pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
				is_shared_lib ? "should not be 0 in a shared library" :
						"try using shared library path instead");
			ret = -ENOENT;
		} else {
			pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
		}
	}
out:
	return ret;
}

/* Find offset of function name in ELF object specified by path. "name" matches
 * symbol name or name@@LIB for library functions.
 */
long elf_find_func_offset_from_file(const char *binary_path, const char *name)
{
	struct elf_fd elf_fd;
	long ret = -ENOENT;

	ret = elf_open(binary_path, &elf_fd);
	if (ret)
		return ret;
	ret = elf_find_func_offset(elf_fd.elf, binary_path, name);
	elf_close(&elf_fd);
	return ret;
}

/* Requested symbol and, once matched, its binding and position in the
 * caller-provided syms/offsets arrays
 */
struct symbol {
	const char *name;
	int bind;	/* STB_* binding of the match accepted so far */
	int idx;	/* index into the original (pre-sort) input order */
};

/* qsort()/bsearch() comparator: order struct symbol entries by name */
static int symbol_cmp(const void *a, const void *b)
{
	const struct symbol *sym_a = a;
	const struct symbol *sym_b = b;

	return strcmp(sym_a->name, sym_b->name);
}

/*
 * Return offsets in @poffsets for symbols specified in @syms array argument.
 * On success returns 0 and offsets are returned in allocated array with @cnt
 * size, that needs to be released by the caller.
 */
int elf_resolve_syms_offsets(const char *binary_path, int cnt,
			     const char **syms, unsigned long **poffsets)
{
	int sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
	int err = 0, i, cnt_done = 0;
	unsigned long *offsets;
	struct symbol *symbols;
	struct elf_fd elf_fd;

	err = elf_open(binary_path, &elf_fd);
	if (err)
		return err;

	offsets = calloc(cnt, sizeof(*offsets));
	symbols = calloc(cnt, sizeof(*symbols));

	if (!offsets || !symbols) {
		err = -ENOMEM;
		goto out;
	}

	for (i = 0; i < cnt; i++) {
		symbols[i].name = syms[i];
		symbols[i].idx = i;
	}

	/* sort by name so each ELF symbol can be matched via bsearch() below */
	qsort(symbols, cnt, sizeof(*symbols), symbol_cmp);

	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
		struct elf_sym_iter iter;
		struct elf_sym *sym;

		err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC);
		if (err == -ENOENT)
			continue;
		if (err)
			goto out;

		while ((sym = elf_sym_iter_next(&iter))) {
			unsigned long sym_offset = elf_sym_offset(sym);
			int bind = GELF_ST_BIND(sym->sym.st_info);
			struct symbol *found, tmp = {
				.name = sym->name,
			};
			unsigned long *offset;

			found = bsearch(&tmp, symbols, cnt, sizeof(*symbols), symbol_cmp);
			if (!found)
				continue;

			offset = &offsets[found->idx];
			if (*offset > 0) {
				/* same offset, no problem */
				if (*offset == sym_offset)
					continue;
				/* handle multiple matches */
				if (found->bind != STB_WEAK && bind != STB_WEAK) {
					/* Only accept one non-weak bind. */
					pr_warn("elf: ambiguous match found '%s@%lu' in '%s' previous offset %lu\n",
						sym->name, sym_offset, binary_path, *offset);
					err = -ESRCH;
					goto out;
				} else if (bind == STB_WEAK) {
					/* already have a non-weak bind, and
					 * this is a weak bind, so ignore.
					 */
					continue;
				}
			} else {
				cnt_done++;
			}
			*offset = sym_offset;
			found->bind = bind;
		}
	}

	/* every requested symbol must have been resolved exactly once */
	if (cnt != cnt_done) {
		err = -ENOENT;
		goto out;
	}

	*poffsets = offsets;

out:
	free(symbols);
	if (err)
		free(offsets);
	elf_close(&elf_fd);
	return err;
}

/*
 * Return offsets in @poffsets for symbols specified by @pattern argument.
 * On success returns 0 and offsets are returned in allocated @poffsets
 * array with the @pcnt size, that needs to be released by the caller.
 */
int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
				unsigned long **poffsets, size_t *pcnt)
{
	int sh_types[2] = { SHT_SYMTAB, SHT_DYNSYM };
	unsigned long *offsets = NULL;
	size_t cap = 0, cnt = 0;
	struct elf_fd elf_fd;
	int err = 0, i;

	err = elf_open(binary_path, &elf_fd);
	if (err)
		return err;

	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
		struct elf_sym_iter iter;
		struct elf_sym *sym;

		err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC);
		if (err == -ENOENT)
			continue;
		if (err)
			goto out;

		while ((sym = elf_sym_iter_next(&iter))) {
			if (!glob_match(sym->name, pattern))
				continue;

			err = libbpf_ensure_mem((void **) &offsets, &cap, sizeof(*offsets),
						cnt + 1);
			if (err)
				goto out;

			offsets[cnt++] = elf_sym_offset(sym);
		}

		/* If we found anything in the first symbol section,
		 * do not search others to avoid duplicates.
		 */
		if (cnt)
			break;
	}

	if (cnt) {
		*poffsets = offsets;
		*pcnt = cnt;
	} else {
		err = -ENOENT;
	}

out:
	if (err)
		free(offsets);
	elf_close(&elf_fd);
	return err;
}
+235 -189
tools/lib/bpf/libbpf.c
··· 120 120 [BPF_NETFILTER] = "netfilter", 121 121 [BPF_TCX_INGRESS] = "tcx_ingress", 122 122 [BPF_TCX_EGRESS] = "tcx_egress", 123 + [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", 123 124 }; 124 125 125 126 static const char * const link_type_name[] = { ··· 136 135 [BPF_LINK_TYPE_STRUCT_OPS] = "struct_ops", 137 136 [BPF_LINK_TYPE_NETFILTER] = "netfilter", 138 137 [BPF_LINK_TYPE_TCX] = "tcx", 138 + [BPF_LINK_TYPE_UPROBE_MULTI] = "uprobe_multi", 139 139 }; 140 140 141 141 static const char * const map_type_name[] = { ··· 367 365 SEC_SLEEPABLE = 8, 368 366 /* BPF program support non-linear XDP buffer */ 369 367 SEC_XDP_FRAGS = 16, 368 + /* Setup proper attach type for usdt probes. */ 369 + SEC_USDT = 32, 370 370 }; 371 371 372 372 struct bpf_sec_def { ··· 554 550 int btf_id; 555 551 int sec_btf_id; 556 552 const char *name; 553 + char *essent_name; 557 554 bool is_set; 558 555 bool is_weak; 559 556 union { ··· 3775 3770 struct extern_desc *ext; 3776 3771 int i, n, off, dummy_var_btf_id; 3777 3772 const char *ext_name, *sec_name; 3773 + size_t ext_essent_len; 3778 3774 Elf_Scn *scn; 3779 3775 Elf64_Shdr *sh; 3780 3776 ··· 3824 3818 ext->name = btf__name_by_offset(obj->btf, t->name_off); 3825 3819 ext->sym_idx = i; 3826 3820 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK; 3821 + 3822 + ext_essent_len = bpf_core_essential_name_len(ext->name); 3823 + ext->essent_name = NULL; 3824 + if (ext_essent_len != strlen(ext->name)) { 3825 + ext->essent_name = strndup(ext->name, ext_essent_len); 3826 + if (!ext->essent_name) 3827 + return -ENOMEM; 3828 + } 3827 3829 3828 3830 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); 3829 3831 if (ext->sec_btf_id <= 0) { ··· 4831 4817 return link_fd < 0 && err == -EBADF; 4832 4818 } 4833 4819 4820 + static int probe_uprobe_multi_link(void) 4821 + { 4822 + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, 4823 + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, 4824 + ); 4825 + LIBBPF_OPTS(bpf_link_create_opts, link_opts); 
4826 + struct bpf_insn insns[] = { 4827 + BPF_MOV64_IMM(BPF_REG_0, 0), 4828 + BPF_EXIT_INSN(), 4829 + }; 4830 + int prog_fd, link_fd, err; 4831 + unsigned long offset = 0; 4832 + 4833 + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", 4834 + insns, ARRAY_SIZE(insns), &load_opts); 4835 + if (prog_fd < 0) 4836 + return -errno; 4837 + 4838 + /* Creating uprobe in '/' binary should fail with -EBADF. */ 4839 + link_opts.uprobe_multi.path = "/"; 4840 + link_opts.uprobe_multi.offsets = &offset; 4841 + link_opts.uprobe_multi.cnt = 1; 4842 + 4843 + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); 4844 + err = -errno; /* close() can clobber errno */ 4845 + 4846 + if (link_fd >= 0) 4847 + close(link_fd); 4848 + close(prog_fd); 4849 + 4850 + return link_fd < 0 && err == -EBADF; 4851 + } 4852 + 4834 4853 static int probe_kern_bpf_cookie(void) 4835 4854 { 4836 4855 struct bpf_insn insns[] = { ··· 4959 4912 }, 4960 4913 [FEAT_SYSCALL_WRAPPER] = { 4961 4914 "Kernel using syscall wrapper", probe_kern_syscall_wrapper, 4915 + }, 4916 + [FEAT_UPROBE_MULTI_LINK] = { 4917 + "BPF multi-uprobe link support", probe_uprobe_multi_link, 4962 4918 }, 4963 4919 }; 4964 4920 ··· 6830 6780 if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS)) 6831 6781 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; 6832 6782 6783 + /* special check for usdt to use uprobe_multi link */ 6784 + if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) 6785 + prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI; 6786 + 6833 6787 if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { 6834 6788 int btf_obj_fd = 0, btf_type_id = 0, err; 6835 6789 const char *attach_name; ··· 6902 6848 if (!insns || !insns_cnt) 6903 6849 return -EINVAL; 6904 6850 6905 - load_attr.expected_attach_type = prog->expected_attach_type; 6906 6851 if (kernel_supports(obj, FEAT_PROG_NAME)) 6907 6852 prog_name = prog->name; 6908 6853 load_attr.attach_prog_fd = prog->attach_prog_fd; ··· 6936 
6883 insns = prog->insns; 6937 6884 insns_cnt = prog->insns_cnt; 6938 6885 } 6886 + 6887 + /* allow prog_prepare_load_fn to change expected_attach_type */ 6888 + load_attr.expected_attach_type = prog->expected_attach_type; 6939 6889 6940 6890 if (obj->gen_loader) { 6941 6891 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, ··· 7680 7624 7681 7625 local_func_proto_id = ext->ksym.type_id; 7682 7626 7683 - kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf); 7627 + kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf, 7628 + &mod_btf); 7684 7629 if (kfunc_id < 0) { 7685 7630 if (kfunc_id == -ESRCH && ext->is_weak) 7686 7631 return 0; ··· 7696 7639 ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id, 7697 7640 kern_btf, kfunc_proto_id); 7698 7641 if (ret <= 0) { 7642 + if (ext->is_weak) 7643 + return 0; 7644 + 7699 7645 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n", 7700 7646 ext->name, local_func_proto_id, 7701 7647 mod_btf ? 
mod_btf->name : "vmlinux", kfunc_proto_id); ··· 8376 8316 return 0; 8377 8317 } 8378 8318 8319 + int bpf_object__unpin(struct bpf_object *obj, const char *path) 8320 + { 8321 + int err; 8322 + 8323 + err = bpf_object__unpin_programs(obj, path); 8324 + if (err) 8325 + return libbpf_err(err); 8326 + 8327 + err = bpf_object__unpin_maps(obj, path); 8328 + if (err) 8329 + return libbpf_err(err); 8330 + 8331 + return 0; 8332 + } 8333 + 8379 8334 static void bpf_map__destroy(struct bpf_map *map) 8380 8335 { 8381 8336 if (map->inner_map) { ··· 8438 8363 bpf_object__elf_finish(obj); 8439 8364 bpf_object_unload(obj); 8440 8365 btf__free(obj->btf); 8366 + btf__free(obj->btf_vmlinux); 8441 8367 btf_ext__free(obj->btf_ext); 8442 8368 8443 8369 for (i = 0; i < obj->nr_maps; i++) ··· 8446 8370 8447 8371 zfree(&obj->btf_custom_path); 8448 8372 zfree(&obj->kconfig); 8373 + 8374 + for (i = 0; i < obj->nr_extern; i++) 8375 + zfree(&obj->externs[i].essent_name); 8376 + 8449 8377 zfree(&obj->externs); 8450 8378 obj->nr_extern = 0; 8451 8379 ··· 8761 8681 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8762 8682 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8763 8683 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8684 + static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8764 8685 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8765 8686 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); 8766 8687 ··· 8777 8696 SEC_DEF("uretprobe.s+", KPROBE, 0, SEC_SLEEPABLE, attach_uprobe), 8778 8697 SEC_DEF("kprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 8779 8698 SEC_DEF("kretprobe.multi+", KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi), 8699 + SEC_DEF("uprobe.multi+", 
KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 8700 + SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), 8701 + SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 8702 + SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), 8780 8703 SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 8781 8704 SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), 8782 - SEC_DEF("usdt+", KPROBE, 0, SEC_NONE, attach_usdt), 8705 + SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), 8706 + SEC_DEF("usdt.s+", KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt), 8783 8707 SEC_DEF("tc/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */ 8784 8708 SEC_DEF("tc/egress", SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE), /* alias for tcx */ 8785 8709 SEC_DEF("tcx/ingress", SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), ··· 10635 10549 } 10636 10550 10637 10551 /* Adapted from perf/util/string.c */ 10638 - static bool glob_match(const char *str, const char *pat) 10552 + bool glob_match(const char *str, const char *pat) 10639 10553 { 10640 10554 while (*str && *pat && *pat != '*') { 10641 10555 if (*pat == '?') { /* Matches any single character */ ··· 10988 10902 return libbpf_get_error(*link); 10989 10903 } 10990 10904 10905 + static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link) 10906 + { 10907 + char *probe_type = NULL, *binary_path = NULL, *func_name = NULL; 10908 + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 10909 + int n, ret = -EINVAL; 10910 + 10911 + *link = NULL; 10912 + 10913 + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%ms", 10914 + &probe_type, &binary_path, &func_name); 10915 + switch (n) { 10916 + case 1: 10917 + /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. 
*/ 10918 + ret = 0; 10919 + break; 10920 + case 3: 10921 + opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0; 10922 + *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); 10923 + ret = libbpf_get_error(*link); 10924 + break; 10925 + default: 10926 + pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name, 10927 + prog->sec_name); 10928 + break; 10929 + } 10930 + free(probe_type); 10931 + free(binary_path); 10932 + free(func_name); 10933 + return ret; 10934 + } 10935 + 10991 10936 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz, 10992 10937 const char *binary_path, uint64_t offset) 10993 10938 { ··· 11099 10982 /* Clear the newly added legacy uprobe_event */ 11100 10983 remove_uprobe_event_legacy(probe_name, retprobe); 11101 10984 return err; 11102 - } 11103 - 11104 - /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ 11105 - static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) 11106 - { 11107 - while ((scn = elf_nextscn(elf, scn)) != NULL) { 11108 - GElf_Shdr sh; 11109 - 11110 - if (!gelf_getshdr(scn, &sh)) 11111 - continue; 11112 - if (sh.sh_type == sh_type) 11113 - return scn; 11114 - } 11115 - return NULL; 11116 - } 11117 - 11118 - /* Find offset of function name in the provided ELF object. "binary_path" is 11119 - * the path to the ELF binary represented by "elf", and only used for error 11120 - * reporting matters. "name" matches symbol name or name@@LIB for library 11121 - * functions. 
11122 - */ 11123 - static long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) 11124 - { 11125 - int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB }; 11126 - bool is_shared_lib, is_name_qualified; 11127 - long ret = -ENOENT; 11128 - size_t name_len; 11129 - GElf_Ehdr ehdr; 11130 - 11131 - if (!gelf_getehdr(elf, &ehdr)) { 11132 - pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1)); 11133 - ret = -LIBBPF_ERRNO__FORMAT; 11134 - goto out; 11135 - } 11136 - /* for shared lib case, we do not need to calculate relative offset */ 11137 - is_shared_lib = ehdr.e_type == ET_DYN; 11138 - 11139 - name_len = strlen(name); 11140 - /* Does name specify "@@LIB"? */ 11141 - is_name_qualified = strstr(name, "@@") != NULL; 11142 - 11143 - /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if 11144 - * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically 11145 - * linked binary may not have SHT_DYMSYM, so absence of a section should not be 11146 - * reported as a warning/error. 
11147 - */ 11148 - for (i = 0; i < ARRAY_SIZE(sh_types); i++) { 11149 - size_t nr_syms, strtabidx, idx; 11150 - Elf_Data *symbols = NULL; 11151 - Elf_Scn *scn = NULL; 11152 - int last_bind = -1; 11153 - const char *sname; 11154 - GElf_Shdr sh; 11155 - 11156 - scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL); 11157 - if (!scn) { 11158 - pr_debug("elf: failed to find symbol table ELF sections in '%s'\n", 11159 - binary_path); 11160 - continue; 11161 - } 11162 - if (!gelf_getshdr(scn, &sh)) 11163 - continue; 11164 - strtabidx = sh.sh_link; 11165 - symbols = elf_getdata(scn, 0); 11166 - if (!symbols) { 11167 - pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n", 11168 - binary_path, elf_errmsg(-1)); 11169 - ret = -LIBBPF_ERRNO__FORMAT; 11170 - goto out; 11171 - } 11172 - nr_syms = symbols->d_size / sh.sh_entsize; 11173 - 11174 - for (idx = 0; idx < nr_syms; idx++) { 11175 - int curr_bind; 11176 - GElf_Sym sym; 11177 - Elf_Scn *sym_scn; 11178 - GElf_Shdr sym_sh; 11179 - 11180 - if (!gelf_getsym(symbols, idx, &sym)) 11181 - continue; 11182 - 11183 - if (GELF_ST_TYPE(sym.st_info) != STT_FUNC) 11184 - continue; 11185 - 11186 - sname = elf_strptr(elf, strtabidx, sym.st_name); 11187 - if (!sname) 11188 - continue; 11189 - 11190 - curr_bind = GELF_ST_BIND(sym.st_info); 11191 - 11192 - /* User can specify func, func@@LIB or func@@LIB_VERSION. */ 11193 - if (strncmp(sname, name, name_len) != 0) 11194 - continue; 11195 - /* ...but we don't want a search for "foo" to match 'foo2" also, so any 11196 - * additional characters in sname should be of the form "@@LIB". 11197 - */ 11198 - if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@') 11199 - continue; 11200 - 11201 - if (ret >= 0) { 11202 - /* handle multiple matches */ 11203 - if (last_bind != STB_WEAK && curr_bind != STB_WEAK) { 11204 - /* Only accept one non-weak bind. 
*/ 11205 - pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n", 11206 - sname, name, binary_path); 11207 - ret = -LIBBPF_ERRNO__FORMAT; 11208 - goto out; 11209 - } else if (curr_bind == STB_WEAK) { 11210 - /* already have a non-weak bind, and 11211 - * this is a weak bind, so ignore. 11212 - */ 11213 - continue; 11214 - } 11215 - } 11216 - 11217 - /* Transform symbol's virtual address (absolute for 11218 - * binaries and relative for shared libs) into file 11219 - * offset, which is what kernel is expecting for 11220 - * uprobe/uretprobe attachment. 11221 - * See Documentation/trace/uprobetracer.rst for more 11222 - * details. 11223 - * This is done by looking up symbol's containing 11224 - * section's header and using it's virtual address 11225 - * (sh_addr) and corresponding file offset (sh_offset) 11226 - * to transform sym.st_value (virtual address) into 11227 - * desired final file offset. 11228 - */ 11229 - sym_scn = elf_getscn(elf, sym.st_shndx); 11230 - if (!sym_scn) 11231 - continue; 11232 - if (!gelf_getshdr(sym_scn, &sym_sh)) 11233 - continue; 11234 - 11235 - ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset; 11236 - last_bind = curr_bind; 11237 - } 11238 - if (ret > 0) 11239 - break; 11240 - } 11241 - 11242 - if (ret > 0) { 11243 - pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path, 11244 - ret); 11245 - } else { 11246 - if (ret == 0) { 11247 - pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path, 11248 - is_shared_lib ? "should not be 0 in a shared library" : 11249 - "try using shared library path instead"); 11250 - ret = -ENOENT; 11251 - } else { 11252 - pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path); 11253 - } 11254 - } 11255 - out: 11256 - return ret; 11257 - } 11258 - 11259 - /* Find offset of function name in ELF object specified by path. "name" matches 11260 - * symbol name or name@@LIB for library functions. 
11261 - */ 11262 - static long elf_find_func_offset_from_file(const char *binary_path, const char *name) 11263 - { 11264 - char errmsg[STRERR_BUFSIZE]; 11265 - long ret = -ENOENT; 11266 - Elf *elf; 11267 - int fd; 11268 - 11269 - fd = open(binary_path, O_RDONLY | O_CLOEXEC); 11270 - if (fd < 0) { 11271 - ret = -errno; 11272 - pr_warn("failed to open %s: %s\n", binary_path, 11273 - libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); 11274 - return ret; 11275 - } 11276 - elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); 11277 - if (!elf) { 11278 - pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1)); 11279 - close(fd); 11280 - return -LIBBPF_ERRNO__FORMAT; 11281 - } 11282 - 11283 - ret = elf_find_func_offset(elf, binary_path, name); 11284 - elf_end(elf); 11285 - close(fd); 11286 - return ret; 11287 10985 } 11288 10986 11289 10987 /* Find offset of function name in archive specified by path. Currently ··· 11241 11309 } 11242 11310 } 11243 11311 return -ENOENT; 11312 + } 11313 + 11314 + struct bpf_link * 11315 + bpf_program__attach_uprobe_multi(const struct bpf_program *prog, 11316 + pid_t pid, 11317 + const char *path, 11318 + const char *func_pattern, 11319 + const struct bpf_uprobe_multi_opts *opts) 11320 + { 11321 + const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; 11322 + LIBBPF_OPTS(bpf_link_create_opts, lopts); 11323 + unsigned long *resolved_offsets = NULL; 11324 + int err = 0, link_fd, prog_fd; 11325 + struct bpf_link *link = NULL; 11326 + char errmsg[STRERR_BUFSIZE]; 11327 + char full_path[PATH_MAX]; 11328 + const __u64 *cookies; 11329 + const char **syms; 11330 + size_t cnt; 11331 + 11332 + if (!OPTS_VALID(opts, bpf_uprobe_multi_opts)) 11333 + return libbpf_err_ptr(-EINVAL); 11334 + 11335 + syms = OPTS_GET(opts, syms, NULL); 11336 + offsets = OPTS_GET(opts, offsets, NULL); 11337 + ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); 11338 + cookies = OPTS_GET(opts, cookies, NULL); 11339 + cnt = OPTS_GET(opts, cnt, 0); 11340 + 
11341 + /* 11342 + * User can specify 2 mutually exclusive set of inputs: 11343 + * 11344 + * 1) use only path/func_pattern/pid arguments 11345 + * 11346 + * 2) use path/pid with allowed combinations of: 11347 + * syms/offsets/ref_ctr_offsets/cookies/cnt 11348 + * 11349 + * - syms and offsets are mutually exclusive 11350 + * - ref_ctr_offsets and cookies are optional 11351 + * 11352 + * Any other usage results in error. 11353 + */ 11354 + 11355 + if (!path) 11356 + return libbpf_err_ptr(-EINVAL); 11357 + if (!func_pattern && cnt == 0) 11358 + return libbpf_err_ptr(-EINVAL); 11359 + 11360 + if (func_pattern) { 11361 + if (syms || offsets || ref_ctr_offsets || cookies || cnt) 11362 + return libbpf_err_ptr(-EINVAL); 11363 + } else { 11364 + if (!!syms == !!offsets) 11365 + return libbpf_err_ptr(-EINVAL); 11366 + } 11367 + 11368 + if (func_pattern) { 11369 + if (!strchr(path, '/')) { 11370 + err = resolve_full_path(path, full_path, sizeof(full_path)); 11371 + if (err) { 11372 + pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", 11373 + prog->name, path, err); 11374 + return libbpf_err_ptr(err); 11375 + } 11376 + path = full_path; 11377 + } 11378 + 11379 + err = elf_resolve_pattern_offsets(path, func_pattern, 11380 + &resolved_offsets, &cnt); 11381 + if (err < 0) 11382 + return libbpf_err_ptr(err); 11383 + offsets = resolved_offsets; 11384 + } else if (syms) { 11385 + err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets); 11386 + if (err < 0) 11387 + return libbpf_err_ptr(err); 11388 + offsets = resolved_offsets; 11389 + } 11390 + 11391 + lopts.uprobe_multi.path = path; 11392 + lopts.uprobe_multi.offsets = offsets; 11393 + lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; 11394 + lopts.uprobe_multi.cookies = cookies; 11395 + lopts.uprobe_multi.cnt = cnt; 11396 + lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? 
BPF_F_UPROBE_MULTI_RETURN : 0; 11397 + 11398 + if (pid == 0) 11399 + pid = getpid(); 11400 + if (pid > 0) 11401 + lopts.uprobe_multi.pid = pid; 11402 + 11403 + link = calloc(1, sizeof(*link)); 11404 + if (!link) { 11405 + err = -ENOMEM; 11406 + goto error; 11407 + } 11408 + link->detach = &bpf_link__detach_fd; 11409 + 11410 + prog_fd = bpf_program__fd(prog); 11411 + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts); 11412 + if (link_fd < 0) { 11413 + err = -errno; 11414 + pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", 11415 + prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 11416 + goto error; 11417 + } 11418 + link->fd = link_fd; 11419 + free(resolved_offsets); 11420 + return link; 11421 + 11422 + error: 11423 + free(resolved_offsets); 11424 + free(link); 11425 + return libbpf_err_ptr(err); 11244 11426 } 11245 11427 11246 11428 LIBBPF_API struct bpf_link *
+52
tools/lib/bpf/libbpf.h
··· 266 266 LIBBPF_API int bpf_object__unpin_programs(struct bpf_object *obj, 267 267 const char *path); 268 268 LIBBPF_API int bpf_object__pin(struct bpf_object *object, const char *path); 269 + LIBBPF_API int bpf_object__unpin(struct bpf_object *object, const char *path); 269 270 270 271 LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); 271 272 LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); ··· 529 528 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, 530 529 const char *pattern, 531 530 const struct bpf_kprobe_multi_opts *opts); 531 + 532 + struct bpf_uprobe_multi_opts { 533 + /* size of this struct, for forward/backward compatibility */ 534 + size_t sz; 535 + /* array of function symbols to attach to */ 536 + const char **syms; 537 + /* array of function addresses to attach to */ 538 + const unsigned long *offsets; 539 + /* optional, array of associated ref counter offsets */ 540 + const unsigned long *ref_ctr_offsets; 541 + /* optional, array of associated BPF cookies */ 542 + const __u64 *cookies; 543 + /* number of elements in syms/addrs/cookies arrays */ 544 + size_t cnt; 545 + /* create return uprobes */ 546 + bool retprobe; 547 + size_t :0; 548 + }; 549 + 550 + #define bpf_uprobe_multi_opts__last_field retprobe 551 + 552 + /** 553 + * @brief **bpf_program__attach_uprobe_multi()** attaches a BPF program 554 + * to multiple uprobes with uprobe_multi link. 
555 + * 556 + * User can specify 2 mutually exclusive set of inputs: 557 + * 558 + * 1) use only path/func_pattern/pid arguments 559 + * 560 + * 2) use path/pid with allowed combinations of 561 + * syms/offsets/ref_ctr_offsets/cookies/cnt 562 + * 563 + * - syms and offsets are mutually exclusive 564 + * - ref_ctr_offsets and cookies are optional 565 + * 566 + * 567 + * @param prog BPF program to attach 568 + * @param pid Process ID to attach the uprobe to, 0 for self (own process), 569 + * -1 for all processes 570 + * @param binary_path Path to binary 571 + * @param func_pattern Regular expression to specify functions to attach 572 + * BPF program to 573 + * @param opts Additional options (see **struct bpf_uprobe_multi_opts**) 574 + * @return A new BPF link on success; or NULL with errno set, otherwise 575 + */ 576 + LIBBPF_API struct bpf_link * 577 + bpf_program__attach_uprobe_multi(const struct bpf_program *prog, 578 + pid_t pid, 579 + const char *binary_path, 580 + const char *func_pattern, 581 + const struct bpf_uprobe_multi_opts *opts); 532 582 533 583 struct bpf_ksyscall_opts { 534 584 /* size of this struct, for forward/backward compatibility */
+2
tools/lib/bpf/libbpf.map
··· 395 395 LIBBPF_1.3.0 { 396 396 global: 397 397 bpf_obj_pin_opts; 398 + bpf_object__unpin; 398 399 bpf_prog_detach_opts; 399 400 bpf_program__attach_netfilter; 400 401 bpf_program__attach_tcx; 402 + bpf_program__attach_uprobe_multi; 401 403 } LIBBPF_1.2.0;
+21
tools/lib/bpf/libbpf_internal.h
··· 15 15 #include <linux/err.h> 16 16 #include <fcntl.h> 17 17 #include <unistd.h> 18 + #include <libelf.h> 18 19 #include "relo_core.h" 19 20 20 21 /* make sure libbpf doesn't use kernel-only integer typedefs */ ··· 355 354 FEAT_BTF_ENUM64, 356 355 /* Kernel uses syscall wrapper (CONFIG_ARCH_HAS_SYSCALL_WRAPPER) */ 357 356 FEAT_SYSCALL_WRAPPER, 357 + /* BPF multi-uprobe link support */ 358 + FEAT_UPROBE_MULTI_LINK, 358 359 __FEAT_CNT, 359 360 }; 360 361 ··· 579 576 580 577 #define PROG_LOAD_ATTEMPTS 5 581 578 int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts); 579 + 580 + bool glob_match(const char *str, const char *pat); 581 + 582 + long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name); 583 + long elf_find_func_offset_from_file(const char *binary_path, const char *name); 584 + 585 + struct elf_fd { 586 + Elf *elf; 587 + int fd; 588 + }; 589 + 590 + int elf_open(const char *binary_path, struct elf_fd *elf_fd); 591 + void elf_close(struct elf_fd *elf_fd); 592 + 593 + int elf_resolve_syms_offsets(const char *binary_path, int cnt, 594 + const char **syms, unsigned long **poffsets); 595 + int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern, 596 + unsigned long **poffsets, size_t *pcnt); 582 597 583 598 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
+1 -1
tools/lib/bpf/relo_core.c
··· 776 776 break; 777 777 case BPF_CORE_FIELD_SIGNED: 778 778 *val = (btf_is_any_enum(mt) && BTF_INFO_KFLAG(mt->info)) || 779 - (btf_int_encoding(mt) & BTF_INT_SIGNED); 779 + (btf_is_int(mt) && (btf_int_encoding(mt) & BTF_INT_SIGNED)); 780 780 if (validate) 781 781 *validate = true; /* signedness is never ambiguous */ 782 782 break;
+79 -37
tools/lib/bpf/usdt.c
··· 250 250 251 251 bool has_bpf_cookie; 252 252 bool has_sema_refcnt; 253 + bool has_uprobe_multi; 253 254 }; 254 255 255 256 struct usdt_manager *usdt_manager_new(struct bpf_object *obj) ··· 285 284 */ 286 285 man->has_sema_refcnt = faccessat(AT_FDCWD, ref_ctr_sysfs_path, F_OK, AT_EACCESS) == 0; 287 286 287 + /* 288 + * Detect kernel support for uprobe multi link to be used for attaching 289 + * usdt probes. 290 + */ 291 + man->has_uprobe_multi = kernel_supports(obj, FEAT_UPROBE_MULTI_LINK); 288 292 return man; 289 293 } 290 294 ··· 814 808 long abs_ip; 815 809 struct bpf_link *link; 816 810 } *uprobes; 811 + 812 + struct bpf_link *multi_link; 817 813 }; 818 814 819 815 static int bpf_link_usdt_detach(struct bpf_link *link) ··· 824 816 struct usdt_manager *man = usdt_link->usdt_man; 825 817 int i; 826 818 819 + bpf_link__destroy(usdt_link->multi_link); 820 + 821 + /* When having multi_link, uprobe_cnt is 0 */ 827 822 for (i = 0; i < usdt_link->uprobe_cnt; i++) { 828 823 /* detach underlying uprobe link */ 829 824 bpf_link__destroy(usdt_link->uprobes[i].link); ··· 957 946 const char *usdt_provider, const char *usdt_name, 958 947 __u64 usdt_cookie) 959 948 { 960 - int i, fd, err, spec_map_fd, ip_map_fd; 949 + unsigned long *offsets = NULL, *ref_ctr_offsets = NULL; 950 + int i, err, spec_map_fd, ip_map_fd; 961 951 LIBBPF_OPTS(bpf_uprobe_opts, opts); 962 952 struct hashmap *specs_hash = NULL; 963 953 struct bpf_link_usdt *link = NULL; 964 954 struct usdt_target *targets = NULL; 955 + __u64 *cookies = NULL; 956 + struct elf_fd elf_fd; 965 957 size_t target_cnt; 966 - Elf *elf; 967 958 968 959 spec_map_fd = bpf_map__fd(man->specs_map); 969 960 ip_map_fd = bpf_map__fd(man->ip_to_spec_id_map); 970 961 971 - fd = open(path, O_RDONLY | O_CLOEXEC); 972 - if (fd < 0) { 973 - err = -errno; 974 - pr_warn("usdt: failed to open ELF binary '%s': %d\n", path, err); 962 + err = elf_open(path, &elf_fd); 963 + if (err) 975 964 return libbpf_err_ptr(err); 976 - } 977 965 978 - elf = 
elf_begin(fd, ELF_C_READ_MMAP, NULL); 979 - if (!elf) { 980 - err = -EBADF; 981 - pr_warn("usdt: failed to parse ELF binary '%s': %s\n", path, elf_errmsg(-1)); 982 - goto err_out; 983 - } 984 - 985 - err = sanity_check_usdt_elf(elf, path); 966 + err = sanity_check_usdt_elf(elf_fd.elf, path); 986 967 if (err) 987 968 goto err_out; 988 969 ··· 987 984 /* discover USDT in given binary, optionally limiting 988 985 * activations to a given PID, if pid > 0 989 986 */ 990 - err = collect_usdt_targets(man, elf, path, pid, usdt_provider, usdt_name, 987 + err = collect_usdt_targets(man, elf_fd.elf, path, pid, usdt_provider, usdt_name, 991 988 usdt_cookie, &targets, &target_cnt); 992 989 if (err <= 0) { 993 990 err = (err == 0) ? -ENOENT : err; ··· 1010 1007 link->link.detach = &bpf_link_usdt_detach; 1011 1008 link->link.dealloc = &bpf_link_usdt_dealloc; 1012 1009 1013 - link->uprobes = calloc(target_cnt, sizeof(*link->uprobes)); 1014 - if (!link->uprobes) { 1015 - err = -ENOMEM; 1016 - goto err_out; 1010 + if (man->has_uprobe_multi) { 1011 + offsets = calloc(target_cnt, sizeof(*offsets)); 1012 + cookies = calloc(target_cnt, sizeof(*cookies)); 1013 + ref_ctr_offsets = calloc(target_cnt, sizeof(*ref_ctr_offsets)); 1014 + 1015 + if (!offsets || !ref_ctr_offsets || !cookies) { 1016 + err = -ENOMEM; 1017 + goto err_out; 1018 + } 1019 + } else { 1020 + link->uprobes = calloc(target_cnt, sizeof(*link->uprobes)); 1021 + if (!link->uprobes) { 1022 + err = -ENOMEM; 1023 + goto err_out; 1024 + } 1017 1025 } 1018 1026 1019 1027 for (i = 0; i < target_cnt; i++) { ··· 1065 1051 goto err_out; 1066 1052 } 1067 1053 1068 - opts.ref_ctr_offset = target->sema_off; 1069 - opts.bpf_cookie = man->has_bpf_cookie ? 
spec_id : 0; 1070 - uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, 1071 - target->rel_ip, &opts); 1072 - err = libbpf_get_error(uprobe_link); 1073 - if (err) { 1074 - pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", 1075 - i, usdt_provider, usdt_name, path, err); 1054 + if (man->has_uprobe_multi) { 1055 + offsets[i] = target->rel_ip; 1056 + ref_ctr_offsets[i] = target->sema_off; 1057 + cookies[i] = spec_id; 1058 + } else { 1059 + opts.ref_ctr_offset = target->sema_off; 1060 + opts.bpf_cookie = man->has_bpf_cookie ? spec_id : 0; 1061 + uprobe_link = bpf_program__attach_uprobe_opts(prog, pid, path, 1062 + target->rel_ip, &opts); 1063 + err = libbpf_get_error(uprobe_link); 1064 + if (err) { 1065 + pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", 1066 + i, usdt_provider, usdt_name, path, err); 1067 + goto err_out; 1068 + } 1069 + 1070 + link->uprobes[i].link = uprobe_link; 1071 + link->uprobes[i].abs_ip = target->abs_ip; 1072 + link->uprobe_cnt++; 1073 + } 1074 + } 1075 + 1076 + if (man->has_uprobe_multi) { 1077 + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts_multi, 1078 + .ref_ctr_offsets = ref_ctr_offsets, 1079 + .offsets = offsets, 1080 + .cookies = cookies, 1081 + .cnt = target_cnt, 1082 + ); 1083 + 1084 + link->multi_link = bpf_program__attach_uprobe_multi(prog, pid, path, 1085 + NULL, &opts_multi); 1086 + if (!link->multi_link) { 1087 + err = -errno; 1088 + pr_warn("usdt: failed to attach uprobe multi for '%s:%s' in '%s': %d\n", 1089 + usdt_provider, usdt_name, path, err); 1076 1090 goto err_out; 1077 1091 } 1078 1092 1079 - link->uprobes[i].link = uprobe_link; 1080 - link->uprobes[i].abs_ip = target->abs_ip; 1081 - link->uprobe_cnt++; 1093 + free(offsets); 1094 + free(ref_ctr_offsets); 1095 + free(cookies); 1082 1096 } 1083 1097 1084 1098 free(targets); 1085 1099 hashmap__free(specs_hash); 1086 - elf_end(elf); 1087 - close(fd); 1088 - 1100 + elf_close(&elf_fd); 1089 1101 return &link->link; 1090 1102 1091 
1103 err_out: 1104 + free(offsets); 1105 + free(ref_ctr_offsets); 1106 + free(cookies); 1107 + 1092 1108 if (link) 1093 1109 bpf_link__destroy(&link->link); 1094 1110 free(targets); 1095 1111 hashmap__free(specs_hash); 1096 - if (elf) 1097 - elf_end(elf); 1098 - close(fd); 1112 + elf_close(&elf_fd); 1099 1113 return libbpf_err_ptr(err); 1100 1114 } 1101 1115
+1
tools/testing/selftests/bpf/.gitignore
··· 44 44 /bench 45 45 /veristat 46 46 /sign-file 47 + /uprobe_multi 47 48 *.ko 48 49 *.tmp 49 50 xskxceiver
+5
tools/testing/selftests/bpf/Makefile
··· 585 585 $(OUTPUT)/liburandom_read.so \ 586 586 $(OUTPUT)/xdp_synproxy \ 587 587 $(OUTPUT)/sign-file \ 588 + $(OUTPUT)/uprobe_multi \ 588 589 ima_setup.sh \ 589 590 verify_sig_setup.sh \ 590 591 $(wildcard progs/btf_dump_test_case_*.c) \ ··· 698 697 $(OUTPUT)/veristat: $(OUTPUT)/veristat.o 699 698 $(call msg,BINARY,,$@) 700 699 $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ 700 + 701 + $(OUTPUT)/uprobe_multi: uprobe_multi.c 702 + $(call msg,BINARY,,$@) 703 + $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@ 701 704 702 705 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ 703 706 prog_tests/tests.h map_tests/tests.h verifier/tests.h \
-9
tools/testing/selftests/bpf/bench.h
··· 81 81 void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, 82 82 struct basic_stats *gp_stat); 83 83 84 - static inline __u64 get_time_ns(void) 85 - { 86 - struct timespec t; 87 - 88 - clock_gettime(CLOCK_MONOTONIC, &t); 89 - 90 - return (u64)t.tv_sec * 1000000000 + t.tv_nsec; 91 - } 92 - 93 84 static inline void atomic_inc(long *value) 94 85 { 95 86 (void)__atomic_add_fetch(value, 1, __ATOMIC_RELAXED);
+2
tools/testing/selftests/bpf/config
··· 16 16 CONFIG_DEBUG_INFO=y 17 17 CONFIG_DEBUG_INFO_BTF=y 18 18 CONFIG_DEBUG_INFO_DWARF4=y 19 + CONFIG_DUMMY=y 19 20 CONFIG_DYNAMIC_FTRACE=y 20 21 CONFIG_FPROBE=y 21 22 CONFIG_FTRACE_SYSCALLS=y ··· 60 59 CONFIG_NET_IPGRE_DEMUX=y 61 60 CONFIG_NET_IPIP=y 62 61 CONFIG_NET_MPLS_GSO=y 62 + CONFIG_NET_SCH_FQ=y 63 63 CONFIG_NET_SCH_INGRESS=y 64 64 CONFIG_NET_SCHED=y 65 65 CONFIG_NETDEVSIM=y
+78
tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
··· 11 11 #include <bpf/btf.h> 12 12 #include "test_bpf_cookie.skel.h" 13 13 #include "kprobe_multi.skel.h" 14 + #include "uprobe_multi.skel.h" 14 15 15 16 /* uprobe attach point */ 16 17 static noinline void trigger_func(void) ··· 240 239 bpf_link__destroy(link1); 241 240 kprobe_multi__destroy(skel); 242 241 } 242 + 243 + /* defined in prog_tests/uprobe_multi_test.c */ 244 + void uprobe_multi_func_1(void); 245 + void uprobe_multi_func_2(void); 246 + void uprobe_multi_func_3(void); 247 + 248 + static void uprobe_multi_test_run(struct uprobe_multi *skel) 249 + { 250 + skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1; 251 + skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2; 252 + skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3; 253 + 254 + skel->bss->pid = getpid(); 255 + skel->bss->test_cookie = true; 256 + 257 + uprobe_multi_func_1(); 258 + uprobe_multi_func_2(); 259 + uprobe_multi_func_3(); 260 + 261 + ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 1, "uprobe_multi_func_1_result"); 262 + ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 1, "uprobe_multi_func_2_result"); 263 + ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 1, "uprobe_multi_func_3_result"); 264 + 265 + ASSERT_EQ(skel->bss->uretprobe_multi_func_1_result, 1, "uretprobe_multi_func_1_result"); 266 + ASSERT_EQ(skel->bss->uretprobe_multi_func_2_result, 1, "uretprobe_multi_func_2_result"); 267 + ASSERT_EQ(skel->bss->uretprobe_multi_func_3_result, 1, "uretprobe_multi_func_3_result"); 268 + } 269 + 270 + static void uprobe_multi_attach_api_subtest(void) 271 + { 272 + struct bpf_link *link1 = NULL, *link2 = NULL; 273 + struct uprobe_multi *skel = NULL; 274 + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 275 + const char *syms[3] = { 276 + "uprobe_multi_func_1", 277 + "uprobe_multi_func_2", 278 + "uprobe_multi_func_3", 279 + }; 280 + __u64 cookies[3]; 281 + 282 + cookies[0] = 3; /* uprobe_multi_func_1 */ 283 + cookies[1] = 1; /* uprobe_multi_func_2 */ 284 + 
cookies[2] = 2; /* uprobe_multi_func_3 */ 285 + 286 + opts.syms = syms; 287 + opts.cnt = ARRAY_SIZE(syms); 288 + opts.cookies = &cookies[0]; 289 + 290 + skel = uprobe_multi__open_and_load(); 291 + if (!ASSERT_OK_PTR(skel, "uprobe_multi")) 292 + goto cleanup; 293 + 294 + link1 = bpf_program__attach_uprobe_multi(skel->progs.uprobe, -1, 295 + "/proc/self/exe", NULL, &opts); 296 + if (!ASSERT_OK_PTR(link1, "bpf_program__attach_uprobe_multi")) 297 + goto cleanup; 298 + 299 + cookies[0] = 2; /* uprobe_multi_func_1 */ 300 + cookies[1] = 3; /* uprobe_multi_func_2 */ 301 + cookies[2] = 1; /* uprobe_multi_func_3 */ 302 + 303 + opts.retprobe = true; 304 + link2 = bpf_program__attach_uprobe_multi(skel->progs.uretprobe, -1, 305 + "/proc/self/exe", NULL, &opts); 306 + if (!ASSERT_OK_PTR(link2, "bpf_program__attach_uprobe_multi_retprobe")) 307 + goto cleanup; 308 + 309 + uprobe_multi_test_run(skel); 310 + 311 + cleanup: 312 + bpf_link__destroy(link2); 313 + bpf_link__destroy(link1); 314 + uprobe_multi__destroy(skel); 315 + } 316 + 243 317 static void uprobe_subtest(struct test_bpf_cookie *skel) 244 318 { 245 319 DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); ··· 591 515 kprobe_multi_attach_api_subtest(); 592 516 if (test__start_subtest("uprobe")) 593 517 uprobe_subtest(skel); 518 + if (test__start_subtest("multi_uprobe_attach_api")) 519 + uprobe_multi_attach_api_subtest(); 594 520 if (test__start_subtest("tracepoint")) 595 521 tp_subtest(skel); 596 522 if (test__start_subtest("perf_event"))
-8
tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
··· 304 304 kprobe_multi__destroy(skel); 305 305 } 306 306 307 - static inline __u64 get_time_ns(void) 308 - { 309 - struct timespec t; 310 - 311 - clock_gettime(CLOCK_MONOTONIC, &t); 312 - return (__u64) t.tv_sec * 1000000000 + t.tv_nsec; 313 - } 314 - 315 307 static size_t symbol_hash(long key, void *ctx __maybe_unused) 316 308 { 317 309 return str_hash((const char *) key);
+32 -1
tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
··· 5 5 #include <network_helpers.h> 6 6 7 7 #include "local_kptr_stash.skel.h" 8 + #include "local_kptr_stash_fail.skel.h" 8 9 static void test_local_kptr_stash_simple(void) 9 10 { 10 11 LIBBPF_OPTS(bpf_test_run_opts, opts, ··· 23 22 ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_rb_nodes), &opts); 24 23 ASSERT_OK(ret, "local_kptr_stash_add_nodes run"); 25 24 ASSERT_OK(opts.retval, "local_kptr_stash_add_nodes retval"); 25 + 26 + local_kptr_stash__destroy(skel); 27 + } 28 + 29 + static void test_local_kptr_stash_plain(void) 30 + { 31 + LIBBPF_OPTS(bpf_test_run_opts, opts, 32 + .data_in = &pkt_v4, 33 + .data_size_in = sizeof(pkt_v4), 34 + .repeat = 1, 35 + ); 36 + struct local_kptr_stash *skel; 37 + int ret; 38 + 39 + skel = local_kptr_stash__open_and_load(); 40 + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) 41 + return; 42 + 43 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_plain), &opts); 44 + ASSERT_OK(ret, "local_kptr_stash_add_plain run"); 45 + ASSERT_OK(opts.retval, "local_kptr_stash_add_plain retval"); 26 46 27 47 local_kptr_stash__destroy(skel); 28 48 } ··· 73 51 local_kptr_stash__destroy(skel); 74 52 } 75 53 76 - void test_local_kptr_stash_success(void) 54 + static void test_local_kptr_stash_fail(void) 55 + { 56 + RUN_TESTS(local_kptr_stash_fail); 57 + } 58 + 59 + void test_local_kptr_stash(void) 77 60 { 78 61 if (test__start_subtest("local_kptr_stash_simple")) 79 62 test_local_kptr_stash_simple(); 63 + if (test__start_subtest("local_kptr_stash_plain")) 64 + test_local_kptr_stash_plain(); 80 65 if (test__start_subtest("local_kptr_stash_unstash")) 81 66 test_local_kptr_stash_unstash(); 67 + if (test__start_subtest("local_kptr_stash_fail")) 68 + test_local_kptr_stash_fail(); 82 69 }
+139
tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + 3 + #ifndef __LWT_HELPERS_H 4 + #define __LWT_HELPERS_H 5 + 6 + #include <time.h> 7 + #include <net/if.h> 8 + #include <linux/if_tun.h> 9 + #include <linux/icmp.h> 10 + 11 + #include "test_progs.h" 12 + 13 + #define log_err(MSG, ...) \ 14 + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ 15 + __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) 16 + 17 + #define RUN_TEST(name) \ 18 + ({ \ 19 + if (test__start_subtest(#name)) \ 20 + if (ASSERT_OK(netns_create(), "netns_create")) { \ 21 + struct nstoken *token = open_netns(NETNS); \ 22 + if (ASSERT_OK_PTR(token, "setns")) { \ 23 + test_ ## name(); \ 24 + close_netns(token); \ 25 + } \ 26 + netns_delete(); \ 27 + } \ 28 + }) 29 + 30 + #define NETNS "ns_lwt" 31 + 32 + static inline int netns_create(void) 33 + { 34 + return system("ip netns add " NETNS); 35 + } 36 + 37 + static inline int netns_delete(void) 38 + { 39 + return system("ip netns del " NETNS ">/dev/null 2>&1"); 40 + } 41 + 42 + static int open_tuntap(const char *dev_name, bool need_mac) 43 + { 44 + int err = 0; 45 + struct ifreq ifr; 46 + int fd = open("/dev/net/tun", O_RDWR); 47 + 48 + if (!ASSERT_GT(fd, 0, "open(/dev/net/tun)")) 49 + return -1; 50 + 51 + ifr.ifr_flags = IFF_NO_PI | (need_mac ? 
IFF_TAP : IFF_TUN); 52 + memcpy(ifr.ifr_name, dev_name, IFNAMSIZ); 53 + 54 + err = ioctl(fd, TUNSETIFF, &ifr); 55 + if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { 56 + close(fd); 57 + return -1; 58 + } 59 + 60 + err = fcntl(fd, F_SETFL, O_NONBLOCK); 61 + if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { 62 + close(fd); 63 + return -1; 64 + } 65 + 66 + return fd; 67 + } 68 + 69 + #define ICMP_PAYLOAD_SIZE 100 70 + 71 + /* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */ 72 + static int __expect_icmp_ipv4(char *buf, ssize_t len) 73 + { 74 + struct iphdr *ip = (struct iphdr *)buf; 75 + struct icmphdr *icmp = (struct icmphdr *)(ip + 1); 76 + ssize_t min_header_len = sizeof(*ip) + sizeof(*icmp); 77 + 78 + if (len < min_header_len) 79 + return -1; 80 + 81 + if (ip->protocol != IPPROTO_ICMP) 82 + return -1; 83 + 84 + if (icmp->type != ICMP_ECHO) 85 + return -1; 86 + 87 + return len == ICMP_PAYLOAD_SIZE + min_header_len; 88 + } 89 + 90 + typedef int (*filter_t) (char *, ssize_t); 91 + 92 + /* wait_for_packet - wait for a packet that matches the filter 93 + * 94 + * @fd: tun fd/packet socket to read packet 95 + * @filter: filter function, returning 1 if matches 96 + * @timeout: timeout to wait for the packet 97 + * 98 + * Returns 1 if a matching packet is read, 0 if timeout expired, -1 on error. 99 + */ 100 + static int wait_for_packet(int fd, filter_t filter, struct timeval *timeout) 101 + { 102 + char buf[4096]; 103 + int max_retry = 5; /* in case we read some spurious packets */ 104 + fd_set fds; 105 + 106 + FD_ZERO(&fds); 107 + while (max_retry--) { 108 + /* Linux modifies timeout arg... 
So make a copy */ 109 + struct timeval copied_timeout = *timeout; 110 + ssize_t ret = -1; 111 + 112 + FD_SET(fd, &fds); 113 + 114 + ret = select(1 + fd, &fds, NULL, NULL, &copied_timeout); 115 + if (ret <= 0) { 116 + if (errno == EINTR) 117 + continue; 118 + else if (errno == EAGAIN || ret == 0) 119 + return 0; 120 + 121 + log_err("select failed"); 122 + return -1; 123 + } 124 + 125 + ret = read(fd, buf, sizeof(buf)); 126 + 127 + if (ret <= 0) { 128 + log_err("read(dev): %ld", ret); 129 + return -1; 130 + } 131 + 132 + if (filter && filter(buf, ret) > 0) 133 + return 1; 134 + } 135 + 136 + return 0; 137 + } 138 + 139 + #endif /* __LWT_HELPERS_H */
+330
tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 + 3 + /* 4 + * Test suite of lwt_xmit BPF programs that redirect packets 5 + * The file tests focus not only if these programs work as expected normally, 6 + * but also if they can handle abnormal situations gracefully. 7 + * 8 + * WARNING 9 + * ------- 10 + * This test suite may crash the kernel, thus should be run in a VM. 11 + * 12 + * Setup: 13 + * --------- 14 + * All tests are performed in a single netns. Two lwt encap routes are setup for 15 + * each subtest: 16 + * 17 + * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<ingress_sec>" dev link_err 18 + * ip route add 20.0.0.0/24 encap bpf xmit <obj> sec "<egress_sec>" dev link_err 19 + * 20 + * Here <obj> is statically defined to test_lwt_redirect.bpf.o, and each section 21 + * of this object holds a program entry to test. The BPF object is built from 22 + * progs/test_lwt_redirect.c. We didn't use generated BPF skeleton since the 23 + * attachment for lwt programs are not supported by libbpf yet. 24 + * 25 + * For testing, ping commands are run in the test netns: 26 + * 27 + * ping 10.0.0.<ifindex> -c 1 -w 1 -s 100 28 + * ping 20.0.0.<ifindex> -c 1 -w 1 -s 100 29 + * 30 + * Scenarios: 31 + * -------------------------------- 32 + * 1. Redirect to a running tap/tun device 33 + * 2. Redirect to a down tap/tun device 34 + * 3. Redirect to a vlan device with lower layer down 35 + * 36 + * Case 1, ping packets should be received by packet socket on target device 37 + * when redirected to ingress, and by tun/tap fd when redirected to egress. 38 + * 39 + * Case 2,3 are considered successful as long as they do not crash the kernel 40 + * as a regression. 41 + * 42 + * Case 1,2 use tap device to test redirect to device that requires MAC 43 + * header, and tun device to test the case with no MAC header added. 
44 + */ 45 + #include <sys/socket.h> 46 + #include <net/if.h> 47 + #include <linux/if_ether.h> 48 + #include <linux/if_packet.h> 49 + #include <linux/if_tun.h> 50 + #include <linux/icmp.h> 51 + #include <arpa/inet.h> 52 + #include <unistd.h> 53 + #include <errno.h> 54 + #include <stdbool.h> 55 + #include <stdlib.h> 56 + 57 + #include "lwt_helpers.h" 58 + #include "test_progs.h" 59 + #include "network_helpers.h" 60 + 61 + #define BPF_OBJECT "test_lwt_redirect.bpf.o" 62 + #define INGRESS_SEC(need_mac) ((need_mac) ? "redir_ingress" : "redir_ingress_nomac") 63 + #define EGRESS_SEC(need_mac) ((need_mac) ? "redir_egress" : "redir_egress_nomac") 64 + #define LOCAL_SRC "10.0.0.1" 65 + #define CIDR_TO_INGRESS "10.0.0.0/24" 66 + #define CIDR_TO_EGRESS "20.0.0.0/24" 67 + 68 + /* ping to redirect toward given dev, with last byte of dest IP being the target 69 + * device index. 70 + * 71 + * Note: ping command inside BPF-CI is busybox version, so it does not have certain 72 + * function, such like -m option to set packet mark. 73 + */ 74 + static void ping_dev(const char *dev, bool is_ingress) 75 + { 76 + int link_index = if_nametoindex(dev); 77 + char ip[256]; 78 + 79 + if (!ASSERT_GE(link_index, 0, "if_nametoindex")) 80 + return; 81 + 82 + if (is_ingress) 83 + snprintf(ip, sizeof(ip), "10.0.0.%d", link_index); 84 + else 85 + snprintf(ip, sizeof(ip), "20.0.0.%d", link_index); 86 + 87 + /* We won't get a reply. 
Don't fail here */ 88 + SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1", 89 + ip, ICMP_PAYLOAD_SIZE); 90 + } 91 + 92 + static int new_packet_sock(const char *ifname) 93 + { 94 + int err = 0; 95 + int ignore_outgoing = 1; 96 + int ifindex = -1; 97 + int s = -1; 98 + 99 + s = socket(AF_PACKET, SOCK_RAW, 0); 100 + if (!ASSERT_GE(s, 0, "socket(AF_PACKET)")) 101 + return -1; 102 + 103 + ifindex = if_nametoindex(ifname); 104 + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) { 105 + close(s); 106 + return -1; 107 + } 108 + 109 + struct sockaddr_ll addr = { 110 + .sll_family = AF_PACKET, 111 + .sll_protocol = htons(ETH_P_IP), 112 + .sll_ifindex = ifindex, 113 + }; 114 + 115 + err = bind(s, (struct sockaddr *)&addr, sizeof(addr)); 116 + if (!ASSERT_OK(err, "bind(AF_PACKET)")) { 117 + close(s); 118 + return -1; 119 + } 120 + 121 + /* Use packet socket to capture only the ingress, so we can distinguish 122 + * the case where a regression that actually redirects the packet to 123 + * the egress. 124 + */ 125 + err = setsockopt(s, SOL_PACKET, PACKET_IGNORE_OUTGOING, 126 + &ignore_outgoing, sizeof(ignore_outgoing)); 127 + if (!ASSERT_OK(err, "setsockopt(PACKET_IGNORE_OUTGOING)")) { 128 + close(s); 129 + return -1; 130 + } 131 + 132 + err = fcntl(s, F_SETFL, O_NONBLOCK); 133 + if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { 134 + close(s); 135 + return -1; 136 + } 137 + 138 + return s; 139 + } 140 + 141 + static int expect_icmp(char *buf, ssize_t len) 142 + { 143 + struct ethhdr *eth = (struct ethhdr *)buf; 144 + 145 + if (len < (ssize_t)sizeof(*eth)) 146 + return -1; 147 + 148 + if (eth->h_proto == htons(ETH_P_IP)) 149 + return __expect_icmp_ipv4((char *)(eth + 1), len - sizeof(*eth)); 150 + 151 + return -1; 152 + } 153 + 154 + static int expect_icmp_nomac(char *buf, ssize_t len) 155 + { 156 + return __expect_icmp_ipv4(buf, len); 157 + } 158 + 159 + static void send_and_capture_test_packets(const char *test_name, int tap_fd, 160 + const char *target_dev, bool need_mac) 161 + { 162 
+ int psock = -1; 163 + struct timeval timeo = { 164 + .tv_sec = 0, 165 + .tv_usec = 250000, 166 + }; 167 + int ret = -1; 168 + 169 + filter_t filter = need_mac ? expect_icmp : expect_icmp_nomac; 170 + 171 + ping_dev(target_dev, false); 172 + 173 + ret = wait_for_packet(tap_fd, filter, &timeo); 174 + if (!ASSERT_EQ(ret, 1, "wait_for_epacket")) { 175 + log_err("%s egress test fails", test_name); 176 + goto out; 177 + } 178 + 179 + psock = new_packet_sock(target_dev); 180 + ping_dev(target_dev, true); 181 + 182 + ret = wait_for_packet(psock, filter, &timeo); 183 + if (!ASSERT_EQ(ret, 1, "wait_for_ipacket")) { 184 + log_err("%s ingress test fails", test_name); 185 + goto out; 186 + } 187 + 188 + out: 189 + if (psock >= 0) 190 + close(psock); 191 + } 192 + 193 + static int setup_redirect_target(const char *target_dev, bool need_mac) 194 + { 195 + int target_index = -1; 196 + int tap_fd = -1; 197 + 198 + tap_fd = open_tuntap(target_dev, need_mac); 199 + if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) 200 + goto fail; 201 + 202 + target_index = if_nametoindex(target_dev); 203 + if (!ASSERT_GE(target_index, 0, "if_nametoindex")) 204 + goto fail; 205 + 206 + SYS(fail, "ip link add link_err type dummy"); 207 + SYS(fail, "ip link set lo up"); 208 + SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); 209 + SYS(fail, "ip link set link_err up"); 210 + SYS(fail, "ip link set %s up", target_dev); 211 + 212 + SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s", 213 + CIDR_TO_INGRESS, BPF_OBJECT, INGRESS_SEC(need_mac)); 214 + 215 + SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s", 216 + CIDR_TO_EGRESS, BPF_OBJECT, EGRESS_SEC(need_mac)); 217 + 218 + return tap_fd; 219 + 220 + fail: 221 + if (tap_fd >= 0) 222 + close(tap_fd); 223 + return -1; 224 + } 225 + 226 + static void test_lwt_redirect_normal(void) 227 + { 228 + const char *target_dev = "tap0"; 229 + int tap_fd = -1; 230 + bool need_mac = true; 231 + 232 + tap_fd = 
setup_redirect_target(target_dev, need_mac); 233 + if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) 234 + return; 235 + 236 + send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac); 237 + close(tap_fd); 238 + } 239 + 240 + static void test_lwt_redirect_normal_nomac(void) 241 + { 242 + const char *target_dev = "tun0"; 243 + int tap_fd = -1; 244 + bool need_mac = false; 245 + 246 + tap_fd = setup_redirect_target(target_dev, need_mac); 247 + if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) 248 + return; 249 + 250 + send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac); 251 + close(tap_fd); 252 + } 253 + 254 + /* This test aims to prevent regression of future. As long as the kernel does 255 + * not panic, it is considered as success. 256 + */ 257 + static void __test_lwt_redirect_dev_down(bool need_mac) 258 + { 259 + const char *target_dev = "tap0"; 260 + int tap_fd = -1; 261 + 262 + tap_fd = setup_redirect_target(target_dev, need_mac); 263 + if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) 264 + return; 265 + 266 + SYS(out, "ip link set %s down", target_dev); 267 + ping_dev(target_dev, true); 268 + ping_dev(target_dev, false); 269 + 270 + out: 271 + close(tap_fd); 272 + } 273 + 274 + static void test_lwt_redirect_dev_down(void) 275 + { 276 + __test_lwt_redirect_dev_down(true); 277 + } 278 + 279 + static void test_lwt_redirect_dev_down_nomac(void) 280 + { 281 + __test_lwt_redirect_dev_down(false); 282 + } 283 + 284 + /* This test aims to prevent regression of future. As long as the kernel does 285 + * not panic, it is considered as success. 
286 + */ 287 + static void test_lwt_redirect_dev_carrier_down(void) 288 + { 289 + const char *lower_dev = "tap0"; 290 + const char *vlan_dev = "vlan100"; 291 + int tap_fd = -1; 292 + 293 + tap_fd = setup_redirect_target(lower_dev, true); 294 + if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) 295 + return; 296 + 297 + SYS(out, "ip link add vlan100 link %s type vlan id 100", lower_dev); 298 + SYS(out, "ip link set %s up", vlan_dev); 299 + SYS(out, "ip link set %s down", lower_dev); 300 + ping_dev(vlan_dev, true); 301 + ping_dev(vlan_dev, false); 302 + 303 + out: 304 + close(tap_fd); 305 + } 306 + 307 + static void *test_lwt_redirect_run(void *arg) 308 + { 309 + netns_delete(); 310 + RUN_TEST(lwt_redirect_normal); 311 + RUN_TEST(lwt_redirect_normal_nomac); 312 + RUN_TEST(lwt_redirect_dev_down); 313 + RUN_TEST(lwt_redirect_dev_down_nomac); 314 + RUN_TEST(lwt_redirect_dev_carrier_down); 315 + return NULL; 316 + } 317 + 318 + void test_lwt_redirect(void) 319 + { 320 + pthread_t test_thread; 321 + int err; 322 + 323 + /* Run the tests in their own thread to isolate the namespace changes 324 + * so they do not affect the environment of other tests. 325 + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) 326 + */ 327 + err = pthread_create(&test_thread, NULL, &test_lwt_redirect_run, NULL); 328 + if (ASSERT_OK(err, "pthread_create")) 329 + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); 330 + }
+262
tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 + 3 + /* 4 + * Test suite of lwt BPF programs that reroutes packets 5 + * The file tests focus not only if these programs work as expected normally, 6 + * but also if they can handle abnormal situations gracefully. This test 7 + * suite currently only covers lwt_xmit hook. lwt_in tests have not been 8 + * implemented. 9 + * 10 + * WARNING 11 + * ------- 12 + * This test suite can crash the kernel, thus should be run in a VM. 13 + * 14 + * Setup: 15 + * --------- 16 + * all tests are performed in a single netns. A lwt encap route is setup for 17 + * each subtest: 18 + * 19 + * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<section_N>" dev link_err 20 + * 21 + * Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains 22 + * a single test program entry. This program sets packet mark by last byte of 23 + * the IPv4 daddr. For example, a packet going to 1.2.3.4 will receive a skb 24 + * mark 4. A packet will only be marked once, and IP x.x.x.0 will be skipped 25 + * to avoid route loop. We didn't use generated BPF skeleton since the 26 + * attachment for lwt programs are not supported by libbpf yet. 27 + * 28 + * The test program will bring up a tun device, and sets up the following 29 + * routes: 30 + * 31 + * ip rule add pref 100 from all fwmark <tun_index> lookup 100 32 + * ip route add table 100 default dev tun0 33 + * 34 + * For normal testing, a ping command is running in the test netns: 35 + * 36 + * ping 10.0.0.<tun_index> -c 1 -w 1 -s 100 37 + * 38 + * For abnormal testing, fq is used as the qdisc of the tun device. Then a UDP 39 + * socket will try to overflow the fq queue and trigger qdisc drop error. 40 + * 41 + * Scenarios: 42 + * -------------------------------- 43 + * 1. Reroute to a running tun device 44 + * 2. Reroute to a device where qdisc drop 45 + * 46 + * For case 1, ping packets should be received by the tun device. 
47 + * 48 + * For case 2, force UDP packets to overflow fq limit. As long as kernel 49 + * is not crashed, it is considered successful. 50 + */ 51 + #include "lwt_helpers.h" 52 + #include "network_helpers.h" 53 + #include <linux/net_tstamp.h> 54 + 55 + #define BPF_OBJECT "test_lwt_reroute.bpf.o" 56 + #define LOCAL_SRC "10.0.0.1" 57 + #define TEST_CIDR "10.0.0.0/24" 58 + #define XMIT_HOOK "xmit" 59 + #define XMIT_SECTION "lwt_xmit" 60 + #define NSEC_PER_SEC 1000000000ULL 61 + 62 + /* send a ping to be rerouted to the target device */ 63 + static void ping_once(const char *ip) 64 + { 65 + /* We won't get a reply. Don't fail here */ 66 + SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1", 67 + ip, ICMP_PAYLOAD_SIZE); 68 + } 69 + 70 + /* Send snd_target UDP packets to overflow the fq queue and trigger qdisc drop 71 + * error. This is done via TX tstamp to force buffering delayed packets. 72 + */ 73 + static int overflow_fq(int snd_target, const char *target_ip) 74 + { 75 + struct sockaddr_in addr = { 76 + .sin_family = AF_INET, 77 + .sin_port = htons(1234), 78 + }; 79 + 80 + char data_buf[8]; /* only #pkts matter, so use a random small buffer */ 81 + char control_buf[CMSG_SPACE(sizeof(uint64_t))]; 82 + struct iovec iov = { 83 + .iov_base = data_buf, 84 + .iov_len = sizeof(data_buf), 85 + }; 86 + int err = -1; 87 + int s = -1; 88 + struct sock_txtime txtime_on = { 89 + .clockid = CLOCK_MONOTONIC, 90 + .flags = 0, 91 + }; 92 + struct msghdr msg = { 93 + .msg_name = &addr, 94 + .msg_namelen = sizeof(addr), 95 + .msg_control = control_buf, 96 + .msg_controllen = sizeof(control_buf), 97 + .msg_iovlen = 1, 98 + .msg_iov = &iov, 99 + }; 100 + struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); 101 + 102 + memset(data_buf, 0, sizeof(data_buf)); 103 + 104 + s = socket(AF_INET, SOCK_DGRAM, 0); 105 + if (!ASSERT_GE(s, 0, "socket")) 106 + goto out; 107 + 108 + err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on)); 109 + if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)")) 110 
+ goto out; 111 + 112 + err = inet_pton(AF_INET, target_ip, &addr.sin_addr); 113 + if (!ASSERT_EQ(err, 1, "inet_pton")) 114 + goto out; 115 + 116 + while (snd_target > 0) { 117 + struct timespec now; 118 + 119 + memset(control_buf, 0, sizeof(control_buf)); 120 + cmsg->cmsg_type = SCM_TXTIME; 121 + cmsg->cmsg_level = SOL_SOCKET; 122 + cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t)); 123 + 124 + err = clock_gettime(CLOCK_MONOTONIC, &now); 125 + if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) { 126 + err = -1; 127 + goto out; 128 + } 129 + 130 + *(uint64_t *)CMSG_DATA(cmsg) = (now.tv_nsec + 1) * NSEC_PER_SEC + 131 + now.tv_nsec; 132 + 133 + /* we will intentionally send more than fq limit, so ignore 134 + * the error here. 135 + */ 136 + sendmsg(s, &msg, MSG_NOSIGNAL); 137 + snd_target--; 138 + } 139 + 140 + /* no kernel crash so far is considered success */ 141 + err = 0; 142 + 143 + out: 144 + if (s >= 0) 145 + close(s); 146 + 147 + return err; 148 + } 149 + 150 + static int setup(const char *tun_dev) 151 + { 152 + int target_index = -1; 153 + int tap_fd = -1; 154 + 155 + tap_fd = open_tuntap(tun_dev, false); 156 + if (!ASSERT_GE(tap_fd, 0, "open_tun")) 157 + return -1; 158 + 159 + target_index = if_nametoindex(tun_dev); 160 + if (!ASSERT_GE(target_index, 0, "if_nametoindex")) 161 + return -1; 162 + 163 + SYS(fail, "ip link add link_err type dummy"); 164 + SYS(fail, "ip link set lo up"); 165 + SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); 166 + SYS(fail, "ip link set link_err up"); 167 + SYS(fail, "ip link set %s up", tun_dev); 168 + 169 + SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit", 170 + TEST_CIDR, BPF_OBJECT); 171 + 172 + SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100", 173 + target_index); 174 + SYS(fail, "ip route add t 100 default dev %s", tun_dev); 175 + 176 + return tap_fd; 177 + 178 + fail: 179 + if (tap_fd >= 0) 180 + close(tap_fd); 181 + return -1; 182 + } 183 + 184 + static void 
test_lwt_reroute_normal_xmit(void) 185 + { 186 + const char *tun_dev = "tun0"; 187 + int tun_fd = -1; 188 + int ifindex = -1; 189 + char ip[256]; 190 + struct timeval timeo = { 191 + .tv_sec = 0, 192 + .tv_usec = 250000, 193 + }; 194 + 195 + tun_fd = setup(tun_dev); 196 + if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) 197 + return; 198 + 199 + ifindex = if_nametoindex(tun_dev); 200 + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) 201 + return; 202 + 203 + snprintf(ip, 256, "10.0.0.%d", ifindex); 204 + 205 + /* ping packets should be received by the tun device */ 206 + ping_once(ip); 207 + 208 + if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1, 209 + "wait_for_packet")) 210 + log_err("%s xmit", __func__); 211 + } 212 + 213 + /* 214 + * Test the failure case when the skb is dropped at the qdisc. This is a 215 + * regression prevention at the xmit hook only. 216 + */ 217 + static void test_lwt_reroute_qdisc_dropped(void) 218 + { 219 + const char *tun_dev = "tun0"; 220 + int tun_fd = -1; 221 + int ifindex = -1; 222 + char ip[256]; 223 + 224 + tun_fd = setup(tun_dev); 225 + if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) 226 + goto fail; 227 + 228 + SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev); 229 + 230 + ifindex = if_nametoindex(tun_dev); 231 + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) 232 + return; 233 + 234 + snprintf(ip, 256, "10.0.0.%d", ifindex); 235 + ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq"); 236 + 237 + fail: 238 + if (tun_fd >= 0) 239 + close(tun_fd); 240 + } 241 + 242 + static void *test_lwt_reroute_run(void *arg) 243 + { 244 + netns_delete(); 245 + RUN_TEST(lwt_reroute_normal_xmit); 246 + RUN_TEST(lwt_reroute_qdisc_dropped); 247 + return NULL; 248 + } 249 + 250 + void test_lwt_reroute(void) 251 + { 252 + pthread_t test_thread; 253 + int err; 254 + 255 + /* Run the tests in their own thread to isolate the namespace changes 256 + * so they do not affect the environment of other tests. 
257 + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) 258 + */ 259 + err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL); 260 + if (ASSERT_OK(err, "pthread_create")) 261 + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); 262 + }
+26
tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
··· 9 9 10 10 void test_refcounted_kptr(void) 11 11 { 12 + RUN_TESTS(refcounted_kptr); 12 13 } 13 14 14 15 void test_refcounted_kptr_fail(void) 15 16 { 17 + RUN_TESTS(refcounted_kptr_fail); 16 18 } 17 19 18 20 void test_refcounted_kptr_wrong_owner(void) 19 21 { 22 + LIBBPF_OPTS(bpf_test_run_opts, opts, 23 + .data_in = &pkt_v4, 24 + .data_size_in = sizeof(pkt_v4), 25 + .repeat = 1, 26 + ); 27 + struct refcounted_kptr *skel; 28 + int ret; 29 + 30 + skel = refcounted_kptr__open_and_load(); 31 + if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) 32 + return; 33 + 34 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a1), &opts); 35 + ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a1"); 36 + ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a1 retval"); 37 + 38 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_b), &opts); 39 + ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_b"); 40 + ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_b retval"); 41 + 42 + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a2), &opts); 43 + ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a2"); 44 + ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval"); 45 + refcounted_kptr__destroy(skel); 20 46 }
+2
tools/testing/selftests/bpf/prog_tests/task_kfunc.c
··· 79 79 "test_task_from_pid_current", 80 80 "test_task_from_pid_invalid", 81 81 "task_kfunc_acquire_trusted_walked", 82 + "test_task_kfunc_flavor_relo", 83 + "test_task_kfunc_flavor_relo_not_found", 82 84 }; 83 85 84 86 void test_task_kfunc(void)
+35 -1
tools/testing/selftests/bpf/prog_tests/tc_bpf.c
··· 3 3 #include <test_progs.h> 4 4 #include <linux/pkt_cls.h> 5 5 6 + #include "cap_helpers.h" 6 7 #include "test_tc_bpf.skel.h" 7 8 8 9 #define LO_IFINDEX 1 ··· 328 327 return 0; 329 328 } 330 329 331 - void test_tc_bpf(void) 330 + void tc_bpf_root(void) 332 331 { 333 332 DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, 334 333 .attach_point = BPF_TC_INGRESS); ··· 393 392 bpf_tc_hook_destroy(&hook); 394 393 } 395 394 test_tc_bpf__destroy(skel); 395 + } 396 + 397 + void tc_bpf_non_root(void) 398 + { 399 + struct test_tc_bpf *skel = NULL; 400 + __u64 caps = 0; 401 + int ret; 402 + 403 + /* In case CAP_BPF and CAP_PERFMON is not set */ 404 + ret = cap_enable_effective(1ULL << CAP_BPF | 1ULL << CAP_NET_ADMIN, &caps); 405 + if (!ASSERT_OK(ret, "set_cap_bpf_cap_net_admin")) 406 + return; 407 + ret = cap_disable_effective(1ULL << CAP_SYS_ADMIN | 1ULL << CAP_PERFMON, NULL); 408 + if (!ASSERT_OK(ret, "disable_cap_sys_admin")) 409 + goto restore_cap; 410 + 411 + skel = test_tc_bpf__open_and_load(); 412 + if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load")) 413 + goto restore_cap; 414 + 415 + test_tc_bpf__destroy(skel); 416 + 417 + restore_cap: 418 + if (caps) 419 + cap_enable_effective(caps, NULL); 420 + } 421 + 422 + void test_tc_bpf(void) 423 + { 424 + if (test__start_subtest("tc_bpf_root")) 425 + tc_bpf_root(); 426 + if (test__start_subtest("tc_bpf_non_root")) 427 + tc_bpf_non_root(); 396 428 }
+415
tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include <unistd.h> 4 + #include <test_progs.h> 5 + #include "uprobe_multi.skel.h" 6 + #include "uprobe_multi_bench.skel.h" 7 + #include "uprobe_multi_usdt.skel.h" 8 + #include "bpf/libbpf_internal.h" 9 + #include "testing_helpers.h" 10 + 11 + static char test_data[] = "test_data"; 12 + 13 + noinline void uprobe_multi_func_1(void) 14 + { 15 + asm volatile (""); 16 + } 17 + 18 + noinline void uprobe_multi_func_2(void) 19 + { 20 + asm volatile (""); 21 + } 22 + 23 + noinline void uprobe_multi_func_3(void) 24 + { 25 + asm volatile (""); 26 + } 27 + 28 + struct child { 29 + int go[2]; 30 + int pid; 31 + }; 32 + 33 + static void release_child(struct child *child) 34 + { 35 + int child_status; 36 + 37 + if (!child) 38 + return; 39 + close(child->go[1]); 40 + close(child->go[0]); 41 + if (child->pid > 0) 42 + waitpid(child->pid, &child_status, 0); 43 + } 44 + 45 + static void kick_child(struct child *child) 46 + { 47 + char c = 1; 48 + 49 + if (child) { 50 + write(child->go[1], &c, 1); 51 + release_child(child); 52 + } 53 + fflush(NULL); 54 + } 55 + 56 + static struct child *spawn_child(void) 57 + { 58 + static struct child child; 59 + int err; 60 + int c; 61 + 62 + /* pipe to notify child to execute the trigger functions */ 63 + if (pipe(child.go)) 64 + return NULL; 65 + 66 + child.pid = fork(); 67 + if (child.pid < 0) { 68 + release_child(&child); 69 + errno = EINVAL; 70 + return NULL; 71 + } 72 + 73 + /* child */ 74 + if (child.pid == 0) { 75 + close(child.go[1]); 76 + 77 + /* wait for parent's kick */ 78 + err = read(child.go[0], &c, 1); 79 + if (err != 1) 80 + exit(err); 81 + 82 + uprobe_multi_func_1(); 83 + uprobe_multi_func_2(); 84 + uprobe_multi_func_3(); 85 + 86 + exit(errno); 87 + } 88 + 89 + return &child; 90 + } 91 + 92 + static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child) 93 + { 94 + skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1; 95 + 
skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2; 96 + skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3; 97 + 98 + skel->bss->user_ptr = test_data; 99 + 100 + /* 101 + * Disable pid check in bpf program if we are pid filter test, 102 + * because the probe should be executed only by child->pid 103 + * passed at the probe attach. 104 + */ 105 + skel->bss->pid = child ? 0 : getpid(); 106 + 107 + if (child) 108 + kick_child(child); 109 + 110 + /* trigger all probes */ 111 + uprobe_multi_func_1(); 112 + uprobe_multi_func_2(); 113 + uprobe_multi_func_3(); 114 + 115 + /* 116 + * There are 2 entry and 2 exit probe called for each uprobe_multi_func_[123] 117 + * function and each slepable probe (6) increments uprobe_multi_sleep_result. 118 + */ 119 + ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 2, "uprobe_multi_func_1_result"); 120 + ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 2, "uprobe_multi_func_2_result"); 121 + ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 2, "uprobe_multi_func_3_result"); 122 + 123 + ASSERT_EQ(skel->bss->uretprobe_multi_func_1_result, 2, "uretprobe_multi_func_1_result"); 124 + ASSERT_EQ(skel->bss->uretprobe_multi_func_2_result, 2, "uretprobe_multi_func_2_result"); 125 + ASSERT_EQ(skel->bss->uretprobe_multi_func_3_result, 2, "uretprobe_multi_func_3_result"); 126 + 127 + ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result"); 128 + 129 + if (child) 130 + ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid"); 131 + } 132 + 133 + static void test_skel_api(void) 134 + { 135 + struct uprobe_multi *skel = NULL; 136 + int err; 137 + 138 + skel = uprobe_multi__open_and_load(); 139 + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) 140 + goto cleanup; 141 + 142 + err = uprobe_multi__attach(skel); 143 + if (!ASSERT_OK(err, "uprobe_multi__attach")) 144 + goto cleanup; 145 + 146 + uprobe_multi_test_run(skel, NULL); 147 + 148 + cleanup: 149 + uprobe_multi__destroy(skel); 
150 + } 151 + 152 + static void 153 + __test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi_opts *opts, 154 + struct child *child) 155 + { 156 + pid_t pid = child ? child->pid : -1; 157 + struct uprobe_multi *skel = NULL; 158 + 159 + skel = uprobe_multi__open_and_load(); 160 + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) 161 + goto cleanup; 162 + 163 + opts->retprobe = false; 164 + skel->links.uprobe = bpf_program__attach_uprobe_multi(skel->progs.uprobe, pid, 165 + binary, pattern, opts); 166 + if (!ASSERT_OK_PTR(skel->links.uprobe, "bpf_program__attach_uprobe_multi")) 167 + goto cleanup; 168 + 169 + opts->retprobe = true; 170 + skel->links.uretprobe = bpf_program__attach_uprobe_multi(skel->progs.uretprobe, pid, 171 + binary, pattern, opts); 172 + if (!ASSERT_OK_PTR(skel->links.uretprobe, "bpf_program__attach_uprobe_multi")) 173 + goto cleanup; 174 + 175 + opts->retprobe = false; 176 + skel->links.uprobe_sleep = bpf_program__attach_uprobe_multi(skel->progs.uprobe_sleep, pid, 177 + binary, pattern, opts); 178 + if (!ASSERT_OK_PTR(skel->links.uprobe_sleep, "bpf_program__attach_uprobe_multi")) 179 + goto cleanup; 180 + 181 + opts->retprobe = true; 182 + skel->links.uretprobe_sleep = bpf_program__attach_uprobe_multi(skel->progs.uretprobe_sleep, 183 + pid, binary, pattern, opts); 184 + if (!ASSERT_OK_PTR(skel->links.uretprobe_sleep, "bpf_program__attach_uprobe_multi")) 185 + goto cleanup; 186 + 187 + opts->retprobe = false; 188 + skel->links.uprobe_extra = bpf_program__attach_uprobe_multi(skel->progs.uprobe_extra, -1, 189 + binary, pattern, opts); 190 + if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi")) 191 + goto cleanup; 192 + 193 + uprobe_multi_test_run(skel, child); 194 + 195 + cleanup: 196 + uprobe_multi__destroy(skel); 197 + } 198 + 199 + static void 200 + test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi_opts *opts) 201 + { 202 + struct child *child; 203 + 204 + 
/* no pid filter */ 205 + __test_attach_api(binary, pattern, opts, NULL); 206 + 207 + /* pid filter */ 208 + child = spawn_child(); 209 + if (!ASSERT_OK_PTR(child, "spawn_child")) 210 + return; 211 + 212 + __test_attach_api(binary, pattern, opts, child); 213 + } 214 + 215 + static void test_attach_api_pattern(void) 216 + { 217 + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 218 + 219 + test_attach_api("/proc/self/exe", "uprobe_multi_func_*", &opts); 220 + test_attach_api("/proc/self/exe", "uprobe_multi_func_?", &opts); 221 + } 222 + 223 + static void test_attach_api_syms(void) 224 + { 225 + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); 226 + const char *syms[3] = { 227 + "uprobe_multi_func_1", 228 + "uprobe_multi_func_2", 229 + "uprobe_multi_func_3", 230 + }; 231 + 232 + opts.syms = syms; 233 + opts.cnt = ARRAY_SIZE(syms); 234 + test_attach_api("/proc/self/exe", NULL, &opts); 235 + } 236 + 237 + static void __test_link_api(struct child *child) 238 + { 239 + int prog_fd, link1_fd = -1, link2_fd = -1, link3_fd = -1, link4_fd = -1; 240 + LIBBPF_OPTS(bpf_link_create_opts, opts); 241 + const char *path = "/proc/self/exe"; 242 + struct uprobe_multi *skel = NULL; 243 + unsigned long *offsets = NULL; 244 + const char *syms[3] = { 245 + "uprobe_multi_func_1", 246 + "uprobe_multi_func_2", 247 + "uprobe_multi_func_3", 248 + }; 249 + int link_extra_fd = -1; 250 + int err; 251 + 252 + err = elf_resolve_syms_offsets(path, 3, syms, (unsigned long **) &offsets); 253 + if (!ASSERT_OK(err, "elf_resolve_syms_offsets")) 254 + return; 255 + 256 + opts.uprobe_multi.path = path; 257 + opts.uprobe_multi.offsets = offsets; 258 + opts.uprobe_multi.cnt = ARRAY_SIZE(syms); 259 + opts.uprobe_multi.pid = child ? 
child->pid : 0; 260 + 261 + skel = uprobe_multi__open_and_load(); 262 + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) 263 + goto cleanup; 264 + 265 + opts.kprobe_multi.flags = 0; 266 + prog_fd = bpf_program__fd(skel->progs.uprobe); 267 + link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); 268 + if (!ASSERT_GE(link1_fd, 0, "link1_fd")) 269 + goto cleanup; 270 + 271 + opts.kprobe_multi.flags = BPF_F_UPROBE_MULTI_RETURN; 272 + prog_fd = bpf_program__fd(skel->progs.uretprobe); 273 + link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); 274 + if (!ASSERT_GE(link2_fd, 0, "link2_fd")) 275 + goto cleanup; 276 + 277 + opts.kprobe_multi.flags = 0; 278 + prog_fd = bpf_program__fd(skel->progs.uprobe_sleep); 279 + link3_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); 280 + if (!ASSERT_GE(link3_fd, 0, "link3_fd")) 281 + goto cleanup; 282 + 283 + opts.kprobe_multi.flags = BPF_F_UPROBE_MULTI_RETURN; 284 + prog_fd = bpf_program__fd(skel->progs.uretprobe_sleep); 285 + link4_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); 286 + if (!ASSERT_GE(link4_fd, 0, "link4_fd")) 287 + goto cleanup; 288 + 289 + opts.kprobe_multi.flags = 0; 290 + opts.uprobe_multi.pid = 0; 291 + prog_fd = bpf_program__fd(skel->progs.uprobe_extra); 292 + link_extra_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); 293 + if (!ASSERT_GE(link_extra_fd, 0, "link_extra_fd")) 294 + goto cleanup; 295 + 296 + uprobe_multi_test_run(skel, child); 297 + 298 + cleanup: 299 + if (link1_fd >= 0) 300 + close(link1_fd); 301 + if (link2_fd >= 0) 302 + close(link2_fd); 303 + if (link3_fd >= 0) 304 + close(link3_fd); 305 + if (link4_fd >= 0) 306 + close(link4_fd); 307 + if (link_extra_fd >= 0) 308 + close(link_extra_fd); 309 + 310 + uprobe_multi__destroy(skel); 311 + free(offsets); 312 + } 313 + 314 + void test_link_api(void) 315 + { 316 + struct child *child; 317 + 318 + /* no pid filter */ 319 + __test_link_api(NULL); 320 + 321 + /* pid 
filter */ 322 + child = spawn_child(); 323 + if (!ASSERT_OK_PTR(child, "spawn_child")) 324 + return; 325 + 326 + __test_link_api(child); 327 + } 328 + 329 + static void test_bench_attach_uprobe(void) 330 + { 331 + long attach_start_ns = 0, attach_end_ns = 0; 332 + struct uprobe_multi_bench *skel = NULL; 333 + long detach_start_ns, detach_end_ns; 334 + double attach_delta, detach_delta; 335 + int err; 336 + 337 + skel = uprobe_multi_bench__open_and_load(); 338 + if (!ASSERT_OK_PTR(skel, "uprobe_multi_bench__open_and_load")) 339 + goto cleanup; 340 + 341 + attach_start_ns = get_time_ns(); 342 + 343 + err = uprobe_multi_bench__attach(skel); 344 + if (!ASSERT_OK(err, "uprobe_multi_bench__attach")) 345 + goto cleanup; 346 + 347 + attach_end_ns = get_time_ns(); 348 + 349 + system("./uprobe_multi bench"); 350 + 351 + ASSERT_EQ(skel->bss->count, 50000, "uprobes_count"); 352 + 353 + cleanup: 354 + detach_start_ns = get_time_ns(); 355 + uprobe_multi_bench__destroy(skel); 356 + detach_end_ns = get_time_ns(); 357 + 358 + attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0; 359 + detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0; 360 + 361 + printf("%s: attached in %7.3lfs\n", __func__, attach_delta); 362 + printf("%s: detached in %7.3lfs\n", __func__, detach_delta); 363 + } 364 + 365 + static void test_bench_attach_usdt(void) 366 + { 367 + long attach_start_ns = 0, attach_end_ns = 0; 368 + struct uprobe_multi_usdt *skel = NULL; 369 + long detach_start_ns, detach_end_ns; 370 + double attach_delta, detach_delta; 371 + 372 + skel = uprobe_multi_usdt__open_and_load(); 373 + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open")) 374 + goto cleanup; 375 + 376 + attach_start_ns = get_time_ns(); 377 + 378 + skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0, -1, "./uprobe_multi", 379 + "test", "usdt", NULL); 380 + if (!ASSERT_OK_PTR(skel->links.usdt0, "bpf_program__attach_usdt")) 381 + goto cleanup; 382 + 383 + attach_end_ns = get_time_ns(); 384 + 385 + 
system("./uprobe_multi usdt"); 386 + 387 + ASSERT_EQ(skel->bss->count, 50000, "usdt_count"); 388 + 389 + cleanup: 390 + detach_start_ns = get_time_ns(); 391 + uprobe_multi_usdt__destroy(skel); 392 + detach_end_ns = get_time_ns(); 393 + 394 + attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0; 395 + detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0; 396 + 397 + printf("%s: attached in %7.3lfs\n", __func__, attach_delta); 398 + printf("%s: detached in %7.3lfs\n", __func__, detach_delta); 399 + } 400 + 401 + void test_uprobe_multi_test(void) 402 + { 403 + if (test__start_subtest("skel_api")) 404 + test_skel_api(); 405 + if (test__start_subtest("attach_api_pattern")) 406 + test_attach_api_pattern(); 407 + if (test__start_subtest("attach_api_syms")) 408 + test_attach_api_syms(); 409 + if (test__start_subtest("link_api")) 410 + test_link_api(); 411 + if (test__start_subtest("bench_uprobe")) 412 + test_bench_attach_uprobe(); 413 + if (test__start_subtest("bench_usdt")) 414 + test_bench_attach_usdt(); 415 + }
+28
tools/testing/selftests/bpf/progs/local_kptr_stash.c
··· 14 14 struct bpf_rb_node node; 15 15 }; 16 16 17 + struct plain_local { 18 + long key; 19 + long data; 20 + }; 21 + 17 22 struct map_value { 18 23 struct prog_test_ref_kfunc *not_kptr; 19 24 struct prog_test_ref_kfunc __kptr *val; 20 25 struct node_data __kptr *node; 26 + struct plain_local __kptr *plain; 21 27 }; 22 28 23 29 /* This is necessary so that LLVM generates BTF for node_data struct ··· 70 64 long stash_rb_nodes(void *ctx) 71 65 { 72 66 return create_and_stash(0, 41) ?: create_and_stash(1, 42); 67 + } 68 + 69 + SEC("tc") 70 + long stash_plain(void *ctx) 71 + { 72 + struct map_value *mapval; 73 + struct plain_local *res; 74 + int idx = 0; 75 + 76 + mapval = bpf_map_lookup_elem(&some_nodes, &idx); 77 + if (!mapval) 78 + return 1; 79 + 80 + res = bpf_obj_new(typeof(*res)); 81 + if (!res) 82 + return 1; 83 + res->key = 41; 84 + 85 + res = bpf_kptr_xchg(&mapval->plain, res); 86 + if (res) 87 + bpf_obj_drop(res); 88 + return 0; 73 89 } 74 90 75 91 SEC("tc")
+85
tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ 3 + 4 + #include <vmlinux.h> 5 + #include <bpf/bpf_helpers.h> 6 + #include <bpf/bpf_tracing.h> 7 + #include <bpf/bpf_core_read.h> 8 + #include "../bpf_experimental.h" 9 + #include "bpf_misc.h" 10 + 11 + struct node_data { 12 + long key; 13 + long data; 14 + struct bpf_rb_node node; 15 + }; 16 + 17 + struct map_value { 18 + struct node_data __kptr *node; 19 + }; 20 + 21 + struct node_data2 { 22 + long key[4]; 23 + }; 24 + 25 + /* This is necessary so that LLVM generates BTF for node_data struct 26 + * If it's not included, a fwd reference for node_data will be generated but 27 + * no struct. Example BTF of "node" field in map_value when not included: 28 + * 29 + * [10] PTR '(anon)' type_id=35 30 + * [34] FWD 'node_data' fwd_kind=struct 31 + * [35] TYPE_TAG 'kptr_ref' type_id=34 32 + */ 33 + struct node_data *just_here_because_btf_bug; 34 + 35 + struct { 36 + __uint(type, BPF_MAP_TYPE_ARRAY); 37 + __type(key, int); 38 + __type(value, struct map_value); 39 + __uint(max_entries, 2); 40 + } some_nodes SEC(".maps"); 41 + 42 + SEC("tc") 43 + __failure __msg("invalid kptr access, R2 type=ptr_node_data2 expected=ptr_node_data") 44 + long stash_rb_nodes(void *ctx) 45 + { 46 + struct map_value *mapval; 47 + struct node_data2 *res; 48 + int idx = 0; 49 + 50 + mapval = bpf_map_lookup_elem(&some_nodes, &idx); 51 + if (!mapval) 52 + return 1; 53 + 54 + res = bpf_obj_new(typeof(*res)); 55 + if (!res) 56 + return 1; 57 + res->key[0] = 40; 58 + 59 + res = bpf_kptr_xchg(&mapval->node, res); 60 + if (res) 61 + bpf_obj_drop(res); 62 + return 0; 63 + } 64 + 65 + SEC("tc") 66 + __failure __msg("R1 must have zero offset when passed to release func") 67 + long drop_rb_node_off(void *ctx) 68 + { 69 + struct map_value *mapval; 70 + struct node_data *res; 71 + int idx = 0; 72 + 73 + mapval = bpf_map_lookup_elem(&some_nodes, &idx); 74 + if (!mapval) 75 + return 1; 76 + 77 + res = 
bpf_obj_new(typeof(*res)); 78 + if (!res) 79 + return 1; 80 + /* Try releasing with graph node offset */ 81 + bpf_obj_drop(&res->node); 82 + return 0; 83 + } 84 + 85 + char _license[] SEC("license") = "GPL";
+71
tools/testing/selftests/bpf/progs/refcounted_kptr.c
··· 8 8 #include "bpf_misc.h" 9 9 #include "bpf_experimental.h" 10 10 11 + extern void bpf_rcu_read_lock(void) __ksym; 12 + extern void bpf_rcu_read_unlock(void) __ksym; 13 + 11 14 struct node_data { 12 15 long key; 13 16 long list_data; ··· 497 494 bpf_obj_drop(container_of(res, struct node_data, r)); 498 495 return 3; 499 496 } 497 + return 0; 498 + } 499 + 500 + SEC("?fentry.s/bpf_testmod_test_read") 501 + __success 502 + int BPF_PROG(rbtree_sleepable_rcu, 503 + struct file *file, struct kobject *kobj, 504 + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) 505 + { 506 + struct bpf_rb_node *rb; 507 + struct node_data *n, *m = NULL; 508 + 509 + n = bpf_obj_new(typeof(*n)); 510 + if (!n) 511 + return 0; 512 + 513 + bpf_rcu_read_lock(); 514 + bpf_spin_lock(&lock); 515 + bpf_rbtree_add(&root, &n->r, less); 516 + rb = bpf_rbtree_first(&root); 517 + if (!rb) 518 + goto err_out; 519 + 520 + rb = bpf_rbtree_remove(&root, rb); 521 + if (!rb) 522 + goto err_out; 523 + 524 + m = container_of(rb, struct node_data, r); 525 + 526 + err_out: 527 + bpf_spin_unlock(&lock); 528 + bpf_rcu_read_unlock(); 529 + if (m) 530 + bpf_obj_drop(m); 531 + return 0; 532 + } 533 + 534 + SEC("?fentry.s/bpf_testmod_test_read") 535 + __success 536 + int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock, 537 + struct file *file, struct kobject *kobj, 538 + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) 539 + { 540 + struct bpf_rb_node *rb; 541 + struct node_data *n, *m = NULL; 542 + 543 + n = bpf_obj_new(typeof(*n)); 544 + if (!n) 545 + return 0; 546 + 547 + /* No explicit bpf_rcu_read_lock */ 548 + bpf_spin_lock(&lock); 549 + bpf_rbtree_add(&root, &n->r, less); 550 + rb = bpf_rbtree_first(&root); 551 + if (!rb) 552 + goto err_out; 553 + 554 + rb = bpf_rbtree_remove(&root, rb); 555 + if (!rb) 556 + goto err_out; 557 + 558 + m = container_of(rb, struct node_data, r); 559 + 560 + err_out: 561 + bpf_spin_unlock(&lock); 562 + /* No explicit bpf_rcu_read_unlock */ 
563 + if (m) 564 + bpf_obj_drop(m); 500 565 return 0; 501 566 } 502 567
+28
tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
··· 13 13 struct bpf_refcount refcount; 14 14 }; 15 15 16 + extern void bpf_rcu_read_lock(void) __ksym; 17 + extern void bpf_rcu_read_unlock(void) __ksym; 18 + 16 19 #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) 17 20 private(A) struct bpf_spin_lock glock; 18 21 private(A) struct bpf_rb_root groot __contains(node_acquire, node); ··· 70 67 bpf_spin_lock(&glock); 71 68 bpf_rbtree_add(&groot, &n->node, less); 72 69 bpf_spin_unlock(&glock); 70 + 71 + return 0; 72 + } 73 + 74 + SEC("?fentry.s/bpf_testmod_test_read") 75 + __failure __msg("function calls are not allowed while holding a lock") 76 + int BPF_PROG(rbtree_fail_sleepable_lock_across_rcu, 77 + struct file *file, struct kobject *kobj, 78 + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) 79 + { 80 + struct node_acquire *n; 81 + 82 + n = bpf_obj_new(typeof(*n)); 83 + if (!n) 84 + return 0; 85 + 86 + /* spin_{lock,unlock} are in different RCU CS */ 87 + bpf_rcu_read_lock(); 88 + bpf_spin_lock(&glock); 89 + bpf_rbtree_add(&groot, &n->node, less); 90 + bpf_rcu_read_unlock(); 91 + 92 + bpf_rcu_read_lock(); 93 + bpf_spin_unlock(&glock); 94 + bpf_rcu_read_unlock(); 73 95 74 96 return 0; 75 97 }
+51
tools/testing/selftests/bpf/progs/task_kfunc_success.c
··· 18 18 */ 19 19 20 20 struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak; 21 + 22 + struct task_struct *bpf_task_acquire___one(struct task_struct *task) __ksym __weak; 23 + /* The two-param bpf_task_acquire doesn't exist */ 24 + struct task_struct *bpf_task_acquire___two(struct task_struct *p, void *ctx) __ksym __weak; 25 + /* Incorrect type for first param */ 26 + struct task_struct *bpf_task_acquire___three(void *ctx) __ksym __weak; 27 + 21 28 void invalid_kfunc(void) __ksym __weak; 22 29 void bpf_testmod_test_mod_kfunc(int i) __ksym __weak; 23 30 ··· 58 51 bpf_task_release(acquired); 59 52 else 60 53 err = 6; 54 + 55 + return 0; 56 + } 57 + 58 + SEC("tp_btf/task_newtask") 59 + int BPF_PROG(test_task_kfunc_flavor_relo, struct task_struct *task, u64 clone_flags) 60 + { 61 + struct task_struct *acquired = NULL; 62 + int fake_ctx = 42; 63 + 64 + if (bpf_ksym_exists(bpf_task_acquire___one)) { 65 + acquired = bpf_task_acquire___one(task); 66 + } else if (bpf_ksym_exists(bpf_task_acquire___two)) { 67 + /* Here, bpf_object__resolve_ksym_func_btf_id's find_ksym_btf_id 68 + * call will find vmlinux's bpf_task_acquire, but subsequent 69 + * bpf_core_types_are_compat will fail 70 + */ 71 + acquired = bpf_task_acquire___two(task, &fake_ctx); 72 + err = 3; 73 + return 0; 74 + } else if (bpf_ksym_exists(bpf_task_acquire___three)) { 75 + /* bpf_core_types_are_compat will fail similarly to above case */ 76 + acquired = bpf_task_acquire___three(&fake_ctx); 77 + err = 4; 78 + return 0; 79 + } 80 + 81 + if (acquired) 82 + bpf_task_release(acquired); 83 + else 84 + err = 5; 85 + return 0; 86 + } 87 + 88 + SEC("tp_btf/task_newtask") 89 + int BPF_PROG(test_task_kfunc_flavor_relo_not_found, struct task_struct *task, u64 clone_flags) 90 + { 91 + /* Neither symbol should successfully resolve. 
92 + * Success or failure of one ___flavor should not affect others 93 + */ 94 + if (bpf_ksym_exists(bpf_task_acquire___two)) 95 + err = 1; 96 + else if (bpf_ksym_exists(bpf_task_acquire___three)) 97 + err = 2; 61 98 62 99 return 0; 63 100 }
+2 -1
tools/testing/selftests/bpf/progs/test_ldsx_insn.c
··· 5 5 #include <bpf/bpf_helpers.h> 6 6 #include <bpf/bpf_tracing.h> 7 7 8 - #if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 8 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 9 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 9 10 const volatile int skip = 0; 10 11 #else 11 12 const volatile int skip = 1;
+90
tools/testing/selftests/bpf/progs/test_lwt_redirect.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/bpf.h> 3 + #include <bpf/bpf_endian.h> 4 + #include <bpf/bpf_helpers.h> 5 + #include <linux/ip.h> 6 + #include "bpf_tracing_net.h" 7 + 8 + /* We don't care about whether the packet can be received by network stack. 9 + * Just care if the packet is sent to the correct device at correct direction 10 + * and not panic the kernel. 11 + */ 12 + static int prepend_dummy_mac(struct __sk_buff *skb) 13 + { 14 + char mac[] = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xf, 15 + 0xe, 0xd, 0xc, 0xb, 0xa, 0x08, 0x00}; 16 + 17 + if (bpf_skb_change_head(skb, ETH_HLEN, 0)) 18 + return -1; 19 + 20 + if (bpf_skb_store_bytes(skb, 0, mac, sizeof(mac), 0)) 21 + return -1; 22 + 23 + return 0; 24 + } 25 + 26 + /* Use the last byte of IP address to redirect the packet */ 27 + static int get_redirect_target(struct __sk_buff *skb) 28 + { 29 + struct iphdr *iph = NULL; 30 + void *start = (void *)(long)skb->data; 31 + void *end = (void *)(long)skb->data_end; 32 + 33 + if (start + sizeof(*iph) > end) 34 + return -1; 35 + 36 + iph = (struct iphdr *)start; 37 + return bpf_ntohl(iph->daddr) & 0xff; 38 + } 39 + 40 + SEC("redir_ingress") 41 + int test_lwt_redirect_in(struct __sk_buff *skb) 42 + { 43 + int target = get_redirect_target(skb); 44 + 45 + if (target < 0) 46 + return BPF_OK; 47 + 48 + if (prepend_dummy_mac(skb)) 49 + return BPF_DROP; 50 + 51 + return bpf_redirect(target, BPF_F_INGRESS); 52 + } 53 + 54 + SEC("redir_egress") 55 + int test_lwt_redirect_out(struct __sk_buff *skb) 56 + { 57 + int target = get_redirect_target(skb); 58 + 59 + if (target < 0) 60 + return BPF_OK; 61 + 62 + if (prepend_dummy_mac(skb)) 63 + return BPF_DROP; 64 + 65 + return bpf_redirect(target, 0); 66 + } 67 + 68 + SEC("redir_egress_nomac") 69 + int test_lwt_redirect_out_nomac(struct __sk_buff *skb) 70 + { 71 + int target = get_redirect_target(skb); 72 + 73 + if (target < 0) 74 + return BPF_OK; 75 + 76 + return bpf_redirect(target, 0); 77 + } 78 + 79 + 
SEC("redir_ingress_nomac") 80 + int test_lwt_redirect_in_nomac(struct __sk_buff *skb) 81 + { 82 + int target = get_redirect_target(skb); 83 + 84 + if (target < 0) 85 + return BPF_OK; 86 + 87 + return bpf_redirect(target, BPF_F_INGRESS); 88 + } 89 + 90 + char _license[] SEC("license") = "GPL";
+36
tools/testing/selftests/bpf/progs/test_lwt_reroute.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <inttypes.h> 3 + #include <linux/bpf.h> 4 + #include <bpf/bpf_endian.h> 5 + #include <bpf/bpf_helpers.h> 6 + #include <linux/if_ether.h> 7 + #include <linux/ip.h> 8 + 9 + /* This function extracts the last byte of the daddr, and uses it 10 + * as output dev index. 11 + */ 12 + SEC("lwt_xmit") 13 + int test_lwt_reroute(struct __sk_buff *skb) 14 + { 15 + struct iphdr *iph = NULL; 16 + void *start = (void *)(long)skb->data; 17 + void *end = (void *)(long)skb->data_end; 18 + 19 + /* set mark at most once */ 20 + if (skb->mark != 0) 21 + return BPF_OK; 22 + 23 + if (start + sizeof(*iph) > end) 24 + return BPF_DROP; 25 + 26 + iph = (struct iphdr *)start; 27 + skb->mark = bpf_ntohl(iph->daddr) & 0xff; 28 + 29 + /* do not reroute x.x.x.0 packets */ 30 + if (skb->mark == 0) 31 + return BPF_OK; 32 + 33 + return BPF_LWT_REROUTE; 34 + } 35 + 36 + char _license[] SEC("license") = "GPL";
+13
tools/testing/selftests/bpf/progs/test_tc_bpf.c
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
#include <linux/ip.h>

/* Dummy prog to test TC-BPF API */

SEC("tc")
int cls(struct __sk_buff *skb)
{
	return 0;
}

/* Prog to verify tc-bpf without cap_sys_admin and cap_perfmon */
SEC("tcx/ingress")
int pkt_ptr(struct __sk_buff *skb)
{
	/* Direct packet access: iph points just past the Ethernet header. */
	struct iphdr *iph = (void *)(long)skb->data + sizeof(struct ethhdr);

	/* Verifier bounds check: bail out if a full IP header would
	 * run past data_end. The value itself is never read — the prog
	 * only exercises packet-pointer permissions.
	 */
	if ((long)(iph + 1) > (long)skb->data_end)
		return 1;
	return 0;
}
+101
tools/testing/selftests/bpf/progs/uprobe_multi.c
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <stdbool.h>

char _license[] SEC("license") = "GPL";

/* Addresses of the traced user-space functions; filled in by the
 * user-space test before attach so hits can be matched by address.
 */
__u64 uprobe_multi_func_1_addr = 0;
__u64 uprobe_multi_func_2_addr = 0;
__u64 uprobe_multi_func_3_addr = 0;

/* Hit counters per function, read back by the user-space test. */
__u64 uprobe_multi_func_1_result = 0;
__u64 uprobe_multi_func_2_result = 0;
__u64 uprobe_multi_func_3_result = 0;

__u64 uretprobe_multi_func_1_result = 0;
__u64 uretprobe_multi_func_2_result = 0;
__u64 uretprobe_multi_func_3_result = 0;

__u64 uprobe_multi_sleep_result = 0;

/* pid filter (0 = no filtering); child_pid reports who actually fired. */
int pid = 0;
int child_pid = 0;

bool test_cookie = false;
void *user_ptr = 0;

/* Sleepable-only check: copy a user buffer and compare it to the
 * expected literal written there by the user-space test.
 */
static __always_inline bool verify_sleepable_user_copy(void)
{
	char data[9];

	bpf_copy_from_user(data, sizeof(data), user_ptr);
	return bpf_strncmp(data, sizeof(data), "test_data") == 0;
}

/* Shared body for all probe flavors: attribute the hit to the matching
 * function counter, optionally validating the attach cookie too.
 */
static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep)
{
	child_pid = bpf_get_current_pid_tgid() >> 32;

	if (pid && child_pid != pid)
		return;

	__u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
	__u64 addr = bpf_get_func_ip(ctx);

/* Bump __var when the probed address matches __addr and (if cookie
 * checking is on) the attach cookie matches __cookie.
 */
#define SET(__var, __addr, __cookie) ({			\
	if (addr == __addr &&				\
	   (!test_cookie || (cookie == __cookie)))	\
		__var += 1;				\
})

	/* Expected cookie values are the ones the user-space test attaches
	 * with (deliberately permuted between entry and return probes) —
	 * confirm against the attach-side test code.
	 */
	if (is_return) {
		SET(uretprobe_multi_func_1_result, uprobe_multi_func_1_addr, 2);
		SET(uretprobe_multi_func_2_result, uprobe_multi_func_2_addr, 3);
		SET(uretprobe_multi_func_3_result, uprobe_multi_func_3_addr, 1);
	} else {
		SET(uprobe_multi_func_1_result, uprobe_multi_func_1_addr, 3);
		SET(uprobe_multi_func_2_result, uprobe_multi_func_2_addr, 1);
		SET(uprobe_multi_func_3_result, uprobe_multi_func_3_addr, 2);
	}

#undef SET

	if (is_sleep && verify_sleepable_user_copy())
		uprobe_multi_sleep_result += 1;
}

SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
int uprobe(struct pt_regs *ctx)
{
	uprobe_multi_check(ctx, false, false);
	return 0;
}

SEC("uretprobe.multi//proc/self/exe:uprobe_multi_func_*")
int uretprobe(struct pt_regs *ctx)
{
	uprobe_multi_check(ctx, true, false);
	return 0;
}

SEC("uprobe.multi.s//proc/self/exe:uprobe_multi_func_*")
int uprobe_sleep(struct pt_regs *ctx)
{
	uprobe_multi_check(ctx, false, true);
	return 0;
}

SEC("uretprobe.multi.s//proc/self/exe:uprobe_multi_func_*")
int uretprobe_sleep(struct pt_regs *ctx)
{
	uprobe_multi_check(ctx, true, true);
	return 0;
}

/* Extra no-op program on the same pattern; attached to exercise multiple
 * links over the same target set.
 */
SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
int uprobe_extra(struct pt_regs *ctx)
{
	return 0;
}
+15
tools/testing/selftests/bpf/progs/uprobe_multi_bench.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/bpf.h> 3 + #include <bpf/bpf_helpers.h> 4 + #include <bpf/bpf_tracing.h> 5 + 6 + char _license[] SEC("license") = "GPL"; 7 + 8 + int count; 9 + 10 + SEC("uprobe.multi/./uprobe_multi:uprobe_multi_func_*") 11 + int uprobe_bench(struct pt_regs *ctx) 12 + { 13 + count++; 14 + return 0; 15 + }
+16
tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + #include "vmlinux.h" 4 + #include <bpf/bpf_helpers.h> 5 + #include <bpf/usdt.bpf.h> 6 + 7 + char _license[] SEC("license") = "GPL"; 8 + 9 + int count; 10 + 11 + SEC("usdt") 12 + int usdt0(struct pt_regs *ctx) 13 + { 14 + count++; 15 + return 0; 16 + }
+2 -1
tools/testing/selftests/bpf/progs/verifier_bswap.c
··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include "bpf_misc.h" 6 6 7 - #if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 7 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 8 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 8 9 9 10 SEC("socket") 10 11 __description("BSWAP, 16")
+2 -1
tools/testing/selftests/bpf/progs/verifier_gotol.c
··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include "bpf_misc.h" 6 6 7 - #if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 7 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 8 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 8 9 9 10 SEC("socket") 10 11 __description("gotol, small_imm")
+2 -1
tools/testing/selftests/bpf/progs/verifier_ldsx.c
··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include "bpf_misc.h" 6 6 7 - #if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 7 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 8 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 8 9 9 10 SEC("socket") 10 11 __description("LDSX, S8")
+2 -1
tools/testing/selftests/bpf/progs/verifier_movsx.c
··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include "bpf_misc.h" 6 6 7 - #if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 7 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 8 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 8 9 9 10 SEC("socket") 10 11 __description("MOV32SX, S8")
+2 -1
tools/testing/selftests/bpf/progs/verifier_sdiv.c
··· 4 4 #include <bpf/bpf_helpers.h> 5 5 #include "bpf_misc.h" 6 6 7 - #if defined(__TARGET_ARCH_x86) && __clang_major__ >= 18 7 + #if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ 8 + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 8 9 9 10 SEC("socket") 10 11 __description("SDIV32, non-zero imm divisor, check 1")
+10
tools/testing/selftests/bpf/testing_helpers.h
··· 7 7 #include <stdbool.h> 8 8 #include <bpf/bpf.h> 9 9 #include <bpf/libbpf.h> 10 + #include <time.h> 10 11 11 12 int parse_num_list(const char *s, bool **set, int *set_len); 12 13 __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); ··· 33 32 int load_bpf_testmod(bool verbose); 34 33 int unload_bpf_testmod(bool verbose); 35 34 int kern_sync_rcu(void); 35 + 36 + static inline __u64 get_time_ns(void) 37 + { 38 + struct timespec t; 39 + 40 + clock_gettime(CLOCK_MONOTONIC, &t); 41 + 42 + return (u64)t.tv_sec * 1000000000 + t.tv_nsec; 43 + } 36 44 37 45 #endif /* __TESTING_HELPERS_H */
+91
tools/testing/selftests/bpf/uprobe_multi.c
// SPDX-License-Identifier: GPL-2.0

#include <stdio.h>
#include <string.h>
#include <sdt.h>

/* Token-pasting helpers used to stamp out numbered identifiers. */
#define __PASTE(a, b) a##b
#define PASTE(a, b) __PASTE(a, b)

#define NAME(name, idx) PASTE(name, idx)

/* DEF expands to a trivial function definition; CALL to its invocation.
 * The resulting symbol names (uprobe_multi_func_NNNNN) are the attach
 * targets of the uprobe-multi benchmark — do not rename.
 */
#define DEF(name, idx) int NAME(name, idx)(void) { return 0; }
#define CALL(name, idx) NAME(name, idx)();

#define F(body, name, idx) body(name, idx)

/* F10/F100/F1000/F10000 recursively append one more digit per level,
 * expanding `body` once for each of the 10^k digit suffixes.
 */
#define F10(body, name, idx) \
	F(body, PASTE(name, idx), 0) F(body, PASTE(name, idx), 1) F(body, PASTE(name, idx), 2) \
	F(body, PASTE(name, idx), 3) F(body, PASTE(name, idx), 4) F(body, PASTE(name, idx), 5) \
	F(body, PASTE(name, idx), 6) F(body, PASTE(name, idx), 7) F(body, PASTE(name, idx), 8) \
	F(body, PASTE(name, idx), 9)

#define F100(body, name, idx) \
	F10(body, PASTE(name, idx), 0) F10(body, PASTE(name, idx), 1) F10(body, PASTE(name, idx), 2) \
	F10(body, PASTE(name, idx), 3) F10(body, PASTE(name, idx), 4) F10(body, PASTE(name, idx), 5) \
	F10(body, PASTE(name, idx), 6) F10(body, PASTE(name, idx), 7) F10(body, PASTE(name, idx), 8) \
	F10(body, PASTE(name, idx), 9)

#define F1000(body, name, idx) \
	F100(body, PASTE(name, idx), 0) F100(body, PASTE(name, idx), 1) F100(body, PASTE(name, idx), 2) \
	F100(body, PASTE(name, idx), 3) F100(body, PASTE(name, idx), 4) F100(body, PASTE(name, idx), 5) \
	F100(body, PASTE(name, idx), 6) F100(body, PASTE(name, idx), 7) F100(body, PASTE(name, idx), 8) \
	F100(body, PASTE(name, idx), 9)

#define F10000(body, name, idx) \
	F1000(body, PASTE(name, idx), 0) F1000(body, PASTE(name, idx), 1) F1000(body, PASTE(name, idx), 2) \
	F1000(body, PASTE(name, idx), 3) F1000(body, PASTE(name, idx), 4) F1000(body, PASTE(name, idx), 5) \
	F1000(body, PASTE(name, idx), 6) F1000(body, PASTE(name, idx), 7) F1000(body, PASTE(name, idx), 8) \
	F1000(body, PASTE(name, idx), 9)

/* 5 x 10000 = 50000 no-op functions for the uprobe-multi benchmark. */
F10000(DEF, uprobe_multi_func_, 0)
F10000(DEF, uprobe_multi_func_, 1)
F10000(DEF, uprobe_multi_func_, 2)
F10000(DEF, uprobe_multi_func_, 3)
F10000(DEF, uprobe_multi_func_, 4)

/* Call every generated function once so each attached uprobe fires once. */
static int bench(void)
{
	F10000(CALL, uprobe_multi_func_, 0)
	F10000(CALL, uprobe_multi_func_, 1)
	F10000(CALL, uprobe_multi_func_, 2)
	F10000(CALL, uprobe_multi_func_, 3)
	F10000(CALL, uprobe_multi_func_, 4)
	return 0;
}

/* One SystemTap USDT probe site per expansion (provider "test", name "usdt"). */
#define PROBE STAP_PROBE(test, usdt);

#define PROBE10		PROBE PROBE PROBE PROBE PROBE \
			PROBE PROBE PROBE PROBE PROBE
#define PROBE100	PROBE10 PROBE10 PROBE10 PROBE10 PROBE10 \
			PROBE10 PROBE10 PROBE10 PROBE10 PROBE10
#define PROBE1000	PROBE100 PROBE100 PROBE100 PROBE100 PROBE100 \
			PROBE100 PROBE100 PROBE100 PROBE100 PROBE100
#define PROBE10000	PROBE1000 PROBE1000 PROBE1000 PROBE1000 PROBE1000 \
			PROBE1000 PROBE1000 PROBE1000 PROBE1000 PROBE1000

/* Hit 50000 distinct USDT sites for the usdt-multi benchmark. */
static int usdt(void)
{
	PROBE10000
	PROBE10000
	PROBE10000
	PROBE10000
	PROBE10000
	return 0;
}

/* Entry point: mode is selected by the single command-line argument. */
int main(int argc, char **argv)
{
	if (argc != 2)
		goto error;

	if (!strcmp("bench", argv[1]))
		return bench();
	if (!strcmp("usdt", argv[1]))
		return usdt();

error:
	fprintf(stderr, "usage: %s <bench|usdt>\n", argv[0]);
	return -1;
}