
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2020-03-13

The following pull-request contains BPF updates for your *net-next* tree.

We've added 86 non-merge commits during the last 12 day(s) which contain
a total of 107 files changed, 5771 insertions(+), 1700 deletions(-).

The main changes are:

1) Add modify_return attach type which allows attaching to a function via
BPF trampoline; the program is run after the fentry and before the fexit
programs and can pass a return code to the original caller, from KP Singh.

2) Generalize BPF's kallsyms handling and make BPF trampoline and dispatcher
objects visible in /proc/kallsyms so they can be annotated in
stack traces, from Jiri Olsa.

3) Extend BPF sockmap to allow for UDP next to the existing TCP support, in
order to enable this for BPF based socket dispatch, from Lorenz Bauer.

4) Introduce a new bpftool 'prog profile' command which attaches to existing
BPF programs via fentry and fexit hooks and reads out hardware counters
during that period, from Song Liu. Example usage:

bpftool prog profile id 337 duration 3 cycles instructions llc_misses

4228 run_cnt
3403698 cycles (84.08%)
3525294 instructions # 1.04 insn per cycle (84.05%)
13 llc_misses # 3.69 LLC misses per million isns (83.50%)

5) Batch of improvements to libbpf, bpftool and BPF selftests. Also addition
of a new bpf_link abstraction to keep in particular BPF tracing programs
attached even when the application owning them exits, from Andrii Nakryiko.

6) New bpf_get_ns_current_pid_tgid() helper for tracing to perform PID filtering,
which returns the PID as seen from a given PID namespace rather than the init
namespace, from Carlos Neira.

7) Refactor of RISC-V JIT code to move out common pieces and addition of a
new RV32G BPF JIT compiler, from Luke Nelson.

8) Add gso_size context member to __sk_buff so that BPF programs are able to
know whether a given skb is GSO or not, from Willem de Bruijn.

9) Add a new bpf_xdp_output() helper which reuses XDP's existing perf RB output
implementation but can be called from tracepoint programs, from Eelco Chaudron.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+5779 -1708
+2 -1
Documentation/admin-guide/sysctl/net.rst
···
 - sparc64
 - mips64
 - s390x
-- riscv
+- riscv64
+- riscv32

 And the older cBPF JIT supported on the following archs:
+1 -1
Documentation/networking/filter.txt
···

 Currently, the classic BPF format is being used for JITing on most
 32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64,
-sparc64, arm32, riscv (RV64G) perform JIT compilation from eBPF
+sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF
 instruction set.

 Some core changes of the new internal format:
+16 -2
MAINTAINERS
···
 S:	Maintained
 F:	arch/powerpc/net/

-BPF JIT for RISC-V (RV64G)
-M:	Björn Töpel <bjorn.topel@gmail.com>
+BPF JIT for RISC-V (32-bit)
+M:	Luke Nelson <luke.r.nels@gmail.com>
+M:	Xi Wang <xi.wang@gmail.com>
 L:	netdev@vger.kernel.org
+L:	bpf@vger.kernel.org
 S:	Maintained
 F:	arch/riscv/net/
+X:	arch/riscv/net/bpf_jit_comp64.c
+
+BPF JIT for RISC-V (64-bit)
+M:	Björn Töpel <bjorn.topel@gmail.com>
+L:	netdev@vger.kernel.org
+L:	bpf@vger.kernel.org
+S:	Maintained
+F:	arch/riscv/net/
+X:	arch/riscv/net/bpf_jit_comp32.c

 BPF JIT for S390
 M:	Ilya Leoshkevich <iii@linux.ibm.com>
···
 L7 BPF FRAMEWORK
 M:	John Fastabend <john.fastabend@gmail.com>
 M:	Daniel Borkmann <daniel@iogearbox.net>
+M:	Jakub Sitnicki <jakub@cloudflare.com>
+M:	Lorenz Bauer <lmb@cloudflare.com>
 L:	netdev@vger.kernel.org
 L:	bpf@vger.kernel.org
 S:	Maintained
···
 F:	net/core/skmsg.c
 F:	net/core/sock_map.c
 F:	net/ipv4/tcp_bpf.c
+F:	net/ipv4/udp_bpf.c

 LANTIQ / INTEL Ethernet drivers
 M:	Hauke Mehrtens <hauke@hauke-m.de>
+1 -1
arch/riscv/Kconfig
···
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_MMIOWB
 	select ARCH_HAS_DEBUG_VIRTUAL
-	select HAVE_EBPF_JIT if 64BIT
+	select HAVE_EBPF_JIT
 	select EDAC_SUPPORT
 	select ARCH_HAS_GIGANTIC_PAGE
 	select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
+8 -1
arch/riscv/net/Makefile
···
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+
+obj-$(CONFIG_BPF_JIT) += bpf_jit_core.o
+
+ifeq ($(CONFIG_ARCH_RV64I),y)
+	obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
+else
+	obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
+endif
+514
arch/riscv/net/bpf_jit.h
···
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common functionality for RV32 and RV64 BPF JIT compilers
+ *
+ * Copyright (c) 2019 Björn Töpel <bjorn.topel@gmail.com>
+ *
+ */
+
+#ifndef _BPF_JIT_H
+#define _BPF_JIT_H
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <asm/cacheflush.h>
+
+enum {
+	RV_REG_ZERO = 0,	/* The constant value 0 */
+	RV_REG_RA = 1,		/* Return address */
+	RV_REG_SP = 2,		/* Stack pointer */
+	RV_REG_GP = 3,		/* Global pointer */
+	RV_REG_TP = 4,		/* Thread pointer */
+	RV_REG_T0 = 5,		/* Temporaries */
+	RV_REG_T1 = 6,
+	RV_REG_T2 = 7,
+	RV_REG_FP = 8,		/* Saved register/frame pointer */
+	RV_REG_S1 = 9,		/* Saved register */
+	RV_REG_A0 = 10,		/* Function argument/return values */
+	RV_REG_A1 = 11,		/* Function arguments */
+	RV_REG_A2 = 12,
+	RV_REG_A3 = 13,
+	RV_REG_A4 = 14,
+	RV_REG_A5 = 15,
+	RV_REG_A6 = 16,
+	RV_REG_A7 = 17,
+	RV_REG_S2 = 18,		/* Saved registers */
+	RV_REG_S3 = 19,
+	RV_REG_S4 = 20,
+	RV_REG_S5 = 21,
+	RV_REG_S6 = 22,
+	RV_REG_S7 = 23,
+	RV_REG_S8 = 24,
+	RV_REG_S9 = 25,
+	RV_REG_S10 = 26,
+	RV_REG_S11 = 27,
+	RV_REG_T3 = 28,		/* Temporaries */
+	RV_REG_T4 = 29,
+	RV_REG_T5 = 30,
+	RV_REG_T6 = 31,
+};
+
+struct rv_jit_context {
+	struct bpf_prog *prog;
+	u32 *insns;		/* RV insns */
+	int ninsns;
+	int epilogue_offset;
+	int *offset;		/* BPF to RV */
+	unsigned long flags;
+	int stack_size;
+};
+
+struct rv_jit_data {
+	struct bpf_binary_header *header;
+	u8 *image;
+	struct rv_jit_context ctx;
+};
+
+static inline void bpf_fill_ill_insns(void *area, unsigned int size)
+{
+	memset(area, 0, size);
+}
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+	flush_icache_range((unsigned long)start, (unsigned long)end);
+}
+
+static inline void emit(const u32 insn, struct rv_jit_context *ctx)
+{
+	if (ctx->insns)
+		ctx->insns[ctx->ninsns] = insn;
+
+	ctx->ninsns++;
+}
+
+static inline int epilogue_offset(struct rv_jit_context *ctx)
+{
+	int to = ctx->epilogue_offset, from = ctx->ninsns;
+
+	return (to - from) << 2;
+}
+
+/* Return -1 or inverted cond. */
+static inline int invert_bpf_cond(u8 cond)
+{
+	switch (cond) {
+	case BPF_JEQ:
+		return BPF_JNE;
+	case BPF_JGT:
+		return BPF_JLE;
+	case BPF_JLT:
+		return BPF_JGE;
+	case BPF_JGE:
+		return BPF_JLT;
+	case BPF_JLE:
+		return BPF_JGT;
+	case BPF_JNE:
+		return BPF_JEQ;
+	case BPF_JSGT:
+		return BPF_JSLE;
+	case BPF_JSLT:
+		return BPF_JSGE;
+	case BPF_JSGE:
+		return BPF_JSLT;
+	case BPF_JSLE:
+		return BPF_JSGT;
+	}
+	return -1;
+}
+
+static inline bool is_12b_int(long val)
+{
+	return -(1L << 11) <= val && val < (1L << 11);
+}
+
+static inline int is_12b_check(int off, int insn)
+{
+	if (!is_12b_int(off)) {
+		pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n",
+		       insn, (int)off);
+		return -1;
+	}
+	return 0;
+}
+
+static inline bool is_13b_int(long val)
+{
+	return -(1L << 12) <= val && val < (1L << 12);
+}
+
+static inline bool is_21b_int(long val)
+{
+	return -(1L << 20) <= val && val < (1L << 20);
+}
+
+static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx)
+{
+	int from, to;
+
+	off++; /* BPF branch is from PC+1, RV is from PC */
+	from = (insn > 0) ? ctx->offset[insn - 1] : 0;
+	to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
+	return (to - from) << 2;
+}
+
+/* Instruction formats. */
+
+static inline u32 rv_r_insn(u8 funct7, u8 rs2, u8 rs1, u8 funct3, u8 rd,
+			    u8 opcode)
+{
+	return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
+	       (rd << 7) | opcode;
+}
+
+static inline u32 rv_i_insn(u16 imm11_0, u8 rs1, u8 funct3, u8 rd, u8 opcode)
+{
+	return (imm11_0 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) |
+	       opcode;
+}
+
+static inline u32 rv_s_insn(u16 imm11_0, u8 rs2, u8 rs1, u8 funct3, u8 opcode)
+{
+	u8 imm11_5 = imm11_0 >> 5, imm4_0 = imm11_0 & 0x1f;
+
+	return (imm11_5 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
+	       (imm4_0 << 7) | opcode;
+}
+
+static inline u32 rv_b_insn(u16 imm12_1, u8 rs2, u8 rs1, u8 funct3, u8 opcode)
+{
+	u8 imm12 = ((imm12_1 & 0x800) >> 5) | ((imm12_1 & 0x3f0) >> 4);
+	u8 imm4_1 = ((imm12_1 & 0xf) << 1) | ((imm12_1 & 0x400) >> 10);
+
+	return (imm12 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
+	       (imm4_1 << 7) | opcode;
+}
+
+static inline u32 rv_u_insn(u32 imm31_12, u8 rd, u8 opcode)
+{
+	return (imm31_12 << 12) | (rd << 7) | opcode;
+}
+
+static inline u32 rv_j_insn(u32 imm20_1, u8 rd, u8 opcode)
+{
+	u32 imm;
+
+	imm = (imm20_1 & 0x80000) | ((imm20_1 & 0x3ff) << 9) |
+	      ((imm20_1 & 0x400) >> 2) | ((imm20_1 & 0x7f800) >> 11);
+
+	return (imm << 12) | (rd << 7) | opcode;
+}
+
+static inline u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1,
+			      u8 funct3, u8 rd, u8 opcode)
+{
+	u8 funct7 = (funct5 << 2) | (aq << 1) | rl;
+
+	return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode);
+}
+
+/* Instructions shared by both RV32 and RV64. */
+
+static inline u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0)
+{
+	return rv_i_insn(imm11_0, rs1, 0, rd, 0x13);
+}
+
+static inline u32 rv_andi(u8 rd, u8 rs1, u16 imm11_0)
+{
+	return rv_i_insn(imm11_0, rs1, 7, rd, 0x13);
+}
+
+static inline u32 rv_ori(u8 rd, u8 rs1, u16 imm11_0)
+{
+	return rv_i_insn(imm11_0, rs1, 6, rd, 0x13);
+}
+
+static inline u32 rv_xori(u8 rd, u8 rs1, u16 imm11_0)
+{
+	return rv_i_insn(imm11_0, rs1, 4, rd, 0x13);
+}
+
+static inline u32 rv_slli(u8 rd, u8 rs1, u16 imm11_0)
+{
+	return rv_i_insn(imm11_0, rs1, 1, rd, 0x13);
+}
+
+static inline u32 rv_srli(u8 rd, u8 rs1, u16 imm11_0)
+{
+	return rv_i_insn(imm11_0, rs1, 5, rd, 0x13);
+}
+
+static inline u32 rv_srai(u8 rd, u8 rs1, u16 imm11_0)
+{
+	return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x13);
+}
+
+static inline u32 rv_lui(u8 rd, u32 imm31_12)
+{
+	return rv_u_insn(imm31_12, rd, 0x37);
+}
+
+static inline u32 rv_auipc(u8 rd, u32 imm31_12)
+{
+	return rv_u_insn(imm31_12, rd, 0x17);
+}
+
+static inline u32 rv_add(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0, rs2, rs1, 0, rd, 0x33);
+}
+
+static inline u32 rv_sub(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x33);
+}
+
+static inline u32 rv_sltu(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0, rs2, rs1, 3, rd, 0x33);
+}
+
+static inline u32 rv_and(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0, rs2, rs1, 7, rd, 0x33);
+}
+
+static inline u32 rv_or(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0, rs2, rs1, 6, rd, 0x33);
+}
+
+static inline u32 rv_xor(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0, rs2, rs1, 4, rd, 0x33);
+}
+
+static inline u32 rv_sll(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0, rs2, rs1, 1, rd, 0x33);
+}
+
+static inline u32 rv_srl(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0, rs2, rs1, 5, rd, 0x33);
+}
+
+static inline u32 rv_sra(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x33);
+}
+
+static inline u32 rv_mul(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(1, rs2, rs1, 0, rd, 0x33);
+}
+
+static inline u32 rv_mulhu(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(1, rs2, rs1, 3, rd, 0x33);
+}
+
+static inline u32 rv_divu(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(1, rs2, rs1, 5, rd, 0x33);
+}
+
+static inline u32 rv_remu(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(1, rs2, rs1, 7, rd, 0x33);
+}
+
+static inline u32 rv_jal(u8 rd, u32 imm20_1)
+{
+	return rv_j_insn(imm20_1, rd, 0x6f);
+}
+
+static inline u32 rv_jalr(u8 rd, u8 rs1, u16 imm11_0)
+{
+	return rv_i_insn(imm11_0, rs1, 0, rd, 0x67);
+}
+
+static inline u32 rv_beq(u8 rs1, u8 rs2, u16 imm12_1)
+{
+	return rv_b_insn(imm12_1, rs2, rs1, 0, 0x63);
+}
+
+static inline u32 rv_bne(u8 rs1, u8 rs2, u16 imm12_1)
+{
+	return rv_b_insn(imm12_1, rs2, rs1, 1, 0x63);
+}
+
+static inline u32 rv_bltu(u8 rs1, u8 rs2, u16 imm12_1)
+{
+	return rv_b_insn(imm12_1, rs2, rs1, 6, 0x63);
+}
+
+static inline u32 rv_bgtu(u8 rs1, u8 rs2, u16 imm12_1)
+{
+	return rv_bltu(rs2, rs1, imm12_1);
+}
+
+static inline u32 rv_bgeu(u8 rs1, u8 rs2, u16 imm12_1)
+{
+	return rv_b_insn(imm12_1, rs2, rs1, 7, 0x63);
+}
+
+static inline u32 rv_bleu(u8 rs1, u8 rs2, u16 imm12_1)
+{
+	return rv_bgeu(rs2, rs1, imm12_1);
+}
+
+static inline u32 rv_blt(u8 rs1, u8 rs2, u16 imm12_1)
+{
+	return rv_b_insn(imm12_1, rs2, rs1, 4, 0x63);
+}
+
+static inline u32 rv_bgt(u8 rs1, u8 rs2, u16 imm12_1)
+{
+	return rv_blt(rs2, rs1, imm12_1);
+}
+
+static inline u32 rv_bge(u8 rs1, u8 rs2, u16 imm12_1)
+{
+	return rv_b_insn(imm12_1, rs2, rs1, 5, 0x63);
+}
+
+static inline u32 rv_ble(u8 rs1, u8 rs2, u16 imm12_1)
+{
+	return rv_bge(rs2, rs1, imm12_1);
+}
+
+static inline u32 rv_lw(u8 rd, u16 imm11_0, u8 rs1)
+{
+	return rv_i_insn(imm11_0, rs1, 2, rd, 0x03);
+}
+
+static inline u32 rv_lbu(u8 rd, u16 imm11_0, u8 rs1)
+{
+	return rv_i_insn(imm11_0, rs1, 4, rd, 0x03);
+}
+
+static inline u32 rv_lhu(u8 rd, u16 imm11_0, u8 rs1)
+{
+	return rv_i_insn(imm11_0, rs1, 5, rd, 0x03);
+}
+
+static inline u32 rv_sb(u8 rs1, u16 imm11_0, u8 rs2)
+{
+	return rv_s_insn(imm11_0, rs2, rs1, 0, 0x23);
+}
+
+static inline u32 rv_sh(u8 rs1, u16 imm11_0, u8 rs2)
+{
+	return rv_s_insn(imm11_0, rs2, rs1, 1, 0x23);
+}
+
+static inline u32 rv_sw(u8 rs1, u16 imm11_0, u8 rs2)
+{
+	return rv_s_insn(imm11_0, rs2, rs1, 2, 0x23);
+}
+
+static inline u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f);
+}
+
+/*
+ * RV64-only instructions.
+ *
+ * These instructions are not available on RV32. Wrap them below a #if to
+ * ensure that the RV32 JIT doesn't emit any of these instructions.
+ */
+
+#if __riscv_xlen == 64
+
+static inline u32 rv_addiw(u8 rd, u8 rs1, u16 imm11_0)
+{
+	return rv_i_insn(imm11_0, rs1, 0, rd, 0x1b);
+}
+
+static inline u32 rv_slliw(u8 rd, u8 rs1, u16 imm11_0)
+{
+	return rv_i_insn(imm11_0, rs1, 1, rd, 0x1b);
+}
+
+static inline u32 rv_srliw(u8 rd, u8 rs1, u16 imm11_0)
+{
+	return rv_i_insn(imm11_0, rs1, 5, rd, 0x1b);
+}
+
+static inline u32 rv_sraiw(u8 rd, u8 rs1, u16 imm11_0)
+{
+	return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x1b);
+}
+
+static inline u32 rv_addw(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0, rs2, rs1, 0, rd, 0x3b);
+}
+
+static inline u32 rv_subw(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x3b);
+}
+
+static inline u32 rv_sllw(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0, rs2, rs1, 1, rd, 0x3b);
+}
+
+static inline u32 rv_srlw(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0, rs2, rs1, 5, rd, 0x3b);
+}
+
+static inline u32 rv_sraw(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x3b);
+}
+
+static inline u32 rv_mulw(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b);
+}
+
+static inline u32 rv_divuw(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b);
+}
+
+static inline u32 rv_remuw(u8 rd, u8 rs1, u8 rs2)
+{
+	return rv_r_insn(1, rs2, rs1, 7, rd, 0x3b);
+}
+
+static inline u32 rv_ld(u8 rd, u16 imm11_0, u8 rs1)
+{
+	return rv_i_insn(imm11_0, rs1, 3, rd, 0x03);
+}
+
+static inline u32 rv_lwu(u8 rd, u16 imm11_0, u8 rs1)
+{
+	return rv_i_insn(imm11_0, rs1, 6, rd, 0x03);
+}
+
+static inline u32 rv_sd(u8 rs1, u16 imm11_0, u8 rs2)
+{
+	return rv_s_insn(imm11_0, rs2, rs1, 3, 0x23);
+}
+
+static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
+{
+	return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
+}
+
+#endif /* __riscv_xlen == 64 */
+
+void bpf_jit_build_prologue(struct rv_jit_context *ctx);
+void bpf_jit_build_epilogue(struct rv_jit_context *ctx);
+
+int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
+		      bool extra_pass);
+
+#endif /* _BPF_JIT_H */
+5 -600
arch/riscv/net/bpf_jit_comp.c → arch/riscv/net/bpf_jit_comp64.c (renamed)
···

 #include <linux/bpf.h>
 #include <linux/filter.h>
-#include <asm/cacheflush.h>
-
-enum {
-	RV_REG_ZERO = 0,	/* The constant value 0 */
-	RV_REG_RA = 1,		/* Return address */
-	RV_REG_SP = 2,		/* Stack pointer */
-	RV_REG_GP = 3,		/* Global pointer */
-	RV_REG_TP = 4,		/* Thread pointer */
-	RV_REG_T0 = 5,		/* Temporaries */
-	RV_REG_T1 = 6,
-	RV_REG_T2 = 7,
-	RV_REG_FP = 8,
-	RV_REG_S1 = 9,		/* Saved registers */
-	RV_REG_A0 = 10,		/* Function argument/return values */
-	RV_REG_A1 = 11,		/* Function arguments */
-	RV_REG_A2 = 12,
-	RV_REG_A3 = 13,
-	RV_REG_A4 = 14,
-	RV_REG_A5 = 15,
-	RV_REG_A6 = 16,
-	RV_REG_A7 = 17,
-	RV_REG_S2 = 18,		/* Saved registers */
-	RV_REG_S3 = 19,
-	RV_REG_S4 = 20,
-	RV_REG_S5 = 21,
-	RV_REG_S6 = 22,
-	RV_REG_S7 = 23,
-	RV_REG_S8 = 24,
-	RV_REG_S9 = 25,
-	RV_REG_S10 = 26,
-	RV_REG_S11 = 27,
-	RV_REG_T3 = 28,		/* Temporaries */
-	RV_REG_T4 = 29,
-	RV_REG_T5 = 30,
-	RV_REG_T6 = 31,
-};
+#include "bpf_jit.h"

 #define RV_REG_TCC RV_REG_A6
 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */
···
 	RV_CTX_F_SEEN_S4 = RV_REG_S4,
 	RV_CTX_F_SEEN_S5 = RV_REG_S5,
 	RV_CTX_F_SEEN_S6 = RV_REG_S6,
-};
-
-struct rv_jit_context {
-	struct bpf_prog *prog;
-	u32 *insns;		/* RV insns */
-	int ninsns;
-	int epilogue_offset;
-	int *offset;		/* BPF to RV */
-	unsigned long flags;
-	int stack_size;
-};
-
-struct rv_jit_data {
-	struct bpf_binary_header *header;
-	u8 *image;
-	struct rv_jit_context ctx;
 };

 static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx)
···
 	return RV_REG_A6;
 }

-static void emit(const u32 insn, struct rv_jit_context *ctx)
-{
-	if (ctx->insns)
-		ctx->insns[ctx->ninsns] = insn;
-
-	ctx->ninsns++;
-}
-
-static u32 rv_r_insn(u8 funct7, u8 rs2, u8 rs1, u8 funct3, u8 rd, u8 opcode)
-{
-	return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
-	       (rd << 7) | opcode;
-}
-
-static u32 rv_i_insn(u16 imm11_0, u8 rs1, u8 funct3, u8 rd, u8 opcode)
-{
-	return (imm11_0 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) |
-	       opcode;
-}
-
-static u32 rv_s_insn(u16 imm11_0, u8 rs2, u8 rs1, u8 funct3, u8 opcode)
-{
-	u8 imm11_5 = imm11_0 >> 5, imm4_0 = imm11_0 & 0x1f;
-
-	return (imm11_5 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
-	       (imm4_0 << 7) | opcode;
-}
-
-static u32 rv_sb_insn(u16 imm12_1, u8 rs2, u8 rs1, u8 funct3, u8 opcode)
-{
-	u8 imm12 = ((imm12_1 & 0x800) >> 5) | ((imm12_1 & 0x3f0) >> 4);
-	u8 imm4_1 = ((imm12_1 & 0xf) << 1) | ((imm12_1 & 0x400) >> 10);
-
-	return (imm12 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
-	       (imm4_1 << 7) | opcode;
-}
-
-static u32 rv_u_insn(u32 imm31_12, u8 rd, u8 opcode)
-{
-	return (imm31_12 << 12) | (rd << 7) | opcode;
-}
-
-static u32 rv_uj_insn(u32 imm20_1, u8 rd, u8 opcode)
-{
-	u32 imm;
-
-	imm = (imm20_1 & 0x80000) | ((imm20_1 & 0x3ff) << 9) |
-	      ((imm20_1 & 0x400) >> 2) | ((imm20_1 & 0x7f800) >> 11);
-
-	return (imm << 12) | (rd << 7) | opcode;
-}
-
-static u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1,
-		       u8 funct3, u8 rd, u8 opcode)
-{
-	u8 funct7 = (funct5 << 2) | (aq << 1) | rl;
-
-	return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode);
-}
-
-static u32 rv_addiw(u8 rd, u8 rs1, u16 imm11_0)
-{
-	return rv_i_insn(imm11_0, rs1, 0, rd, 0x1b);
-}
-
-static u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0)
-{
-	return rv_i_insn(imm11_0, rs1, 0, rd, 0x13);
-}
-
-static u32 rv_addw(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0, rs2, rs1, 0, rd, 0x3b);
-}
-
-static u32 rv_add(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0, rs2, rs1, 0, rd, 0x33);
-}
-
-static u32 rv_subw(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x3b);
-}
-
-static u32 rv_sub(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x33);
-}
-
-static u32 rv_and(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0, rs2, rs1, 7, rd, 0x33);
-}
-
-static u32 rv_or(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0, rs2, rs1, 6, rd, 0x33);
-}
-
-static u32 rv_xor(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0, rs2, rs1, 4, rd, 0x33);
-}
-
-static u32 rv_mulw(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b);
-}
-
-static u32 rv_mul(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(1, rs2, rs1, 0, rd, 0x33);
-}
-
-static u32 rv_divuw(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b);
-}
-
-static u32 rv_divu(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(1, rs2, rs1, 5, rd, 0x33);
-}
-
-static u32 rv_remuw(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(1, rs2, rs1, 7, rd, 0x3b);
-}
-
-static u32 rv_remu(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(1, rs2, rs1, 7, rd, 0x33);
-}
-
-static u32 rv_sllw(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0, rs2, rs1, 1, rd, 0x3b);
-}
-
-static u32 rv_sll(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0, rs2, rs1, 1, rd, 0x33);
-}
-
-static u32 rv_srlw(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0, rs2, rs1, 5, rd, 0x3b);
-}
-
-static u32 rv_srl(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0, rs2, rs1, 5, rd, 0x33);
-}
-
-static u32 rv_sraw(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x3b);
-}
-
-static u32 rv_sra(u8 rd, u8 rs1, u8 rs2)
-{
-	return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x33);
-}
-
-static u32 rv_lui(u8 rd, u32 imm31_12)
-{
-	return rv_u_insn(imm31_12, rd, 0x37);
-}
-
-static u32 rv_slli(u8 rd, u8 rs1, u16 imm11_0)
-{
-	return rv_i_insn(imm11_0, rs1, 1, rd, 0x13);
-}
-
-static u32 rv_andi(u8 rd, u8 rs1, u16 imm11_0)
-{
-	return rv_i_insn(imm11_0, rs1, 7, rd, 0x13);
-}
-
-static u32 rv_ori(u8 rd, u8 rs1, u16 imm11_0)
-{
-	return rv_i_insn(imm11_0, rs1, 6, rd, 0x13);
-}
-
-static u32 rv_xori(u8 rd, u8 rs1, u16 imm11_0)
-{
-	return rv_i_insn(imm11_0, rs1, 4, rd, 0x13);
-}
-
-static u32 rv_slliw(u8 rd, u8 rs1, u16 imm11_0)
-{
-	return rv_i_insn(imm11_0, rs1, 1, rd, 0x1b);
-}
-
-static u32 rv_srliw(u8 rd, u8 rs1, u16 imm11_0)
-{
-	return rv_i_insn(imm11_0, rs1, 5, rd, 0x1b);
-}
-
-static u32 rv_srli(u8 rd, u8 rs1, u16 imm11_0)
-{
-	return rv_i_insn(imm11_0, rs1, 5, rd, 0x13);
-}
-
-static u32 rv_sraiw(u8 rd, u8 rs1, u16 imm11_0)
-{
-	return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x1b);
-}
-
-static u32 rv_srai(u8 rd, u8 rs1, u16 imm11_0)
-{
-	return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x13);
-}
-
-static u32 rv_jal(u8 rd, u32 imm20_1)
-{
-	return rv_uj_insn(imm20_1, rd, 0x6f);
-}
-
-static u32 rv_jalr(u8 rd, u8 rs1, u16 imm11_0)
-{
-	return rv_i_insn(imm11_0, rs1, 0, rd, 0x67);
-}
-
-static u32 rv_beq(u8 rs1, u8 rs2, u16 imm12_1)
-{
-	return rv_sb_insn(imm12_1, rs2, rs1, 0, 0x63);
-}
-
-static u32 rv_bltu(u8 rs1, u8 rs2, u16 imm12_1)
-{
-	return rv_sb_insn(imm12_1, rs2, rs1, 6, 0x63);
-}
-
-static u32 rv_bgeu(u8 rs1, u8 rs2, u16 imm12_1)
-{
-	return rv_sb_insn(imm12_1, rs2, rs1, 7, 0x63);
-}
-
-static u32 rv_bne(u8 rs1, u8 rs2, u16 imm12_1)
-{
-	return rv_sb_insn(imm12_1, rs2, rs1, 1, 0x63);
-}
-
-static u32 rv_blt(u8 rs1, u8 rs2, u16 imm12_1)
-{
-	return rv_sb_insn(imm12_1, rs2, rs1, 4, 0x63);
-}
-
-static u32 rv_bge(u8 rs1, u8 rs2, u16 imm12_1)
-{
-	return rv_sb_insn(imm12_1, rs2, rs1, 5, 0x63);
-}
-
-static u32 rv_sb(u8 rs1, u16 imm11_0, u8 rs2)
-{
-	return rv_s_insn(imm11_0, rs2, rs1, 0, 0x23);
-}
-
-static u32 rv_sh(u8 rs1, u16 imm11_0, u8 rs2)
-{
-	return rv_s_insn(imm11_0, rs2, rs1, 1, 0x23);
-}
-
-static u32 rv_sw(u8 rs1, u16 imm11_0, u8 rs2)
-{
-	return rv_s_insn(imm11_0, rs2, rs1, 2, 0x23);
-}
-
-static u32 rv_sd(u8 rs1, u16 imm11_0, u8 rs2)
-{
-	return rv_s_insn(imm11_0, rs2, rs1, 3, 0x23);
-}
-
-static u32 rv_lbu(u8 rd, u16 imm11_0, u8 rs1)
-{
-	return rv_i_insn(imm11_0, rs1, 4, rd, 0x03);
-}
-
-static u32 rv_lhu(u8 rd, u16 imm11_0, u8 rs1)
-{
-	return rv_i_insn(imm11_0, rs1, 5, rd, 0x03);
-}
-
-static u32 rv_lwu(u8 rd, u16 imm11_0, u8 rs1)
-{
-	return rv_i_insn(imm11_0, rs1, 6, rd, 0x03);
-}
-
-static u32 rv_ld(u8 rd, u16 imm11_0, u8 rs1)
-{
-	return rv_i_insn(imm11_0, rs1, 3, rd, 0x03);
-}
-
-static u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
-{
-	return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f);
-}
-
-static u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
-{
-	return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
-}
-
-static u32 rv_auipc(u8 rd, u32 imm31_12)
-{
-	return rv_u_insn(imm31_12, rd, 0x17);
-}
-
-static bool is_12b_int(s64 val)
-{
-	return -(1 << 11) <= val && val < (1 << 11);
-}
-
-static bool is_13b_int(s64 val)
-{
-	return -(1 << 12) <= val && val < (1 << 12);
-}
-
-static bool is_21b_int(s64 val)
-{
-	return -(1L << 20) <= val && val < (1L << 20);
-}
-
 static bool is_32b_int(s64 val)
 {
 	return -(1L << 31) <= val && val < (1L << 31);
-}
-
-static int is_12b_check(int off, int insn)
-{
-	if (!is_12b_int(off)) {
-		pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n",
-		       insn, (int)off);
-		return -1;
-	}
-	return 0;
 }

 static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
···
 	emit(rv_slli(rd, rd, shift), ctx);
 	if (lower)
 		emit(rv_addi(rd, rd, lower), ctx);
-}
-
-static int rv_offset(int insn, int off, struct rv_jit_context *ctx)
-{
-	int from, to;
-
-	off++; /* BPF branch is from PC+1, RV is from PC */
-	from = (insn > 0) ? ctx->offset[insn - 1] : 0;
-	to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
-	return (to - from) << 2;
-}
-
-static int epilogue_offset(struct rv_jit_context *ctx)
-{
-	int to = ctx->epilogue_offset, from = ctx->ninsns;
-
-	return (to - from) << 2;
 }

 static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
···
 	emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
 		     is_tail_call ? 4 : 0), /* skip TCC init */
 	     ctx);
-}
-
-/* return -1 or inverted cond */
-static int invert_bpf_cond(u8 cond)
-{
-	switch (cond) {
-	case BPF_JEQ:
-		return BPF_JNE;
-	case BPF_JGT:
-		return BPF_JLE;
-	case BPF_JLT:
-		return BPF_JGE;
-	case BPF_JGE:
-		return BPF_JLT;
-	case BPF_JLE:
-		return BPF_JGT;
-	case BPF_JNE:
-		return BPF_JEQ;
-	case BPF_JSGT:
-		return BPF_JSLE;
-	case BPF_JSLT:
-		return BPF_JSGE;
-	case BPF_JSGE:
-		return BPF_JSLT;
-	case BPF_JSLE:
-		return BPF_JSGT;
-	}
-	return -1;
 }

 static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
···
 	return 0;
 }

-static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
-		     bool extra_pass)
+int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
+		      bool extra_pass)
 {
 	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
 		    BPF_CLASS(insn->code) == BPF_JMP;
···
 	return 0;
 }

-static void build_prologue(struct rv_jit_context *ctx)
+void bpf_jit_build_prologue(struct rv_jit_context *ctx)
 {
 	int stack_adjust = 0, store_offset, bpf_stack_adjust;

···
 	ctx->stack_size = stack_adjust;
 }

-static void build_epilogue(struct rv_jit_context *ctx)
+void bpf_jit_build_epilogue(struct rv_jit_context *ctx)
 {
 	__build_epilogue(false, ctx);
-}
-
-static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset)
-{
-	const struct bpf_prog *prog = ctx->prog;
-	int i;
-
-	for (i = 0; i < prog->len; i++) {
-		const struct bpf_insn *insn = &prog->insnsi[i];
-		int ret;
-
-		ret = emit_insn(insn, ctx, extra_pass);
-		if (ret > 0) {
-			i++;
-			if (offset)
-				offset[i] = ctx->ninsns;
-			continue;
-		}
-		if (offset)
-			offset[i] = ctx->ninsns;
-		if (ret)
-			return ret;
-	}
-	return 0;
-}
-
-static void bpf_fill_ill_insns(void *area, unsigned int size)
-{
-	memset(area, 0, size);
-}
-
-static void bpf_flush_icache(void *start, void *end)
-{
-	flush_icache_range((unsigned long)start, (unsigned long)end);
-}
-
-bool bpf_jit_needs_zext(void)
-{
-	return true;
-}
-
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
-{
-	bool tmp_blinded = false, extra_pass = false;
-	struct bpf_prog *tmp, *orig_prog = prog;
-	int pass = 0, prev_ninsns = 0, i;
-	struct rv_jit_data *jit_data;
-	unsigned int image_size = 0;
-	struct rv_jit_context *ctx;
-
-	if (!prog->jit_requested)
-		return orig_prog;
-
-	tmp = bpf_jit_blind_constants(prog);
-	if (IS_ERR(tmp))
-		return orig_prog;
-	if (tmp != prog) {
-		tmp_blinded = true;
-		prog = tmp;
-	}
-
-	jit_data = prog->aux->jit_data;
-	if (!jit_data) {
-		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
-		if (!jit_data) {
-			prog = orig_prog;
-			goto out;
-		}
-		prog->aux->jit_data = jit_data;
-	}
-
-	ctx = &jit_data->ctx;
-
-	if (ctx->offset) {
-		extra_pass = true;
-		image_size = sizeof(u32) * ctx->ninsns;
-		goto skip_init_ctx;
-	}
-
-	ctx->prog = prog;
-	ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
-	if (!ctx->offset) {
-		prog = orig_prog;
-		goto out_offset;
-	}
-	for (i = 0; i < prog->len; i++) {
-		prev_ninsns += 32;
-		ctx->offset[i] = prev_ninsns;
-	}
-
-	for (i = 0; i < 16; i++) {
-		pass++;
-		ctx->ninsns = 0;
-		if (build_body(ctx, extra_pass, ctx->offset)) {
-			prog = orig_prog;
-			goto out_offset;
- } 1187 - build_prologue(ctx); 1188 - ctx->epilogue_offset = ctx->ninsns; 1189 - build_epilogue(ctx); 1190 - 1191 - if (ctx->ninsns == prev_ninsns) { 1192 - if (jit_data->header) 1193 - break; 1194 - 1195 - image_size = sizeof(u32) * ctx->ninsns; 1196 - jit_data->header = 1197 - bpf_jit_binary_alloc(image_size, 1198 - &jit_data->image, 1199 - sizeof(u32), 1200 - bpf_fill_ill_insns); 1201 - if (!jit_data->header) { 1202 - prog = orig_prog; 1203 - goto out_offset; 1204 - } 1205 - 1206 - ctx->insns = (u32 *)jit_data->image; 1207 - /* Now, when the image is allocated, the image 1208 - * can potentially shrink more (auipc/jalr -> 1209 - * jal). 1210 - */ 1211 - } 1212 - prev_ninsns = ctx->ninsns; 1213 - } 1214 - 1215 - if (i == 16) { 1216 - pr_err("bpf-jit: image did not converge in <%d passes!\n", i); 1217 - bpf_jit_binary_free(jit_data->header); 1218 - prog = orig_prog; 1219 - goto out_offset; 1220 - } 1221 - 1222 - skip_init_ctx: 1223 - pass++; 1224 - ctx->ninsns = 0; 1225 - 1226 - build_prologue(ctx); 1227 - if (build_body(ctx, extra_pass, NULL)) { 1228 - bpf_jit_binary_free(jit_data->header); 1229 - prog = orig_prog; 1230 - goto out_offset; 1231 - } 1232 - build_epilogue(ctx); 1233 - 1234 - if (bpf_jit_enable > 1) 1235 - bpf_jit_dump(prog->len, image_size, pass, ctx->insns); 1236 - 1237 - prog->bpf_func = (void *)ctx->insns; 1238 - prog->jited = 1; 1239 - prog->jited_len = image_size; 1240 - 1241 - bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); 1242 - 1243 - if (!prog->is_func || extra_pass) { 1244 - out_offset: 1245 - kfree(ctx->offset); 1246 - kfree(jit_data); 1247 - prog->aux->jit_data = NULL; 1248 - } 1249 - out: 1250 - if (tmp_blinded) 1251 - bpf_jit_prog_release_other(prog, prog == orig_prog ? 1252 - tmp : orig_prog); 1253 - return prog; 1254 1521 } 1255 1522 1256 1523 void *bpf_jit_alloc_exec(unsigned long size)
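The invert_bpf_cond() helper that this series moves out of bpf_jit_comp64.c (into the shared header, so both backends can use it) flips a BPF branch condition so a far branch can be emitted as an inverted short branch over an unconditional jump. A standalone sketch of the same mapping; the BPF_J* opcode values are copied from the UAPI BPF encoding and should be double-checked against include/uapi/linux/bpf.h:

```c
#include <assert.h>

/* BPF condition codes, as encoded in the UAPI headers (assumption). */
#define BPF_JEQ  0x10
#define BPF_JGT  0x20
#define BPF_JGE  0x30
#define BPF_JSET 0x40
#define BPF_JNE  0x50
#define BPF_JSGT 0x60
#define BPF_JSGE 0x70
#define BPF_JLT  0xa0
#define BPF_JLE  0xb0
#define BPF_JSLT 0xc0
#define BPF_JSLE 0xd0

/* Return the inverted condition, or -1 if none exists (e.g. BPF_JSET). */
static int invert_bpf_cond(unsigned char cond)
{
	switch (cond) {
	case BPF_JEQ:  return BPF_JNE;
	case BPF_JGT:  return BPF_JLE;
	case BPF_JLT:  return BPF_JGE;
	case BPF_JGE:  return BPF_JLT;
	case BPF_JLE:  return BPF_JGT;
	case BPF_JNE:  return BPF_JEQ;
	case BPF_JSGT: return BPF_JSLE;
	case BPF_JSLT: return BPF_JSGE;
	case BPF_JSGE: return BPF_JSLT;
	case BPF_JSLE: return BPF_JSGT;
	}
	return -1;
}
```

Note that inversion is an involution: applying it twice yields the original condition, which is why the same helper serves both directions of the far-branch rewrite.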
arch/riscv/net/bpf_jit_comp32.c (new file, +1310 lines)
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * BPF JIT compiler for RV32G
+ *
+ * Copyright (c) 2020 Luke Nelson <luke.r.nels@gmail.com>
+ * Copyright (c) 2020 Xi Wang <xi.wang@gmail.com>
+ *
+ * The code is based on the BPF JIT compiler for RV64G by Björn Töpel and
+ * the BPF JIT compiler for 32-bit ARM by Shubham Bansal and Mircea Gherzan.
+ */
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include "bpf_jit.h"
+
+enum {
+	/* Stack layout - these are offsets from (top of stack - 4). */
+	BPF_R6_HI,
+	BPF_R6_LO,
+	BPF_R7_HI,
+	BPF_R7_LO,
+	BPF_R8_HI,
+	BPF_R8_LO,
+	BPF_R9_HI,
+	BPF_R9_LO,
+	BPF_AX_HI,
+	BPF_AX_LO,
+	/* Stack space for BPF_REG_6 through BPF_REG_9 and BPF_REG_AX. */
+	BPF_JIT_SCRATCH_REGS,
+};
+
+#define STACK_OFFSET(k) (-4 - ((k) * 4))
+
+#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
+#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
+
+#define RV_REG_TCC RV_REG_T6
+#define RV_REG_TCC_SAVED RV_REG_S7
+
+static const s8 bpf2rv32[][2] = {
+	/* Return value from in-kernel function, and exit value from eBPF. */
+	[BPF_REG_0] = {RV_REG_S2, RV_REG_S1},
+	/* Arguments from eBPF program to in-kernel function. */
+	[BPF_REG_1] = {RV_REG_A1, RV_REG_A0},
+	[BPF_REG_2] = {RV_REG_A3, RV_REG_A2},
+	[BPF_REG_3] = {RV_REG_A5, RV_REG_A4},
+	[BPF_REG_4] = {RV_REG_A7, RV_REG_A6},
+	[BPF_REG_5] = {RV_REG_S4, RV_REG_S3},
+	/*
+	 * Callee-saved registers that in-kernel function will preserve.
+	 * Stored on the stack.
+	 */
+	[BPF_REG_6] = {STACK_OFFSET(BPF_R6_HI), STACK_OFFSET(BPF_R6_LO)},
+	[BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)},
+	[BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)},
+	[BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)},
+	/* Read-only frame pointer to access BPF stack. */
+	[BPF_REG_FP] = {RV_REG_S6, RV_REG_S5},
+	/* Temporary register for blinding constants. Stored on the stack. */
+	[BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)},
+	/*
+	 * Temporary registers used by the JIT to operate on registers stored
+	 * on the stack. Save t0 and t1 to be used as temporaries in generated
+	 * code.
+	 */
+	[TMP_REG_1] = {RV_REG_T3, RV_REG_T2},
+	[TMP_REG_2] = {RV_REG_T5, RV_REG_T4},
+};
+
+static s8 hi(const s8 *r)
+{
+	return r[0];
+}
+
+static s8 lo(const s8 *r)
+{
+	return r[1];
+}
+
+static void emit_imm(const s8 rd, s32 imm, struct rv_jit_context *ctx)
+{
+	u32 upper = (imm + (1 << 11)) >> 12;
+	u32 lower = imm & 0xfff;
+
+	if (upper) {
+		emit(rv_lui(rd, upper), ctx);
+		emit(rv_addi(rd, rd, lower), ctx);
+	} else {
+		emit(rv_addi(rd, RV_REG_ZERO, lower), ctx);
+	}
+}
+
+static void emit_imm32(const s8 *rd, s32 imm, struct rv_jit_context *ctx)
+{
+	/* Emit immediate into lower bits. */
+	emit_imm(lo(rd), imm, ctx);
+
+	/* Sign-extend into upper bits. */
+	if (imm >= 0)
+		emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx);
+	else
+		emit(rv_addi(hi(rd), RV_REG_ZERO, -1), ctx);
+}
+
+static void emit_imm64(const s8 *rd, s32 imm_hi, s32 imm_lo,
+		       struct rv_jit_context *ctx)
+{
+	emit_imm(lo(rd), imm_lo, ctx);
+	emit_imm(hi(rd), imm_hi, ctx);
+}
+
+static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
+{
+	int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 4;
+	const s8 *r0 = bpf2rv32[BPF_REG_0];
+
+	store_offset -= 4 * BPF_JIT_SCRATCH_REGS;
+
+	/* Set return value if not tail call. */
+	if (!is_tail_call) {
+		emit(rv_addi(RV_REG_A0, lo(r0), 0), ctx);
+		emit(rv_addi(RV_REG_A1, hi(r0), 0), ctx);
+	}
+
+	/* Restore callee-saved registers. */
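emit_imm() above materializes a 32-bit constant as lui (upper 20 bits) plus addi (lower 12 bits). Because addi sign-extends its 12-bit immediate, the upper part is rounded by adding 1 << 11 before shifting. A minimal host-side model of that split, assuming the standard RV32I lui/addi semantics (not kernel code):

```c
#include <assert.h>
#include <stdint.h>

/* Model of emit_imm's lui/addi split. Returns the value the register
 * would hold after "lui rd, upper; addi rd, rd, lower". */
static int32_t materialize(int32_t imm)
{
	uint32_t upper = ((uint32_t)imm + (1u << 11)) >> 12; /* lui operand */
	uint32_t lower = (uint32_t)imm & 0xfff;              /* addi operand */

	/* addi sign-extends its 12-bit immediate. */
	int32_t lower_sext = (int32_t)(lower << 20) >> 20;

	if (upper)
		return (int32_t)((upper << 12) + (uint32_t)lower_sext);
	return lower_sext; /* addi rd, zero, lower */
}
```

The rounding compensates exactly for the sign extension, so every 32-bit value round-trips; this is the standard RISC-V idiom for loading arbitrary constants in at most two instructions.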
+	emit(rv_lw(RV_REG_RA, store_offset - 0, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_FP, store_offset - 4, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S1, store_offset - 8, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S2, store_offset - 12, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S3, store_offset - 16, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S4, store_offset - 20, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S5, store_offset - 24, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S6, store_offset - 28, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S7, store_offset - 32, RV_REG_SP), ctx);
+
+	emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
+
+	if (is_tail_call) {
+		/*
+		 * goto *(t0 + 4);
+		 * Skips first instruction of prologue which initializes tail
+		 * call counter. Assumes t0 contains address of target program,
+		 * see emit_bpf_tail_call.
+		 */
+		emit(rv_jalr(RV_REG_ZERO, RV_REG_T0, 4), ctx);
+	} else {
+		emit(rv_jalr(RV_REG_ZERO, RV_REG_RA, 0), ctx);
+	}
+}
+
+static bool is_stacked(s8 reg)
+{
+	return reg < 0;
+}
+
+static const s8 *bpf_get_reg64(const s8 *reg, const s8 *tmp,
+			       struct rv_jit_context *ctx)
+{
+	if (is_stacked(hi(reg))) {
+		emit(rv_lw(hi(tmp), hi(reg), RV_REG_FP), ctx);
+		emit(rv_lw(lo(tmp), lo(reg), RV_REG_FP), ctx);
+		reg = tmp;
+	}
+	return reg;
+}
+
+static void bpf_put_reg64(const s8 *reg, const s8 *src,
+			  struct rv_jit_context *ctx)
+{
+	if (is_stacked(hi(reg))) {
+		emit(rv_sw(RV_REG_FP, hi(reg), hi(src)), ctx);
+		emit(rv_sw(RV_REG_FP, lo(reg), lo(src)), ctx);
+	}
+}
+
+static const s8 *bpf_get_reg32(const s8 *reg, const s8 *tmp,
+			       struct rv_jit_context *ctx)
+{
+	if (is_stacked(lo(reg))) {
+		emit(rv_lw(lo(tmp), lo(reg), RV_REG_FP), ctx);
+		reg = tmp;
+	}
+	return reg;
+}
+
+static void bpf_put_reg32(const s8 *reg, const s8 *src,
+			  struct rv_jit_context *ctx)
+{
+	if (is_stacked(lo(reg))) {
+		emit(rv_sw(RV_REG_FP, lo(reg), lo(src)), ctx);
+		if (!ctx->prog->aux->verifier_zext)
+			emit(rv_sw(RV_REG_FP, hi(reg), RV_REG_ZERO), ctx);
+	} else if (!ctx->prog->aux->verifier_zext) {
+		emit(rv_addi(hi(reg), RV_REG_ZERO, 0), ctx);
+	}
+}
+
+static void emit_jump_and_link(u8 rd, s32 rvoff, bool force_jalr,
+			       struct rv_jit_context *ctx)
+{
+	s32 upper, lower;
+
+	if (rvoff && is_21b_int(rvoff) && !force_jalr) {
+		emit(rv_jal(rd, rvoff >> 1), ctx);
+		return;
+	}
+
+	upper = (rvoff + (1 << 11)) >> 12;
+	lower = rvoff & 0xfff;
+	emit(rv_auipc(RV_REG_T1, upper), ctx);
+	emit(rv_jalr(rd, RV_REG_T1, lower), ctx);
+}
+
+static void emit_alu_i64(const s8 *dst, s32 imm,
+			 struct rv_jit_context *ctx, const u8 op)
+{
+	const s8 *tmp1 = bpf2rv32[TMP_REG_1];
+	const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
+
+	switch (op) {
+	case BPF_MOV:
+		emit_imm32(rd, imm, ctx);
+		break;
+	case BPF_AND:
+		if (is_12b_int(imm)) {
+			emit(rv_andi(lo(rd), lo(rd), imm), ctx);
+		} else {
+			emit_imm(RV_REG_T0, imm, ctx);
+			emit(rv_and(lo(rd), lo(rd), RV_REG_T0), ctx);
+		}
+		if (imm >= 0)
+			emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx);
+		break;
+	case BPF_OR:
+		if (is_12b_int(imm)) {
+			emit(rv_ori(lo(rd), lo(rd), imm), ctx);
+		} else {
+			emit_imm(RV_REG_T0, imm, ctx);
+			emit(rv_or(lo(rd), lo(rd), RV_REG_T0), ctx);
+		}
+		if (imm < 0)
+			emit(rv_ori(hi(rd), RV_REG_ZERO, -1), ctx);
+		break;
+	case BPF_XOR:
+		if (is_12b_int(imm)) {
+			emit(rv_xori(lo(rd), lo(rd), imm), ctx);
+		} else {
+			emit_imm(RV_REG_T0, imm, ctx);
+			emit(rv_xor(lo(rd), lo(rd), RV_REG_T0), ctx);
+		}
+		if (imm < 0)
+			emit(rv_xori(hi(rd), hi(rd), -1), ctx);
+		break;
+	case BPF_LSH:
+		if (imm >= 32) {
+			emit(rv_slli(hi(rd), lo(rd), imm - 32), ctx);
+			emit(rv_addi(lo(rd), RV_REG_ZERO, 0), ctx);
+		} else if (imm == 0) {
+			/* Do nothing. */
+		} else {
+			emit(rv_srli(RV_REG_T0, lo(rd), 32 - imm), ctx);
+			emit(rv_slli(hi(rd), hi(rd), imm), ctx);
+			emit(rv_or(hi(rd), RV_REG_T0, hi(rd)), ctx);
+			emit(rv_slli(lo(rd), lo(rd), imm), ctx);
+		}
+		break;
+	case BPF_RSH:
+		if (imm >= 32) {
+			emit(rv_srli(lo(rd), hi(rd), imm - 32), ctx);
+			emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx);
+		} else if (imm == 0) {
+			/* Do nothing. */
+		} else {
+			emit(rv_slli(RV_REG_T0, hi(rd), 32 - imm), ctx);
+			emit(rv_srli(lo(rd), lo(rd), imm), ctx);
+			emit(rv_or(lo(rd), RV_REG_T0, lo(rd)), ctx);
+			emit(rv_srli(hi(rd), hi(rd), imm), ctx);
+		}
+		break;
+	case BPF_ARSH:
+		if (imm >= 32) {
+			emit(rv_srai(lo(rd), hi(rd), imm - 32), ctx);
+			emit(rv_srai(hi(rd), hi(rd), 31), ctx);
+		} else if (imm == 0) {
+			/* Do nothing. */
+		} else {
+			emit(rv_slli(RV_REG_T0, hi(rd), 32 - imm), ctx);
+			emit(rv_srli(lo(rd), lo(rd), imm), ctx);
+			emit(rv_or(lo(rd), RV_REG_T0, lo(rd)), ctx);
+			emit(rv_srai(hi(rd), hi(rd), imm), ctx);
+		}
+		break;
+	}
+
+	bpf_put_reg64(dst, rd, ctx);
+}
+
+static void emit_alu_i32(const s8 *dst, s32 imm,
+			 struct rv_jit_context *ctx, const u8 op)
+{
+	const s8 *tmp1 = bpf2rv32[TMP_REG_1];
+	const s8 *rd = bpf_get_reg32(dst, tmp1, ctx);
+
+	switch (op) {
+	case BPF_MOV:
+		emit_imm(lo(rd), imm, ctx);
+		break;
+	case BPF_ADD:
+		if (is_12b_int(imm)) {
+			emit(rv_addi(lo(rd), lo(rd), imm), ctx);
+		} else {
+			emit_imm(RV_REG_T0, imm, ctx);
+			emit(rv_add(lo(rd), lo(rd), RV_REG_T0), ctx);
+		}
+		break;
+	case BPF_SUB:
+		if (is_12b_int(-imm)) {
+			emit(rv_addi(lo(rd), lo(rd), -imm), ctx);
+		} else {
+			emit_imm(RV_REG_T0, imm, ctx);
+			emit(rv_sub(lo(rd), lo(rd), RV_REG_T0), ctx);
+		}
+		break;
+	case BPF_AND:
+		if (is_12b_int(imm)) {
+			emit(rv_andi(lo(rd), lo(rd), imm), ctx);
+		} else {
+			emit_imm(RV_REG_T0, imm, ctx);
+			emit(rv_and(lo(rd), lo(rd), RV_REG_T0), ctx);
+		}
+		break;
+	case BPF_OR:
+		if (is_12b_int(imm)) {
+			emit(rv_ori(lo(rd), lo(rd), imm), ctx);
+		} else {
+			emit_imm(RV_REG_T0, imm, ctx);
+			emit(rv_or(lo(rd), lo(rd), RV_REG_T0), ctx);
+		}
+		break;
+	case BPF_XOR:
+		if (is_12b_int(imm)) {
+			emit(rv_xori(lo(rd), lo(rd), imm), ctx);
+		} else {
+			emit_imm(RV_REG_T0, imm, ctx);
+			emit(rv_xor(lo(rd), lo(rd), RV_REG_T0), ctx);
+		}
+		break;
+	case BPF_LSH:
+		if (is_12b_int(imm)) {
+			emit(rv_slli(lo(rd), lo(rd), imm), ctx);
+		} else {
+			emit_imm(RV_REG_T0, imm, ctx);
+			emit(rv_sll(lo(rd), lo(rd), RV_REG_T0), ctx);
+		}
+		break;
+	case BPF_RSH:
+		if (is_12b_int(imm)) {
+			emit(rv_srli(lo(rd), lo(rd), imm), ctx);
+		} else {
+			emit_imm(RV_REG_T0, imm, ctx);
+			emit(rv_srl(lo(rd), lo(rd), RV_REG_T0), ctx);
+		}
+		break;
+	case BPF_ARSH:
+		if (is_12b_int(imm)) {
+			emit(rv_srai(lo(rd), lo(rd), imm), ctx);
+		} else {
+			emit_imm(RV_REG_T0, imm, ctx);
+			emit(rv_sra(lo(rd), lo(rd), RV_REG_T0), ctx);
+		}
+		break;
+	}
+
+	bpf_put_reg32(dst, rd, ctx);
+}
+
+static void emit_alu_r64(const s8 *dst, const s8 *src,
+			 struct rv_jit_context *ctx, const u8 op)
+{
+	const s8 *tmp1 = bpf2rv32[TMP_REG_1];
+	const s8 *tmp2 = bpf2rv32[TMP_REG_2];
+	const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
+	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
+
+	switch (op) {
+	case BPF_MOV:
+		emit(rv_addi(lo(rd), lo(rs), 0), ctx);
+		emit(rv_addi(hi(rd), hi(rs), 0), ctx);
+		break;
+	case BPF_ADD:
+		if (rd == rs) {
+			emit(rv_srli(RV_REG_T0, lo(rd), 31), ctx);
+			emit(rv_slli(hi(rd), hi(rd), 1), ctx);
+			emit(rv_or(hi(rd), RV_REG_T0, hi(rd)), ctx);
+			emit(rv_slli(lo(rd), lo(rd), 1), ctx);
+		} else {
+			emit(rv_add(lo(rd), lo(rd), lo(rs)), ctx);
+			emit(rv_sltu(RV_REG_T0, lo(rd), lo(rs)), ctx);
+			emit(rv_add(hi(rd), hi(rd), hi(rs)), ctx);
+			emit(rv_add(hi(rd), hi(rd), RV_REG_T0), ctx);
+		}
+		break;
+	case BPF_SUB:
+		emit(rv_sub(RV_REG_T1, hi(rd), hi(rs)), ctx);
+		emit(rv_sltu(RV_REG_T0, lo(rd), lo(rs)), ctx);
+		emit(rv_sub(hi(rd), RV_REG_T1, RV_REG_T0), ctx);
+		emit(rv_sub(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_AND:
+		emit(rv_and(lo(rd), lo(rd), lo(rs)), ctx);
+		emit(rv_and(hi(rd), hi(rd), hi(rs)), ctx);
+		break;
+	case BPF_OR:
+		emit(rv_or(lo(rd), lo(rd), lo(rs)), ctx);
+		emit(rv_or(hi(rd), hi(rd), hi(rs)), ctx);
+		break;
+	case BPF_XOR:
+		emit(rv_xor(lo(rd), lo(rd), lo(rs)), ctx);
+		emit(rv_xor(hi(rd), hi(rd), hi(rs)), ctx);
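The BPF_ADD case of emit_alu_r64 implements a 64-bit add out of 32-bit adds: after adding the low words, sltu detects the carry (the result is unsigned-less-than an addend iff the add wrapped), and the carry is folded into the high word. A host-side model of that carry trick, using plain C integers rather than emitted instructions:

```c
#include <assert.h>
#include <stdint.h>

/* Model of emit_alu_r64's BPF_ADD for the rd != rs case: add low words,
 * derive the carry with an sltu-style comparison, add it into the highs. */
static uint64_t add64_via_halves(uint32_t dhi, uint32_t dlo,
				 uint32_t shi, uint32_t slo)
{
	uint32_t lo = dlo + slo;         /* add  lo(rd), lo(rd), lo(rs) */
	uint32_t carry = lo < slo;       /* sltu t0, lo(rd), lo(rs)     */
	uint32_t hi = dhi + shi + carry; /* two adds on the high words  */

	return ((uint64_t)hi << 32) | lo;
}
```

The rd == rs special case in the JIT (a doubling) avoids this comparison entirely, since sltu against an operand that was just overwritten would read the wrong value.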
+		break;
+	case BPF_MUL:
+		emit(rv_mul(RV_REG_T0, hi(rs), lo(rd)), ctx);
+		emit(rv_mul(hi(rd), hi(rd), lo(rs)), ctx);
+		emit(rv_mulhu(RV_REG_T1, lo(rd), lo(rs)), ctx);
+		emit(rv_add(hi(rd), hi(rd), RV_REG_T0), ctx);
+		emit(rv_mul(lo(rd), lo(rd), lo(rs)), ctx);
+		emit(rv_add(hi(rd), hi(rd), RV_REG_T1), ctx);
+		break;
+	case BPF_LSH:
+		emit(rv_addi(RV_REG_T0, lo(rs), -32), ctx);
+		emit(rv_blt(RV_REG_T0, RV_REG_ZERO, 8), ctx);
+		emit(rv_sll(hi(rd), lo(rd), RV_REG_T0), ctx);
+		emit(rv_addi(lo(rd), RV_REG_ZERO, 0), ctx);
+		emit(rv_jal(RV_REG_ZERO, 16), ctx);
+		emit(rv_addi(RV_REG_T1, RV_REG_ZERO, 31), ctx);
+		emit(rv_srli(RV_REG_T0, lo(rd), 1), ctx);
+		emit(rv_sub(RV_REG_T1, RV_REG_T1, lo(rs)), ctx);
+		emit(rv_srl(RV_REG_T0, RV_REG_T0, RV_REG_T1), ctx);
+		emit(rv_sll(hi(rd), hi(rd), lo(rs)), ctx);
+		emit(rv_or(hi(rd), RV_REG_T0, hi(rd)), ctx);
+		emit(rv_sll(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_RSH:
+		emit(rv_addi(RV_REG_T0, lo(rs), -32), ctx);
+		emit(rv_blt(RV_REG_T0, RV_REG_ZERO, 8), ctx);
+		emit(rv_srl(lo(rd), hi(rd), RV_REG_T0), ctx);
+		emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx);
+		emit(rv_jal(RV_REG_ZERO, 16), ctx);
+		emit(rv_addi(RV_REG_T1, RV_REG_ZERO, 31), ctx);
+		emit(rv_slli(RV_REG_T0, hi(rd), 1), ctx);
+		emit(rv_sub(RV_REG_T1, RV_REG_T1, lo(rs)), ctx);
+		emit(rv_sll(RV_REG_T0, RV_REG_T0, RV_REG_T1), ctx);
+		emit(rv_srl(lo(rd), lo(rd), lo(rs)), ctx);
+		emit(rv_or(lo(rd), RV_REG_T0, lo(rd)), ctx);
+		emit(rv_srl(hi(rd), hi(rd), lo(rs)), ctx);
+		break;
+	case BPF_ARSH:
+		emit(rv_addi(RV_REG_T0, lo(rs), -32), ctx);
+		emit(rv_blt(RV_REG_T0, RV_REG_ZERO, 8), ctx);
+		emit(rv_sra(lo(rd), hi(rd), RV_REG_T0), ctx);
+		emit(rv_srai(hi(rd), hi(rd), 31), ctx);
+		emit(rv_jal(RV_REG_ZERO, 16), ctx);
+		emit(rv_addi(RV_REG_T1, RV_REG_ZERO, 31), ctx);
+		emit(rv_slli(RV_REG_T0, hi(rd), 1),
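The variable 64-bit shifts above branch on whether the shift amount reaches 32: if so, the low word shifts straight into the high word; otherwise bits spill between the halves. The spill uses a (lo >> 1) >> (31 - shamt) construction, equivalent to lo >> (32 - shamt) but well-defined even when shamt is 0. A host-side model of the BPF_LSH case (my own restatement of the control flow, not the emitted assembly):

```c
#include <assert.h>
#include <stdint.h>

/* Model of emit_alu_r64's BPF_LSH: 64-bit left shift built from 32-bit ops. */
static uint64_t lsh64(uint32_t hi, uint32_t lo, uint32_t shamt)
{
	shamt &= 63; /* shift amounts are taken modulo the operand width */

	if (shamt >= 32) {
		/* Whole low word (shifted) becomes the high word. */
		hi = lo << (shamt - 32);
		lo = 0;
	} else {
		/* (lo >> 1) >> (31 - shamt) == lo >> (32 - shamt), and is
		 * well-defined even for shamt == 0. */
		uint32_t spill = (lo >> 1) >> (31 - shamt);

		hi = (hi << shamt) | spill;
		lo = lo << shamt;
	}
	return ((uint64_t)hi << 32) | lo;
}
```

The two extra instructions (the -32 bias and the blt) are exactly the branch the JIT emits to pick between these paths at run time.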
+		     ctx);
+		emit(rv_sub(RV_REG_T1, RV_REG_T1, lo(rs)), ctx);
+		emit(rv_sll(RV_REG_T0, RV_REG_T0, RV_REG_T1), ctx);
+		emit(rv_srl(lo(rd), lo(rd), lo(rs)), ctx);
+		emit(rv_or(lo(rd), RV_REG_T0, lo(rd)), ctx);
+		emit(rv_sra(hi(rd), hi(rd), lo(rs)), ctx);
+		break;
+	case BPF_NEG:
+		emit(rv_sub(lo(rd), RV_REG_ZERO, lo(rd)), ctx);
+		emit(rv_sltu(RV_REG_T0, RV_REG_ZERO, lo(rd)), ctx);
+		emit(rv_sub(hi(rd), RV_REG_ZERO, hi(rd)), ctx);
+		emit(rv_sub(hi(rd), hi(rd), RV_REG_T0), ctx);
+		break;
+	}
+
+	bpf_put_reg64(dst, rd, ctx);
+}
+
+static void emit_alu_r32(const s8 *dst, const s8 *src,
+			 struct rv_jit_context *ctx, const u8 op)
+{
+	const s8 *tmp1 = bpf2rv32[TMP_REG_1];
+	const s8 *tmp2 = bpf2rv32[TMP_REG_2];
+	const s8 *rd = bpf_get_reg32(dst, tmp1, ctx);
+	const s8 *rs = bpf_get_reg32(src, tmp2, ctx);
+
+	switch (op) {
+	case BPF_MOV:
+		emit(rv_addi(lo(rd), lo(rs), 0), ctx);
+		break;
+	case BPF_ADD:
+		emit(rv_add(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_SUB:
+		emit(rv_sub(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_AND:
+		emit(rv_and(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_OR:
+		emit(rv_or(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_XOR:
+		emit(rv_xor(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_MUL:
+		emit(rv_mul(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_DIV:
+		emit(rv_divu(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_MOD:
+		emit(rv_remu(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_LSH:
+		emit(rv_sll(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_RSH:
+		emit(rv_srl(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_ARSH:
+		emit(rv_sra(lo(rd), lo(rd), lo(rs)), ctx);
+		break;
+	case BPF_NEG:
+		emit(rv_sub(lo(rd), RV_REG_ZERO, lo(rd)), ctx);
+		break;
+	}
+
+	bpf_put_reg32(dst, rd, ctx);
+}
+
+static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 rvoff,
+			   struct rv_jit_context *ctx, const u8 op)
+{
+	int e, s = ctx->ninsns;
+	const s8 *tmp1 = bpf2rv32[TMP_REG_1];
+	const s8 *tmp2 = bpf2rv32[TMP_REG_2];
+
+	const s8 *rs1 = bpf_get_reg64(src1, tmp1, ctx);
+	const s8 *rs2 = bpf_get_reg64(src2, tmp2, ctx);
+
+	/*
+	 * NO_JUMP skips over the rest of the instructions and the
+	 * emit_jump_and_link, meaning the BPF branch is not taken.
+	 * JUMP skips directly to the emit_jump_and_link, meaning
+	 * the BPF branch is taken.
+	 *
+	 * The fallthrough case results in the BPF branch being taken.
+	 */
+#define NO_JUMP(idx) (6 + (2 * (idx)))
+#define JUMP(idx) (2 + (2 * (idx)))
+
+	switch (op) {
+	case BPF_JEQ:
+		emit(rv_bne(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
+		emit(rv_bne(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
+		break;
+	case BPF_JGT:
+		emit(rv_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx);
+		emit(rv_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
+		emit(rv_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
+		break;
+	case BPF_JLT:
+		emit(rv_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx);
+		emit(rv_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
+		emit(rv_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
+		break;
+	case BPF_JGE:
+		emit(rv_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx);
+		emit(rv_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
+		emit(rv_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
+		break;
+	case BPF_JLE:
+		emit(rv_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx);
+		emit(rv_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
+		emit(rv_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
+		break;
+	case BPF_JNE:
+		emit(rv_bne(hi(rs1), hi(rs2), JUMP(1)), ctx);
+		emit(rv_beq(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
+		break;
+	case BPF_JSGT:
+		emit(rv_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx);
+		emit(rv_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
+		emit(rv_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
+		break;
+	case BPF_JSLT:
+		emit(rv_blt(hi(rs1), hi(rs2), JUMP(2)), ctx);
+		emit(rv_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
+		emit(rv_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
+		break;
+	case BPF_JSGE:
+		emit(rv_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx);
+		emit(rv_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
+		emit(rv_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
+		break;
+	case BPF_JSLE:
+		emit(rv_blt(hi(rs1), hi(rs2), JUMP(2)), ctx);
+		emit(rv_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
+		emit(rv_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
+		break;
+	case BPF_JSET:
+		emit(rv_and(RV_REG_T0, hi(rs1), hi(rs2)), ctx);
+		emit(rv_bne(RV_REG_T0, RV_REG_ZERO, JUMP(2)), ctx);
+		emit(rv_and(RV_REG_T0, lo(rs1), lo(rs2)), ctx);
+		emit(rv_beq(RV_REG_T0, RV_REG_ZERO, NO_JUMP(0)), ctx);
+		break;
+	}
+
+#undef NO_JUMP
+#undef JUMP
+
+	e = ctx->ninsns;
+	/* Adjust for extra insns. */
+	rvoff -= (e - s) << 2;
+	emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
+	return 0;
+}
+
+static int emit_bcc(u8 op, u8 rd, u8 rs, int rvoff, struct rv_jit_context *ctx)
+{
+	int e, s = ctx->ninsns;
+	bool far = false;
+	int off;
+
+	if (op == BPF_JSET) {
+		/*
+		 * BPF_JSET is a special case: it has no inverse so we always
+		 * treat it as a far branch.
+		 */
+		far = true;
+	} else if (!is_13b_int(rvoff)) {
+		op = invert_bpf_cond(op);
+		far = true;
+	}
+
+	/*
+	 * For a far branch, the condition is negated and we jump over the
+	 * branch itself, and the two instructions from emit_jump_and_link.
+	 * For a near branch, just use rvoff.
+	 */
+	off = far ? 6 : (rvoff >> 1);
+
+	switch (op) {
+	case BPF_JEQ:
+		emit(rv_beq(rd, rs, off), ctx);
+		break;
+	case BPF_JGT:
+		emit(rv_bgtu(rd, rs, off), ctx);
+		break;
+	case BPF_JLT:
+		emit(rv_bltu(rd, rs, off), ctx);
+		break;
+	case BPF_JGE:
+		emit(rv_bgeu(rd, rs, off), ctx);
+		break;
+	case BPF_JLE:
+		emit(rv_bleu(rd, rs, off), ctx);
+		break;
+	case BPF_JNE:
+		emit(rv_bne(rd, rs, off), ctx);
+		break;
+	case BPF_JSGT:
+		emit(rv_bgt(rd, rs, off), ctx);
+		break;
+	case BPF_JSLT:
+		emit(rv_blt(rd, rs, off), ctx);
+		break;
+	case BPF_JSGE:
+		emit(rv_bge(rd, rs, off), ctx);
+		break;
+	case BPF_JSLE:
+		emit(rv_ble(rd, rs, off), ctx);
+		break;
+	case BPF_JSET:
+		emit(rv_and(RV_REG_T0, rd, rs), ctx);
+		emit(rv_beq(RV_REG_T0, RV_REG_ZERO, off), ctx);
+		break;
+	}
+
+	if (far) {
+		e = ctx->ninsns;
+		/* Adjust for extra insns. */
+		rvoff -= (e - s) << 2;
+		emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
+	}
+	return 0;
+}
+
+static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 rvoff,
+			   struct rv_jit_context *ctx, const u8 op)
+{
+	int e, s = ctx->ninsns;
+	const s8 *tmp1 = bpf2rv32[TMP_REG_1];
+	const s8 *tmp2 = bpf2rv32[TMP_REG_2];
+
+	const s8 *rs1 = bpf_get_reg32(src1, tmp1, ctx);
+	const s8 *rs2 = bpf_get_reg32(src2, tmp2, ctx);
+
+	e = ctx->ninsns;
+	/* Adjust for extra insns. */
+	rvoff -= (e - s) << 2;
+
+	if (emit_bcc(op, lo(rs1), lo(rs2), rvoff, ctx))
+		return -1;
+
+	return 0;
+}
+
+static void emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
+{
+	const s8 *r0 = bpf2rv32[BPF_REG_0];
+	const s8 *r5 = bpf2rv32[BPF_REG_5];
+	u32 upper = ((u32)addr + (1 << 11)) >> 12;
+	u32 lower = addr & 0xfff;
+
+	/* R1-R4 already in correct registers---need to push R5 to stack. */
+	emit(rv_addi(RV_REG_SP, RV_REG_SP, -16), ctx);
+	emit(rv_sw(RV_REG_SP, 0, lo(r5)), ctx);
+	emit(rv_sw(RV_REG_SP, 4, hi(r5)), ctx);
+
+	/* Backup TCC. */
+	emit(rv_addi(RV_REG_TCC_SAVED, RV_REG_TCC, 0), ctx);
+
+	/*
+	 * Use lui/jalr pair to jump to absolute address. Don't use emit_imm as
+	 * the number of emitted instructions should not depend on the value of
+	 * addr.
+	 */
+	emit(rv_lui(RV_REG_T1, upper), ctx);
+	emit(rv_jalr(RV_REG_RA, RV_REG_T1, lower), ctx);
+
+	/* Restore TCC. */
+	emit(rv_addi(RV_REG_TCC, RV_REG_TCC_SAVED, 0), ctx);
+
+	/* Set return value and restore stack. */
+	emit(rv_addi(lo(r0), RV_REG_A0, 0), ctx);
+	emit(rv_addi(hi(r0), RV_REG_A1, 0), ctx);
+	emit(rv_addi(RV_REG_SP, RV_REG_SP, 16), ctx);
+}
+
+static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
+{
+	/*
+	 * R1 -> &ctx
+	 * R2 -> &array
+	 * R3 -> index
+	 */
+	int tc_ninsn, off, start_insn = ctx->ninsns;
+	const s8 *arr_reg = bpf2rv32[BPF_REG_2];
+	const s8 *idx_reg = bpf2rv32[BPF_REG_3];
+
+	tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] :
+		   ctx->offset[0];
+
+	/* max_entries = array->map.max_entries; */
+	off = offsetof(struct bpf_array, map.max_entries);
+	if (is_12b_check(off, insn))
+		return -1;
+	emit(rv_lw(RV_REG_T1, off, lo(arr_reg)), ctx);
+
+	/*
+	 * if (index >= max_entries)
+	 *	goto out;
+	 */
+	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+	emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx);
+
+	/*
+	 * if ((temp_tcc = tcc - 1) < 0)
+	 *	goto out;
+	 */
+	emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx);
+	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+	emit_bcc(BPF_JSLT, RV_REG_T1, RV_REG_ZERO, off, ctx);
+
+	/*
+	 * prog = array->ptrs[index];
+	 * if (!prog)
+	 *	goto out;
+	 */
+	emit(rv_slli(RV_REG_T0, lo(idx_reg), 2), ctx);
+	emit(rv_add(RV_REG_T0, RV_REG_T0, lo(arr_reg)), ctx);
+	off = offsetof(struct bpf_array, ptrs);
+	if (is_12b_check(off, insn))
+		return -1;
+	emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx);
+	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+	emit_bcc(BPF_JEQ, RV_REG_T0, RV_REG_ZERO, off, ctx);
+
+	/*
+	 * tcc = temp_tcc;
+	 * goto *(prog->bpf_func + 4);
+	 */
+	off = offsetof(struct bpf_prog, bpf_func);
+	if (is_12b_check(off, insn))
+		return -1;
+	emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx);
+	emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
+	/* Epilogue jumps to *(t0 + 4). */
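emit_bpf_tail_call() emits three guards before transferring control: an index bound check, the tail-call counter check, and a NULL-slot check; only when all pass is the counter committed and the jump taken. A host-side sketch of that guard logic (not the emitted assembly; the MAX_TAIL_CALL_CNT value is the kernel's documented limit and its prologue-time initialization is assumed here):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define MAX_TAIL_CALL_CNT 32 /* kernel's tail-call depth limit */

/* Model of the guards emit_bpf_tail_call() emits before the jump. The
 * counter is only decremented when the tail call actually happens. */
static bool tail_call_taken(uint32_t index, uint32_t max_entries,
			    int *tcc, bool slot_is_null)
{
	if (index >= max_entries)
		return false;  /* if (index >= max_entries) goto out; */
	if (*tcc - 1 < 0)
		return false;  /* if ((temp_tcc = tcc - 1) < 0) goto out; */
	if (slot_is_null)
		return false;  /* if (!prog) goto out; */
	(*tcc)--;              /* tcc = temp_tcc */
	return true;
}
```

The jump target is prog->bpf_func + 4 rather than bpf_func itself: the landing program's prologue begins by reinitializing the counter, and skipping that one instruction is what lets the counter survive across chained tail calls.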
+	__build_epilogue(true, ctx);
+	return 0;
+}
+
+static int emit_load_r64(const s8 *dst, const s8 *src, s16 off,
+			 struct rv_jit_context *ctx, const u8 size)
+{
+	const s8 *tmp1 = bpf2rv32[TMP_REG_1];
+	const s8 *tmp2 = bpf2rv32[TMP_REG_2];
+	const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
+	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
+
+	emit_imm(RV_REG_T0, off, ctx);
+	emit(rv_add(RV_REG_T0, RV_REG_T0, lo(rs)), ctx);
+
+	switch (size) {
+	case BPF_B:
+		emit(rv_lbu(lo(rd), 0, RV_REG_T0), ctx);
+		if (!ctx->prog->aux->verifier_zext)
+			emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx);
+		break;
+	case BPF_H:
+		emit(rv_lhu(lo(rd), 0, RV_REG_T0), ctx);
+		if (!ctx->prog->aux->verifier_zext)
+			emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx);
+		break;
+	case BPF_W:
+		emit(rv_lw(lo(rd), 0, RV_REG_T0), ctx);
+		if (!ctx->prog->aux->verifier_zext)
+			emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx);
+		break;
+	case BPF_DW:
+		emit(rv_lw(lo(rd), 0, RV_REG_T0), ctx);
+		emit(rv_lw(hi(rd), 4, RV_REG_T0), ctx);
+		break;
+	}
+
+	bpf_put_reg64(dst, rd, ctx);
+	return 0;
+}
+
+static int emit_store_r64(const s8 *dst, const s8 *src, s16 off,
+			  struct rv_jit_context *ctx, const u8 size,
+			  const u8 mode)
+{
+	const s8 *tmp1 = bpf2rv32[TMP_REG_1];
+	const s8 *tmp2 = bpf2rv32[TMP_REG_2];
+	const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
+	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
+
+	if (mode == BPF_XADD && size != BPF_W)
+		return -1;
+
+	emit_imm(RV_REG_T0, off, ctx);
+	emit(rv_add(RV_REG_T0, RV_REG_T0, lo(rd)), ctx);
+
+	switch (size) {
+	case BPF_B:
+		emit(rv_sb(RV_REG_T0, 0, lo(rs)), ctx);
+		break;
+	case BPF_H:
+		emit(rv_sh(RV_REG_T0, 0, lo(rs)), ctx);
+		break;
+	case BPF_W:
+		switch (mode) {
+		case BPF_MEM:
+			emit(rv_sw(RV_REG_T0, 0, lo(rs)), ctx);
+			break;
+		case BPF_XADD:
+			emit(rv_amoadd_w(RV_REG_ZERO, lo(rs), RV_REG_T0, 0, 0),
+			     ctx);
+			break;
+		}
+		break;
+	case BPF_DW:
+		emit(rv_sw(RV_REG_T0, 0, lo(rs)), ctx);
+		emit(rv_sw(RV_REG_T0, 4, hi(rs)), ctx);
+		break;
+	}
+
+	return 0;
+}
+
+static void emit_rev16(const s8 rd, struct rv_jit_context *ctx)
+{
+	emit(rv_slli(rd, rd, 16), ctx);
+	emit(rv_slli(RV_REG_T1, rd, 8), ctx);
+	emit(rv_srli(rd, rd, 8), ctx);
+	emit(rv_add(RV_REG_T1, rd, RV_REG_T1), ctx);
+	emit(rv_srli(rd, RV_REG_T1, 16), ctx);
+}
+
+static void emit_rev32(const s8 rd, struct rv_jit_context *ctx)
+{
+	emit(rv_addi(RV_REG_T1, RV_REG_ZERO, 0), ctx);
+	emit(rv_andi(RV_REG_T0, rd, 255), ctx);
+	emit(rv_add(RV_REG_T1, RV_REG_T1, RV_REG_T0), ctx);
+	emit(rv_slli(RV_REG_T1, RV_REG_T1, 8), ctx);
+	emit(rv_srli(rd, rd, 8), ctx);
+	emit(rv_andi(RV_REG_T0, rd, 255), ctx);
+	emit(rv_add(RV_REG_T1, RV_REG_T1, RV_REG_T0), ctx);
+	emit(rv_slli(RV_REG_T1, RV_REG_T1, 8), ctx);
+	emit(rv_srli(rd, rd, 8), ctx);
+	emit(rv_andi(RV_REG_T0, rd, 255), ctx);
+	emit(rv_add(RV_REG_T1, RV_REG_T1, RV_REG_T0), ctx);
+	emit(rv_slli(RV_REG_T1, RV_REG_T1, 8), ctx);
+	emit(rv_srli(rd, rd, 8), ctx);
+	emit(rv_andi(RV_REG_T0, rd, 255), ctx);
+	emit(rv_add(RV_REG_T1, RV_REG_T1, RV_REG_T0), ctx);
+	emit(rv_addi(rd, RV_REG_T1, 0), ctx);
+}
+
+static void emit_zext64(const s8 *dst, struct rv_jit_context *ctx)
+{
+	const s8 *rd;
+	const s8 *tmp1 = bpf2rv32[TMP_REG_1];
+
+	rd = bpf_get_reg64(dst, tmp1, ctx);
+	emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx);
+	bpf_put_reg64(dst, rd, ctx);
+}
+
+int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
+		      bool extra_pass)
+{
+	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
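emit_rev32() above byte-swaps a word without a dedicated byte-reverse instruction (RV32G has none): it peels the low byte off the source four times while shifting an accumulator left, so the bytes come back out in reverse order. A host-side model of the same accumulate-and-shift loop:

```c
#include <assert.h>
#include <stdint.h>

/* Model of emit_rev32: take the low byte, shift the accumulator left and
 * the source right, four times -- a plain 32-bit byte swap. */
static uint32_t rev32(uint32_t rd)
{
	uint32_t acc = 0;
	int i;

	for (i = 0; i < 4; i++) {
		acc = (acc << 8) | (rd & 255); /* andi/add plus slli by 8 */
		rd >>= 8;                      /* srli by 8 */
	}
	return acc;
}
```

Since swapping is its own inverse, the same sequence serves both BPF_TO_BE and BPF_TO_LE on this little-endian target.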
BPF_CLASS(insn->code) == BPF_JMP; 931 + int s, e, rvoff, i = insn - ctx->prog->insnsi; 932 + u8 code = insn->code; 933 + s16 off = insn->off; 934 + s32 imm = insn->imm; 935 + 936 + const s8 *dst = bpf2rv32[insn->dst_reg]; 937 + const s8 *src = bpf2rv32[insn->src_reg]; 938 + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; 939 + const s8 *tmp2 = bpf2rv32[TMP_REG_2]; 940 + 941 + switch (code) { 942 + case BPF_ALU64 | BPF_MOV | BPF_X: 943 + 944 + case BPF_ALU64 | BPF_ADD | BPF_X: 945 + case BPF_ALU64 | BPF_ADD | BPF_K: 946 + 947 + case BPF_ALU64 | BPF_SUB | BPF_X: 948 + case BPF_ALU64 | BPF_SUB | BPF_K: 949 + 950 + case BPF_ALU64 | BPF_AND | BPF_X: 951 + case BPF_ALU64 | BPF_OR | BPF_X: 952 + case BPF_ALU64 | BPF_XOR | BPF_X: 953 + 954 + case BPF_ALU64 | BPF_MUL | BPF_X: 955 + case BPF_ALU64 | BPF_MUL | BPF_K: 956 + 957 + case BPF_ALU64 | BPF_LSH | BPF_X: 958 + case BPF_ALU64 | BPF_RSH | BPF_X: 959 + case BPF_ALU64 | BPF_ARSH | BPF_X: 960 + if (BPF_SRC(code) == BPF_K) { 961 + emit_imm32(tmp2, imm, ctx); 962 + src = tmp2; 963 + } 964 + emit_alu_r64(dst, src, ctx, BPF_OP(code)); 965 + break; 966 + 967 + case BPF_ALU64 | BPF_NEG: 968 + emit_alu_r64(dst, tmp2, ctx, BPF_OP(code)); 969 + break; 970 + 971 + case BPF_ALU64 | BPF_DIV | BPF_X: 972 + case BPF_ALU64 | BPF_DIV | BPF_K: 973 + case BPF_ALU64 | BPF_MOD | BPF_X: 974 + case BPF_ALU64 | BPF_MOD | BPF_K: 975 + goto notsupported; 976 + 977 + case BPF_ALU64 | BPF_MOV | BPF_K: 978 + case BPF_ALU64 | BPF_AND | BPF_K: 979 + case BPF_ALU64 | BPF_OR | BPF_K: 980 + case BPF_ALU64 | BPF_XOR | BPF_K: 981 + case BPF_ALU64 | BPF_LSH | BPF_K: 982 + case BPF_ALU64 | BPF_RSH | BPF_K: 983 + case BPF_ALU64 | BPF_ARSH | BPF_K: 984 + emit_alu_i64(dst, imm, ctx, BPF_OP(code)); 985 + break; 986 + 987 + case BPF_ALU | BPF_MOV | BPF_X: 988 + if (imm == 1) { 989 + /* Special mov32 for zext. */ 990 + emit_zext64(dst, ctx); 991 + break; 992 + } 993 + /* Fallthrough. 
*/ 994 + 995 + case BPF_ALU | BPF_ADD | BPF_X: 996 + case BPF_ALU | BPF_SUB | BPF_X: 997 + case BPF_ALU | BPF_AND | BPF_X: 998 + case BPF_ALU | BPF_OR | BPF_X: 999 + case BPF_ALU | BPF_XOR | BPF_X: 1000 + 1001 + case BPF_ALU | BPF_MUL | BPF_X: 1002 + case BPF_ALU | BPF_MUL | BPF_K: 1003 + 1004 + case BPF_ALU | BPF_DIV | BPF_X: 1005 + case BPF_ALU | BPF_DIV | BPF_K: 1006 + 1007 + case BPF_ALU | BPF_MOD | BPF_X: 1008 + case BPF_ALU | BPF_MOD | BPF_K: 1009 + 1010 + case BPF_ALU | BPF_LSH | BPF_X: 1011 + case BPF_ALU | BPF_RSH | BPF_X: 1012 + case BPF_ALU | BPF_ARSH | BPF_X: 1013 + if (BPF_SRC(code) == BPF_K) { 1014 + emit_imm32(tmp2, imm, ctx); 1015 + src = tmp2; 1016 + } 1017 + emit_alu_r32(dst, src, ctx, BPF_OP(code)); 1018 + break; 1019 + 1020 + case BPF_ALU | BPF_MOV | BPF_K: 1021 + case BPF_ALU | BPF_ADD | BPF_K: 1022 + case BPF_ALU | BPF_SUB | BPF_K: 1023 + case BPF_ALU | BPF_AND | BPF_K: 1024 + case BPF_ALU | BPF_OR | BPF_K: 1025 + case BPF_ALU | BPF_XOR | BPF_K: 1026 + case BPF_ALU | BPF_LSH | BPF_K: 1027 + case BPF_ALU | BPF_RSH | BPF_K: 1028 + case BPF_ALU | BPF_ARSH | BPF_K: 1029 + /* 1030 + * mul,div,mod are handled in the BPF_X case since there are 1031 + * no RISC-V I-type equivalents. 1032 + */ 1033 + emit_alu_i32(dst, imm, ctx, BPF_OP(code)); 1034 + break; 1035 + 1036 + case BPF_ALU | BPF_NEG: 1037 + /* 1038 + * src is ignored---choose tmp2 as a dummy register since it 1039 + * is not on the stack. 1040 + */ 1041 + emit_alu_r32(dst, tmp2, ctx, BPF_OP(code)); 1042 + break; 1043 + 1044 + case BPF_ALU | BPF_END | BPF_FROM_LE: 1045 + { 1046 + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); 1047 + 1048 + switch (imm) { 1049 + case 16: 1050 + emit(rv_slli(lo(rd), lo(rd), 16), ctx); 1051 + emit(rv_srli(lo(rd), lo(rd), 16), ctx); 1052 + /* Fallthrough. */ 1053 + case 32: 1054 + if (!ctx->prog->aux->verifier_zext) 1055 + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); 1056 + break; 1057 + case 64: 1058 + /* Do nothing. 
*/ 1059 + break; 1060 + default: 1061 + pr_err("bpf-jit: BPF_END imm %d invalid\n", imm); 1062 + return -1; 1063 + } 1064 + 1065 + bpf_put_reg64(dst, rd, ctx); 1066 + break; 1067 + } 1068 + 1069 + case BPF_ALU | BPF_END | BPF_FROM_BE: 1070 + { 1071 + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); 1072 + 1073 + switch (imm) { 1074 + case 16: 1075 + emit_rev16(lo(rd), ctx); 1076 + if (!ctx->prog->aux->verifier_zext) 1077 + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); 1078 + break; 1079 + case 32: 1080 + emit_rev32(lo(rd), ctx); 1081 + if (!ctx->prog->aux->verifier_zext) 1082 + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); 1083 + break; 1084 + case 64: 1085 + /* Swap upper and lower halves. */ 1086 + emit(rv_addi(RV_REG_T0, lo(rd), 0), ctx); 1087 + emit(rv_addi(lo(rd), hi(rd), 0), ctx); 1088 + emit(rv_addi(hi(rd), RV_REG_T0, 0), ctx); 1089 + 1090 + /* Swap each half. */ 1091 + emit_rev32(lo(rd), ctx); 1092 + emit_rev32(hi(rd), ctx); 1093 + break; 1094 + default: 1095 + pr_err("bpf-jit: BPF_END imm %d invalid\n", imm); 1096 + return -1; 1097 + } 1098 + 1099 + bpf_put_reg64(dst, rd, ctx); 1100 + break; 1101 + } 1102 + 1103 + case BPF_JMP | BPF_JA: 1104 + rvoff = rv_offset(i, off, ctx); 1105 + emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); 1106 + break; 1107 + 1108 + case BPF_JMP | BPF_CALL: 1109 + { 1110 + bool fixed; 1111 + int ret; 1112 + u64 addr; 1113 + 1114 + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr, 1115 + &fixed); 1116 + if (ret < 0) 1117 + return ret; 1118 + emit_call(fixed, addr, ctx); 1119 + break; 1120 + } 1121 + 1122 + case BPF_JMP | BPF_TAIL_CALL: 1123 + if (emit_bpf_tail_call(i, ctx)) 1124 + return -1; 1125 + break; 1126 + 1127 + case BPF_JMP | BPF_JEQ | BPF_X: 1128 + case BPF_JMP | BPF_JEQ | BPF_K: 1129 + case BPF_JMP32 | BPF_JEQ | BPF_X: 1130 + case BPF_JMP32 | BPF_JEQ | BPF_K: 1131 + 1132 + case BPF_JMP | BPF_JNE | BPF_X: 1133 + case BPF_JMP | BPF_JNE | BPF_K: 1134 + case BPF_JMP32 | BPF_JNE | BPF_X: 1135 + case BPF_JMP32 | 
BPF_JNE | BPF_K: 1136 + 1137 + case BPF_JMP | BPF_JLE | BPF_X: 1138 + case BPF_JMP | BPF_JLE | BPF_K: 1139 + case BPF_JMP32 | BPF_JLE | BPF_X: 1140 + case BPF_JMP32 | BPF_JLE | BPF_K: 1141 + 1142 + case BPF_JMP | BPF_JLT | BPF_X: 1143 + case BPF_JMP | BPF_JLT | BPF_K: 1144 + case BPF_JMP32 | BPF_JLT | BPF_X: 1145 + case BPF_JMP32 | BPF_JLT | BPF_K: 1146 + 1147 + case BPF_JMP | BPF_JGE | BPF_X: 1148 + case BPF_JMP | BPF_JGE | BPF_K: 1149 + case BPF_JMP32 | BPF_JGE | BPF_X: 1150 + case BPF_JMP32 | BPF_JGE | BPF_K: 1151 + 1152 + case BPF_JMP | BPF_JGT | BPF_X: 1153 + case BPF_JMP | BPF_JGT | BPF_K: 1154 + case BPF_JMP32 | BPF_JGT | BPF_X: 1155 + case BPF_JMP32 | BPF_JGT | BPF_K: 1156 + 1157 + case BPF_JMP | BPF_JSLE | BPF_X: 1158 + case BPF_JMP | BPF_JSLE | BPF_K: 1159 + case BPF_JMP32 | BPF_JSLE | BPF_X: 1160 + case BPF_JMP32 | BPF_JSLE | BPF_K: 1161 + 1162 + case BPF_JMP | BPF_JSLT | BPF_X: 1163 + case BPF_JMP | BPF_JSLT | BPF_K: 1164 + case BPF_JMP32 | BPF_JSLT | BPF_X: 1165 + case BPF_JMP32 | BPF_JSLT | BPF_K: 1166 + 1167 + case BPF_JMP | BPF_JSGE | BPF_X: 1168 + case BPF_JMP | BPF_JSGE | BPF_K: 1169 + case BPF_JMP32 | BPF_JSGE | BPF_X: 1170 + case BPF_JMP32 | BPF_JSGE | BPF_K: 1171 + 1172 + case BPF_JMP | BPF_JSGT | BPF_X: 1173 + case BPF_JMP | BPF_JSGT | BPF_K: 1174 + case BPF_JMP32 | BPF_JSGT | BPF_X: 1175 + case BPF_JMP32 | BPF_JSGT | BPF_K: 1176 + 1177 + case BPF_JMP | BPF_JSET | BPF_X: 1178 + case BPF_JMP | BPF_JSET | BPF_K: 1179 + case BPF_JMP32 | BPF_JSET | BPF_X: 1180 + case BPF_JMP32 | BPF_JSET | BPF_K: 1181 + rvoff = rv_offset(i, off, ctx); 1182 + if (BPF_SRC(code) == BPF_K) { 1183 + s = ctx->ninsns; 1184 + emit_imm32(tmp2, imm, ctx); 1185 + src = tmp2; 1186 + e = ctx->ninsns; 1187 + rvoff -= (e - s) << 2; 1188 + } 1189 + 1190 + if (is64) 1191 + emit_branch_r64(dst, src, rvoff, ctx, BPF_OP(code)); 1192 + else 1193 + emit_branch_r32(dst, src, rvoff, ctx, BPF_OP(code)); 1194 + break; 1195 + 1196 + case BPF_JMP | BPF_EXIT: 1197 + if (i == ctx->prog->len - 
1) 1198 + break; 1199 + 1200 + rvoff = epilogue_offset(ctx); 1201 + emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); 1202 + break; 1203 + 1204 + case BPF_LD | BPF_IMM | BPF_DW: 1205 + { 1206 + struct bpf_insn insn1 = insn[1]; 1207 + s32 imm_lo = imm; 1208 + s32 imm_hi = insn1.imm; 1209 + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); 1210 + 1211 + emit_imm64(rd, imm_hi, imm_lo, ctx); 1212 + bpf_put_reg64(dst, rd, ctx); 1213 + return 1; 1214 + } 1215 + 1216 + case BPF_LDX | BPF_MEM | BPF_B: 1217 + case BPF_LDX | BPF_MEM | BPF_H: 1218 + case BPF_LDX | BPF_MEM | BPF_W: 1219 + case BPF_LDX | BPF_MEM | BPF_DW: 1220 + if (emit_load_r64(dst, src, off, ctx, BPF_SIZE(code))) 1221 + return -1; 1222 + break; 1223 + 1224 + case BPF_ST | BPF_MEM | BPF_B: 1225 + case BPF_ST | BPF_MEM | BPF_H: 1226 + case BPF_ST | BPF_MEM | BPF_W: 1227 + case BPF_ST | BPF_MEM | BPF_DW: 1228 + 1229 + case BPF_STX | BPF_MEM | BPF_B: 1230 + case BPF_STX | BPF_MEM | BPF_H: 1231 + case BPF_STX | BPF_MEM | BPF_W: 1232 + case BPF_STX | BPF_MEM | BPF_DW: 1233 + case BPF_STX | BPF_XADD | BPF_W: 1234 + if (BPF_CLASS(code) == BPF_ST) { 1235 + emit_imm32(tmp2, imm, ctx); 1236 + src = tmp2; 1237 + } 1238 + 1239 + if (emit_store_r64(dst, src, off, ctx, BPF_SIZE(code), 1240 + BPF_MODE(code))) 1241 + return -1; 1242 + break; 1243 + 1244 + /* No hardware support for 8-byte atomics in RV32. */ 1245 + case BPF_STX | BPF_XADD | BPF_DW: 1246 + /* Fallthrough. */ 1247 + 1248 + notsupported: 1249 + pr_info_once("bpf-jit: not supported: opcode %02x ***\n", code); 1250 + return -EFAULT; 1251 + 1252 + default: 1253 + pr_err("bpf-jit: unknown opcode %02x\n", code); 1254 + return -EINVAL; 1255 + } 1256 + 1257 + return 0; 1258 + } 1259 + 1260 + void bpf_jit_build_prologue(struct rv_jit_context *ctx) 1261 + { 1262 + /* Make space to save 9 registers: ra, fp, s1--s7. 
*/ 1263 + int stack_adjust = 9 * sizeof(u32), store_offset, bpf_stack_adjust; 1264 + const s8 *fp = bpf2rv32[BPF_REG_FP]; 1265 + const s8 *r1 = bpf2rv32[BPF_REG_1]; 1266 + 1267 + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); 1268 + stack_adjust += bpf_stack_adjust; 1269 + 1270 + store_offset = stack_adjust - 4; 1271 + 1272 + stack_adjust += 4 * BPF_JIT_SCRATCH_REGS; 1273 + 1274 + /* 1275 + * The first instruction sets the tail-call-counter (TCC) register. 1276 + * This instruction is skipped by tail calls. 1277 + */ 1278 + emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx); 1279 + 1280 + emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx); 1281 + 1282 + /* Save callee-save registers. */ 1283 + emit(rv_sw(RV_REG_SP, store_offset - 0, RV_REG_RA), ctx); 1284 + emit(rv_sw(RV_REG_SP, store_offset - 4, RV_REG_FP), ctx); 1285 + emit(rv_sw(RV_REG_SP, store_offset - 8, RV_REG_S1), ctx); 1286 + emit(rv_sw(RV_REG_SP, store_offset - 12, RV_REG_S2), ctx); 1287 + emit(rv_sw(RV_REG_SP, store_offset - 16, RV_REG_S3), ctx); 1288 + emit(rv_sw(RV_REG_SP, store_offset - 20, RV_REG_S4), ctx); 1289 + emit(rv_sw(RV_REG_SP, store_offset - 24, RV_REG_S5), ctx); 1290 + emit(rv_sw(RV_REG_SP, store_offset - 28, RV_REG_S6), ctx); 1291 + emit(rv_sw(RV_REG_SP, store_offset - 32, RV_REG_S7), ctx); 1292 + 1293 + /* Set fp: used as the base address for stacked BPF registers. */ 1294 + emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx); 1295 + 1296 + /* Set up BPF stack pointer. */ 1297 + emit(rv_addi(lo(fp), RV_REG_SP, bpf_stack_adjust), ctx); 1298 + emit(rv_addi(hi(fp), RV_REG_ZERO, 0), ctx); 1299 + 1300 + /* Set up context pointer. */ 1301 + emit(rv_addi(lo(r1), RV_REG_A0, 0), ctx); 1302 + emit(rv_addi(hi(r1), RV_REG_ZERO, 0), ctx); 1303 + 1304 + ctx->stack_size = stack_adjust; 1305 + } 1306 + 1307 + void bpf_jit_build_epilogue(struct rv_jit_context *ctx) 1308 + { 1309 + __build_epilogue(false, ctx); 1310 + }
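The emit_rev16()/emit_rev32() helpers above JIT a byte swap out of shifts, masks and adds, since the RV32 base ISA has no byte-swap instruction. As a sketch, here is the value each emitted sequence computes in plain C (the function names are stand-ins, not kernel code; the comments map each step to the emitted RV32 instruction, with t0/t1 standing for RV_REG_T0/RV_REG_T1):

```c
#include <assert.h>
#include <stdint.h>

/* Mirrors the sequence emit_rev32() generates: build the swapped word
 * in t1 one byte at a time (andi/add/slli/srli), then move it back. */
static uint32_t rev32_like_jit(uint32_t rd)
{
	uint32_t t1 = 0;			/* addi t1, zero, 0 */
	uint32_t t0;
	int i;

	for (i = 0; i < 4; i++) {
		t0 = rd & 255;			/* andi t0, rd, 255 */
		t1 += t0;			/* add  t1, t1, t0  */
		if (i < 3) {
			t1 <<= 8;		/* slli t1, t1, 8 */
			rd >>= 8;		/* srli rd, rd, 8 */
		}
	}
	return t1;				/* addi rd, t1, 0 */
}

/* Mirrors emit_rev16(): swap the two low bytes, zeroing the rest. */
static uint32_t rev16_like_jit(uint32_t rd)
{
	uint32_t t1;

	rd <<= 16;				/* slli rd, rd, 16 */
	t1 = rd << 8;				/* slli t1, rd, 8  */
	rd >>= 8;				/* srli rd, rd, 8  */
	t1 += rd;				/* add  t1, rd, t1 */
	return t1 >> 16;			/* srli rd, t1, 16 */
}
```

At roughly 17 emitted instructions for a 32-bit swap, BPF_END is one of the costlier ALU ops in this JIT; RV64 can do the same with wider shifts over a single register.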
+166
arch/riscv/net/bpf_jit_core.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* 3 + * Common functionality for RV32 and RV64 BPF JIT compilers 4 + * 5 + * Copyright (c) 2019 Björn Töpel <bjorn.topel@gmail.com> 6 + * 7 + */ 8 + 9 + #include <linux/bpf.h> 10 + #include <linux/filter.h> 11 + #include "bpf_jit.h" 12 + 13 + /* Number of iterations to try until offsets converge. */ 14 + #define NR_JIT_ITERATIONS 16 15 + 16 + static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset) 17 + { 18 + const struct bpf_prog *prog = ctx->prog; 19 + int i; 20 + 21 + for (i = 0; i < prog->len; i++) { 22 + const struct bpf_insn *insn = &prog->insnsi[i]; 23 + int ret; 24 + 25 + ret = bpf_jit_emit_insn(insn, ctx, extra_pass); 26 + /* BPF_LD | BPF_IMM | BPF_DW: skip the next instruction. */ 27 + if (ret > 0) 28 + i++; 29 + if (offset) 30 + offset[i] = ctx->ninsns; 31 + if (ret < 0) 32 + return ret; 33 + } 34 + return 0; 35 + } 36 + 37 + bool bpf_jit_needs_zext(void) 38 + { 39 + return true; 40 + } 41 + 42 + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) 43 + { 44 + bool tmp_blinded = false, extra_pass = false; 45 + struct bpf_prog *tmp, *orig_prog = prog; 46 + int pass = 0, prev_ninsns = 0, i; 47 + struct rv_jit_data *jit_data; 48 + struct rv_jit_context *ctx; 49 + unsigned int image_size = 0; 50 + 51 + if (!prog->jit_requested) 52 + return orig_prog; 53 + 54 + tmp = bpf_jit_blind_constants(prog); 55 + if (IS_ERR(tmp)) 56 + return orig_prog; 57 + if (tmp != prog) { 58 + tmp_blinded = true; 59 + prog = tmp; 60 + } 61 + 62 + jit_data = prog->aux->jit_data; 63 + if (!jit_data) { 64 + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); 65 + if (!jit_data) { 66 + prog = orig_prog; 67 + goto out; 68 + } 69 + prog->aux->jit_data = jit_data; 70 + } 71 + 72 + ctx = &jit_data->ctx; 73 + 74 + if (ctx->offset) { 75 + extra_pass = true; 76 + image_size = sizeof(u32) * ctx->ninsns; 77 + goto skip_init_ctx; 78 + } 79 + 80 + ctx->prog = prog; 81 + ctx->offset = kcalloc(prog->len, sizeof(int), 
GFP_KERNEL); 82 + if (!ctx->offset) { 83 + prog = orig_prog; 84 + goto out_offset; 85 + } 86 + for (i = 0; i < prog->len; i++) { 87 + prev_ninsns += 32; 88 + ctx->offset[i] = prev_ninsns; 89 + } 90 + 91 + for (i = 0; i < NR_JIT_ITERATIONS; i++) { 92 + pass++; 93 + ctx->ninsns = 0; 94 + if (build_body(ctx, extra_pass, ctx->offset)) { 95 + prog = orig_prog; 96 + goto out_offset; 97 + } 98 + bpf_jit_build_prologue(ctx); 99 + ctx->epilogue_offset = ctx->ninsns; 100 + bpf_jit_build_epilogue(ctx); 101 + 102 + if (ctx->ninsns == prev_ninsns) { 103 + if (jit_data->header) 104 + break; 105 + 106 + image_size = sizeof(u32) * ctx->ninsns; 107 + jit_data->header = 108 + bpf_jit_binary_alloc(image_size, 109 + &jit_data->image, 110 + sizeof(u32), 111 + bpf_fill_ill_insns); 112 + if (!jit_data->header) { 113 + prog = orig_prog; 114 + goto out_offset; 115 + } 116 + 117 + ctx->insns = (u32 *)jit_data->image; 118 + /* 119 + * Now, when the image is allocated, the image can 120 + * potentially shrink more (auipc/jalr -> jal). 
121 + */ 122 + } 123 + prev_ninsns = ctx->ninsns; 124 + } 125 + 126 + if (i == NR_JIT_ITERATIONS) { 127 + pr_err("bpf-jit: image did not converge in <%d passes!\n", i); 128 + bpf_jit_binary_free(jit_data->header); 129 + prog = orig_prog; 130 + goto out_offset; 131 + } 132 + 133 + skip_init_ctx: 134 + pass++; 135 + ctx->ninsns = 0; 136 + 137 + bpf_jit_build_prologue(ctx); 138 + if (build_body(ctx, extra_pass, NULL)) { 139 + bpf_jit_binary_free(jit_data->header); 140 + prog = orig_prog; 141 + goto out_offset; 142 + } 143 + bpf_jit_build_epilogue(ctx); 144 + 145 + if (bpf_jit_enable > 1) 146 + bpf_jit_dump(prog->len, image_size, pass, ctx->insns); 147 + 148 + prog->bpf_func = (void *)ctx->insns; 149 + prog->jited = 1; 150 + prog->jited_len = image_size; 151 + 152 + bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); 153 + 154 + if (!prog->is_func || extra_pass) { 155 + out_offset: 156 + kfree(ctx->offset); 157 + kfree(jit_data); 158 + prog->aux->jit_data = NULL; 159 + } 160 + out: 161 + 162 + if (tmp_blinded) 163 + bpf_jit_prog_release_other(prog, prog == orig_prog ? 164 + tmp : orig_prog); 165 + return prog; 166 + }
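bpf_int_jit_compile() above seeds every offset pessimistically (32 emitted instructions per BPF instruction) and re-runs build_body() until ctx->ninsns stops changing: branch encodings depend on offsets, and offsets depend on encodings, so the JIT iterates to a fixed point. A toy model of why that loop converges (all sizes and limits here are invented for illustration; the key property, as in the real JIT, is that sizes only ever shrink, which makes the iteration monotone):

```c
#include <assert.h>

#define TOY_NR_ITERATIONS 16	/* like NR_JIT_ITERATIONS */

/* Each slot i branches to targets[i].  A branch costs 1 unit when the
 * distance (sum of sizes between the two slots) fits in a byte, else 2,
 * roughly like jal vs auipc+jalr.  Returns the number of the pass on
 * which the sizes stopped changing, or -1 if they never converged. */
static int toy_converge(const int *targets, int n, int *sizes)
{
	int pass, i, j, changed;

	for (i = 0; i < n; i++)
		sizes[i] = 2;		/* pessimistic start, shrink-only */

	for (pass = 0; pass < TOY_NR_ITERATIONS; pass++) {
		changed = 0;
		for (i = 0; i < n; i++) {
			int lo = i < targets[i] ? i : targets[i];
			int hi = i < targets[i] ? targets[i] : i;
			int dist = 0, want;

			for (j = lo; j < hi; j++)
				dist += sizes[j];
			want = dist <= 127 ? 1 : 2;
			if (want != sizes[i]) {
				sizes[i] = want;
				changed = 1;
			}
		}
		if (!changed)
			return pass;	/* fixed point reached */
	}
	return -1;
}

/* Small worked example: four mutually-branching slots. */
static int toy_example(void)
{
	int targets[4] = { 3, 0, 3, 0 };
	int sizes[4];

	return toy_converge(targets, 4, sizes);
}
```

In the real compiler the same shrinking happens once more after the image is allocated, because resolved addresses can turn auipc/jalr pairs into single jal instructions.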
+9 -5
arch/x86/mm/init_32.c
··· 238 238 } 239 239 } 240 240 241 - static inline int is_kernel_text(unsigned long addr) 241 + /* 242 + * The <linux/kallsyms.h> already defines is_kernel_text, 243 + * using '__' prefix not to get in conflict. 244 + */ 245 + static inline int __is_kernel_text(unsigned long addr) 242 246 { 243 247 if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) 244 248 return 1; ··· 332 328 addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + 333 329 PAGE_OFFSET + PAGE_SIZE-1; 334 330 335 - if (is_kernel_text(addr) || 336 - is_kernel_text(addr2)) 331 + if (__is_kernel_text(addr) || 332 + __is_kernel_text(addr2)) 337 333 prot = PAGE_KERNEL_LARGE_EXEC; 338 334 339 335 pages_2m++; ··· 358 354 */ 359 355 pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR); 360 356 361 - if (is_kernel_text(addr)) 357 + if (__is_kernel_text(addr)) 362 358 prot = PAGE_KERNEL_EXEC; 363 359 364 360 pages_4k++; ··· 885 881 */ 886 882 unsigned long start = PFN_ALIGN(_etext); 887 883 /* 888 - * This comes from is_kernel_text upper limit. Also HPAGE where used: 884 + * This comes from __is_kernel_text upper limit. Also HPAGE where used: 889 885 */ 890 886 unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; 891 887
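The init_32.c hunk above renames is_kernel_text() only to avoid clashing with the new <linux/kallsyms.h> definition; the check itself is a simple inclusive range test, probed at both ends of a large page so the page is mapped executable if any part of it overlaps kernel text. A stand-in sketch (the bounds are invented; in the kernel they are the linker symbols _text and __init_end):

```c
#include <assert.h>

static const unsigned long fake_text = 0xc1000000ul;		/* _text */
static const unsigned long fake_init_end = 0xc1800000ul;	/* __init_end */

/* Same shape as __is_kernel_text(): inclusive on both ends. */
static int fake_is_kernel_text(unsigned long addr)
{
	if (addr >= fake_text && addr <= fake_init_end)
		return 1;
	return 0;
}

/* Mirrors the 2M-page callsite (addr and addr2): the page gets an
 * executable protection if either boundary byte falls in text. */
static int fake_large_page_is_text(unsigned long start, unsigned long len)
{
	return fake_is_kernel_text(start) ||
	       fake_is_kernel_text(start + len - 1);
}
```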
+186 -76
arch/x86/net/bpf_jit_comp.c
··· 1361 1361 -(stack_size - i * 8)); 1362 1362 } 1363 1363 1364 - static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, 1365 - struct bpf_prog **progs, int prog_cnt, int stack_size) 1364 + static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, 1365 + struct bpf_prog *p, int stack_size, bool mod_ret) 1366 1366 { 1367 1367 u8 *prog = *pprog; 1368 - int cnt = 0, i; 1368 + int cnt = 0; 1369 1369 1370 - for (i = 0; i < prog_cnt; i++) { 1371 - if (emit_call(&prog, __bpf_prog_enter, prog)) 1372 - return -EINVAL; 1373 - /* remember prog start time returned by __bpf_prog_enter */ 1374 - emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); 1370 + if (emit_call(&prog, __bpf_prog_enter, prog)) 1371 + return -EINVAL; 1372 + /* remember prog start time returned by __bpf_prog_enter */ 1373 + emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); 1375 1374 1376 - /* arg1: lea rdi, [rbp - stack_size] */ 1377 - EMIT4(0x48, 0x8D, 0x7D, -stack_size); 1378 - /* arg2: progs[i]->insnsi for interpreter */ 1379 - if (!progs[i]->jited) 1380 - emit_mov_imm64(&prog, BPF_REG_2, 1381 - (long) progs[i]->insnsi >> 32, 1382 - (u32) (long) progs[i]->insnsi); 1383 - /* call JITed bpf program or interpreter */ 1384 - if (emit_call(&prog, progs[i]->bpf_func, prog)) 1385 - return -EINVAL; 1375 + /* arg1: lea rdi, [rbp - stack_size] */ 1376 + EMIT4(0x48, 0x8D, 0x7D, -stack_size); 1377 + /* arg2: progs[i]->insnsi for interpreter */ 1378 + if (!p->jited) 1379 + emit_mov_imm64(&prog, BPF_REG_2, 1380 + (long) p->insnsi >> 32, 1381 + (u32) (long) p->insnsi); 1382 + /* call JITed bpf program or interpreter */ 1383 + if (emit_call(&prog, p->bpf_func, prog)) 1384 + return -EINVAL; 1386 1385 1387 - /* arg1: mov rdi, progs[i] */ 1388 - emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32, 1389 - (u32) (long) progs[i]); 1390 - /* arg2: mov rsi, rbx <- start time in nsec */ 1391 - emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); 1392 - if (emit_call(&prog, __bpf_prog_exit, prog)) 1386 + /* 
BPF_TRAMP_MODIFY_RETURN trampolines can modify the return 1387 + * of the previous call which is then passed on the stack to 1388 + * the next BPF program. 1389 + */ 1390 + if (mod_ret) 1391 + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); 1392 + 1393 + /* arg1: mov rdi, progs[i] */ 1394 + emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, 1395 + (u32) (long) p); 1396 + /* arg2: mov rsi, rbx <- start time in nsec */ 1397 + emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); 1398 + if (emit_call(&prog, __bpf_prog_exit, prog)) 1399 + return -EINVAL; 1400 + 1401 + *pprog = prog; 1402 + return 0; 1403 + } 1404 + 1405 + static void emit_nops(u8 **pprog, unsigned int len) 1406 + { 1407 + unsigned int i, noplen; 1408 + u8 *prog = *pprog; 1409 + int cnt = 0; 1410 + 1411 + while (len > 0) { 1412 + noplen = len; 1413 + 1414 + if (noplen > ASM_NOP_MAX) 1415 + noplen = ASM_NOP_MAX; 1416 + 1417 + for (i = 0; i < noplen; i++) 1418 + EMIT1(ideal_nops[noplen][i]); 1419 + len -= noplen; 1420 + } 1421 + 1422 + *pprog = prog; 1423 + } 1424 + 1425 + static void emit_align(u8 **pprog, u32 align) 1426 + { 1427 + u8 *target, *prog = *pprog; 1428 + 1429 + target = PTR_ALIGN(prog, align); 1430 + if (target != prog) 1431 + emit_nops(&prog, target - prog); 1432 + 1433 + *pprog = prog; 1434 + } 1435 + 1436 + static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) 1437 + { 1438 + u8 *prog = *pprog; 1439 + int cnt = 0; 1440 + s64 offset; 1441 + 1442 + offset = func - (ip + 2 + 4); 1443 + if (!is_simm32(offset)) { 1444 + pr_err("Target %p is out of range\n", func); 1445 + return -EINVAL; 1446 + } 1447 + EMIT2_off32(0x0F, jmp_cond + 0x10, offset); 1448 + *pprog = prog; 1449 + return 0; 1450 + } 1451 + 1452 + static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, 1453 + struct bpf_tramp_progs *tp, int stack_size) 1454 + { 1455 + int i; 1456 + u8 *prog = *pprog; 1457 + 1458 + for (i = 0; i < tp->nr_progs; i++) { 1459 + if (invoke_bpf_prog(m, &prog, tp->progs[i], 
stack_size, false)) 1393 1460 return -EINVAL; 1394 1461 } 1462 + *pprog = prog; 1463 + return 0; 1464 + } 1465 + 1466 + static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, 1467 + struct bpf_tramp_progs *tp, int stack_size, 1468 + u8 **branches) 1469 + { 1470 + u8 *prog = *pprog; 1471 + int i, cnt = 0; 1472 + 1473 + /* The first fmod_ret program will receive a garbage return value. 1474 + * Set this to 0 to avoid confusing the program. 1475 + */ 1476 + emit_mov_imm32(&prog, false, BPF_REG_0, 0); 1477 + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); 1478 + for (i = 0; i < tp->nr_progs; i++) { 1479 + if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true)) 1480 + return -EINVAL; 1481 + 1482 + /* mod_ret prog stored return value into [rbp - 8]. Emit: 1483 + * if (*(u64 *)(rbp - 8) != 0) 1484 + * goto do_fexit; 1485 + */ 1486 + /* cmp QWORD PTR [rbp - 0x8], 0x0 */ 1487 + EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00); 1488 + 1489 + /* Save the location of the branch and Generate 6 nops 1490 + * (4 bytes for an offset and 2 bytes for the jump) These nops 1491 + * are replaced with a conditional jump once do_fexit (i.e. the 1492 + * start of the fexit invocation) is finalized. 
1493 + */ 1494 + branches[i] = prog; 1495 + emit_nops(&prog, 4 + 2); 1496 + } 1497 + 1395 1498 *pprog = prog; 1396 1499 return 0; 1397 1500 } ··· 1561 1458 */ 1562 1459 int arch_prepare_bpf_trampoline(void *image, void *image_end, 1563 1460 const struct btf_func_model *m, u32 flags, 1564 - struct bpf_prog **fentry_progs, int fentry_cnt, 1565 - struct bpf_prog **fexit_progs, int fexit_cnt, 1461 + struct bpf_tramp_progs *tprogs, 1566 1462 void *orig_call) 1567 1463 { 1568 - int cnt = 0, nr_args = m->nr_args; 1464 + int ret, i, cnt = 0, nr_args = m->nr_args; 1569 1465 int stack_size = nr_args * 8; 1466 + struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; 1467 + struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; 1468 + struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; 1469 + u8 **branches = NULL; 1570 1470 u8 *prog; 1571 1471 1572 1472 /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ ··· 1598 1492 1599 1493 save_regs(m, &prog, nr_args, stack_size); 1600 1494 1601 - if (fentry_cnt) 1602 - if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size)) 1495 + if (fentry->nr_progs) 1496 + if (invoke_bpf(m, &prog, fentry, stack_size)) 1603 1497 return -EINVAL; 1604 1498 1499 + if (fmod_ret->nr_progs) { 1500 + branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *), 1501 + GFP_KERNEL); 1502 + if (!branches) 1503 + return -ENOMEM; 1504 + 1505 + if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size, 1506 + branches)) { 1507 + ret = -EINVAL; 1508 + goto cleanup; 1509 + } 1510 + } 1511 + 1605 1512 if (flags & BPF_TRAMP_F_CALL_ORIG) { 1606 - if (fentry_cnt) 1513 + if (fentry->nr_progs || fmod_ret->nr_progs) 1607 1514 restore_regs(m, &prog, nr_args, stack_size); 1608 1515 1609 1516 /* call original function */ 1610 - if (emit_call(&prog, orig_call, prog)) 1611 - return -EINVAL; 1517 + if (emit_call(&prog, orig_call, prog)) { 1518 + ret = -EINVAL; 1519 + goto cleanup; 1520 + } 1612 1521 /* remember return value in a stack for 
bpf prog to access */ 1613 1522 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); 1614 1523 } 1615 1524 1616 - if (fexit_cnt) 1617 - if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size)) 1618 - return -EINVAL; 1525 + if (fmod_ret->nr_progs) { 1526 + /* From Intel 64 and IA-32 Architectures Optimization 1527 + * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler 1528 + * Coding Rule 11: All branch targets should be 16-byte 1529 + * aligned. 1530 + */ 1531 + emit_align(&prog, 16); 1532 + /* Update the branches saved in invoke_bpf_mod_ret with the 1533 + * aligned address of do_fexit. 1534 + */ 1535 + for (i = 0; i < fmod_ret->nr_progs; i++) 1536 + emit_cond_near_jump(&branches[i], prog, branches[i], 1537 + X86_JNE); 1538 + } 1539 + 1540 + if (fexit->nr_progs) 1541 + if (invoke_bpf(m, &prog, fexit, stack_size)) { 1542 + ret = -EINVAL; 1543 + goto cleanup; 1544 + } 1619 1545 1620 1546 if (flags & BPF_TRAMP_F_RESTORE_REGS) 1621 1547 restore_regs(m, &prog, nr_args, stack_size); 1622 1548 1549 + /* This needs to be done regardless. If there were fmod_ret programs, 1550 + * the return value is only updated on the stack and still needs to be 1551 + * restored to R0. 
1552 + */ 1623 1553 if (flags & BPF_TRAMP_F_CALL_ORIG) 1624 1554 /* restore original return value back into RAX */ 1625 1555 emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); ··· 1667 1525 EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ 1668 1526 EMIT1(0xC3); /* ret */ 1669 1527 /* Make sure the trampoline generation logic doesn't overflow */ 1670 - if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) 1671 - return -EFAULT; 1672 - return prog - (u8 *)image; 1673 - } 1674 - 1675 - static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) 1676 - { 1677 - u8 *prog = *pprog; 1678 - int cnt = 0; 1679 - s64 offset; 1680 - 1681 - offset = func - (ip + 2 + 4); 1682 - if (!is_simm32(offset)) { 1683 - pr_err("Target %p is out of range\n", func); 1684 - return -EINVAL; 1528 + if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { 1529 + ret = -EFAULT; 1530 + goto cleanup; 1685 1531 } 1686 - EMIT2_off32(0x0F, jmp_cond + 0x10, offset); 1687 - *pprog = prog; 1688 - return 0; 1689 - } 1532 + ret = prog - (u8 *)image; 1690 1533 1691 - static void emit_nops(u8 **pprog, unsigned int len) 1692 - { 1693 - unsigned int i, noplen; 1694 - u8 *prog = *pprog; 1695 - int cnt = 0; 1696 - 1697 - while (len > 0) { 1698 - noplen = len; 1699 - 1700 - if (noplen > ASM_NOP_MAX) 1701 - noplen = ASM_NOP_MAX; 1702 - 1703 - for (i = 0; i < noplen; i++) 1704 - EMIT1(ideal_nops[noplen][i]); 1705 - len -= noplen; 1706 - } 1707 - 1708 - *pprog = prog; 1534 + cleanup: 1535 + kfree(branches); 1536 + return ret; 1709 1537 } 1710 1538 1711 1539 static int emit_fallback_jump(u8 **pprog) ··· 1700 1588 1701 1589 static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) 1702 1590 { 1703 - u8 *jg_reloc, *jg_target, *prog = *pprog; 1591 + u8 *jg_reloc, *prog = *pprog; 1704 1592 int pivot, err, jg_bytes = 1, cnt = 0; 1705 1593 s64 jg_offset; 1706 1594 ··· 1755 1643 * Coding Rule 11: All branch targets should be 16-byte 1756 1644 * aligned. 
1757 1645 */ 1758 - jg_target = PTR_ALIGN(prog, 16); 1759 - if (jg_target != prog) 1760 - emit_nops(&prog, jg_target - prog); 1646 + emit_align(&prog, 16); 1761 1647 jg_offset = prog - jg_reloc; 1762 1648 emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes); 1763 1649
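Taken together, the trampoline changes above emit the following control flow for the new modify_return attach type: fentry programs run first, each fmod_ret program runs next, and the first nonzero fmod_ret return value short-circuits past the traced function straight to the fexit programs (the cmp/jne pairs patched in via branches[]). A plain-C model of that flow, with simplified stand-in types in place of the real bpf_tramp_progs machinery:

```c
#include <assert.h>
#include <stddef.h>

typedef long (*tramp_prog_t)(void *ctx);

struct toy_tramp_progs {
	tramp_prog_t progs[4];
	int nr_progs;
};

/* Models the generated x86 code; "ret" plays the role of [rbp - 8]. */
static long toy_trampoline(struct toy_tramp_progs *fentry,
			   struct toy_tramp_progs *fmod_ret,
			   struct toy_tramp_progs *fexit,
			   long (*orig_call)(void *), void *ctx)
{
	long ret = 0;	/* first fmod_ret prog must not see garbage */
	int i;

	for (i = 0; i < fentry->nr_progs; i++)
		fentry->progs[i](ctx);

	for (i = 0; i < fmod_ret->nr_progs; i++) {
		ret = fmod_ret->progs[i](ctx);
		if (ret)
			goto do_fexit;	/* cmp [rbp-8],0 ; jne do_fexit */
	}

	ret = orig_call(ctx);

do_fexit:
	for (i = 0; i < fexit->nr_progs; i++)
		fexit->progs[i](ctx);

	return ret;
}

static long ret_zero(void *ctx)      { (void)ctx; return 0; }
static long ret_minus_one(void *ctx) { (void)ctx; return -1; }
static long toy_orig(void *ctx)      { (void)ctx; return 42; }

/* With block = 0 every fmod_ret prog passes; with block = 1 the second
 * prog rejects and the original function is never called. */
static long toy_demo(int block)
{
	struct toy_tramp_progs none = { .nr_progs = 0 };
	struct toy_tramp_progs fmod = {
		.progs = { ret_zero, block ? ret_minus_one : ret_zero },
		.nr_progs = 2,
	};

	return toy_trampoline(&none, &fmod, &none, toy_orig, NULL);
}
```

This is why the series zeroes [rbp - 8] before the first fmod_ret program and reloads RAX from the stack afterwards: the return value may have been produced by either the original function or a rejecting program.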
+14
fs/nsfs.c
··· 247 247 return ERR_PTR(-EINVAL); 248 248 } 249 249 250 + /** 251 + * ns_match() - Returns true if current namespace matches dev/ino provided. 252 + * @ns: current namespace 253 + * @dev: dev_t from nsfs that will be matched against current nsfs 254 + * @ino: ino_t from nsfs that will be matched against current nsfs 255 + * 256 + * Return: true if dev and ino match the current nsfs. 257 + */ 258 + bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino) 259 + { 260 + return (ns->inum == ino) && (nsfs_mnt->mnt_sb->s_dev == dev); 261 + } 262 + 263 + 250 264 static int nsfs_show_path(struct seq_file *seq, struct dentry *dentry) 251 265 { 252 266 struct inode *inode = d_inode(dentry);
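ns_match() above identifies a namespace purely by the (device, inode) pair of its nsfs file, the same identity userspace sees via /proc/&lt;pid&gt;/ns/*. A minimal stand-in sketch of that comparison (types simplified for illustration; the real helper reads the device number from the nsfs mount's superblock):

```c
#include <assert.h>

struct toy_ns {
	unsigned long inum;	/* stands in for ns_common.inum */
};

/* Same namespace iff both the inode number and the nsfs device match. */
static int toy_ns_match(const struct toy_ns *ns, unsigned long nsfs_dev,
			unsigned long dev, unsigned long ino)
{
	return ns->inum == ino && nsfs_dev == dev;
}
```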
+81 -28
include/linux/bpf.h
···
 #include <linux/refcount.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
+#include <linux/kallsyms.h>

 struct bpf_verifier_env;
 struct bpf_verifier_log;
···
  */
 #define BPF_TRAMP_F_SKIP_FRAME		BIT(2)

+/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
+ * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
+ */
+#define BPF_MAX_TRAMP_PROGS 40
+
+struct bpf_tramp_progs {
+	struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS];
+	int nr_progs;
+};
+
 /* Different use cases for BPF trampoline:
  * 1. replace nop at the function entry (kprobe equivalent)
  *    flags = BPF_TRAMP_F_RESTORE_REGS
···
  */
 int arch_prepare_bpf_trampoline(void *image, void *image_end,
				const struct btf_func_model *m, u32 flags,
-				struct bpf_prog **fentry_progs, int fentry_cnt,
-				struct bpf_prog **fexit_progs, int fexit_cnt,
+				struct bpf_tramp_progs *tprogs,
				void *orig_call);
 /* these two functions are called from generated trampoline */
 u64 notrace __bpf_prog_enter(void);
 void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);

+struct bpf_ksym {
+	unsigned long		start;
+	unsigned long		end;
+	char			name[KSYM_NAME_LEN];
+	struct list_head	lnode;
+	struct latch_tree_node	tnode;
+	bool			prog;
+};
+
 enum bpf_tramp_prog_type {
	BPF_TRAMP_FENTRY,
	BPF_TRAMP_FEXIT,
+	BPF_TRAMP_MODIFY_RETURN,
	BPF_TRAMP_MAX,
	BPF_TRAMP_REPLACE, /* more than MAX */
 };
···
	/* Executable image of trampoline */
	void *image;
	u64 selector;
+	struct bpf_ksym ksym;
 };

 #define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
···
	int num_progs;
	void *image;
	u32 image_off;
+	struct bpf_ksym ksym;
 };

-static __always_inline unsigned int bpf_dispatcher_nopfunc(
+static __always_inline unsigned int bpf_dispatcher_nop_func(
	const void *ctx,
	const struct bpf_insn *insnsi,
	unsigned int (*bpf_func)(const void *,
···
 int bpf_trampoline_link_prog(struct bpf_prog *prog);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
-#define BPF_DISPATCHER_INIT(name) {			\
-	.mutex = __MUTEX_INITIALIZER(name.mutex),	\
-	.func = &name##func,				\
-	.progs = {},					\
-	.num_progs = 0,					\
-	.image = NULL,					\
-	.image_off = 0					\
+#define BPF_DISPATCHER_INIT(_name) {				\
+	.mutex = __MUTEX_INITIALIZER(_name.mutex),		\
+	.func = &_name##_func,					\
+	.progs = {},						\
+	.num_progs = 0,						\
+	.image = NULL,						\
+	.image_off = 0,						\
+	.ksym = {						\
+		.name  = #_name,				\
+		.lnode = LIST_HEAD_INIT(_name.ksym.lnode),	\
+	},							\
 }

 #define DEFINE_BPF_DISPATCHER(name)					\
-	noinline unsigned int name##func(				\
+	noinline unsigned int bpf_dispatcher_##name##_func(		\
		const void *ctx,					\
		const struct bpf_insn *insnsi,				\
		unsigned int (*bpf_func)(const void *,			\
···
	{								\
		return bpf_func(ctx, insnsi);				\
	}								\
-	EXPORT_SYMBOL(name##func);					\
-	struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name);
+	EXPORT_SYMBOL(bpf_dispatcher_##name##_func);			\
+	struct bpf_dispatcher bpf_dispatcher_##name =			\
+		BPF_DISPATCHER_INIT(bpf_dispatcher_##name);
 #define DECLARE_BPF_DISPATCHER(name)					\
-	unsigned int name##func(					\
+	unsigned int bpf_dispatcher_##name##_func(			\
		const void *ctx,					\
		const struct bpf_insn *insnsi,				\
		unsigned int (*bpf_func)(const void *,			\
					 const struct bpf_insn *));	\
-	extern struct bpf_dispatcher name;
-#define BPF_DISPATCHER_FUNC(name) name##func
-#define BPF_DISPATCHER_PTR(name) (&name)
+	extern struct bpf_dispatcher bpf_dispatcher_##name;
+#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_##name##_func
+#define BPF_DISPATCHER_PTR(name) (&bpf_dispatcher_##name)
 void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
				struct bpf_prog *to);
-struct bpf_image {
-	struct latch_tree_node tnode;
-	unsigned char data[];
-};
-#define BPF_IMAGE_SIZE (PAGE_SIZE - sizeof(struct bpf_image))
-bool is_bpf_image_address(unsigned long address);
-void *bpf_image_alloc(void);
+/* Called only from JIT-enabled code, so there's no need for stubs. */
+void *bpf_jit_alloc_exec_page(void);
+void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
+void bpf_image_ksym_del(struct bpf_ksym *ksym);
+void bpf_ksym_add(struct bpf_ksym *ksym);
+void bpf_ksym_del(struct bpf_ksym *ksym);
 #else
 static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
···
 static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
 #define DEFINE_BPF_DISPATCHER(name)
 #define DECLARE_BPF_DISPATCHER(name)
-#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc
+#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nop_func
 #define BPF_DISPATCHER_PTR(name) NULL
 static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
					      struct bpf_prog *from,
···
	void *jit_data; /* JIT specific data. arch dependent */
	struct bpf_jit_poke_descriptor *poke_tab;
	u32 size_poke_tab;
-	struct latch_tree_node ksym_tnode;
-	struct list_head ksym_lnode;
+	struct bpf_ksym ksym;
	const struct bpf_prog_ops *ops;
	struct bpf_map **used_maps;
	struct bpf_prog *prog;
···
 int bpf_map_new_fd(struct bpf_map *map, int flags);
 int bpf_prog_new_fd(struct bpf_prog *prog);

+struct bpf_link;
+
+struct bpf_link_ops {
+	void (*release)(struct bpf_link *link);
+	void (*dealloc)(struct bpf_link *link);
+};
+
+void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
+		   struct bpf_prog *prog);
+void bpf_link_inc(struct bpf_link *link);
+void bpf_link_put(struct bpf_link *link);
+int bpf_link_new_fd(struct bpf_link *link);
+struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd);
+struct bpf_link *bpf_link_get_from_fd(u32 ufd);
+
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
···
			  union bpf_attr __user *uattr);
 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
			  union bpf_attr __user *uattr);
+int bpf_prog_test_run_tracing(struct bpf_prog *prog,
+			      const union bpf_attr *kattr,
+			      union bpf_attr __user *uattr);
 int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
				     const union bpf_attr *kattr,
				     union bpf_attr __user *uattr);
···
	return -ENOTSUPP;
 }

+static inline int bpf_prog_test_run_tracing(struct bpf_prog *prog,
+					    const union bpf_attr *kattr,
+					    union bpf_attr __user *uattr)
+{
+	return -ENOTSUPP;
+}
+
 static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
						   const union bpf_attr *kattr,
						   union bpf_attr __user *uattr)
···
 #if defined(CONFIG_BPF_STREAM_PARSER)
 int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which);
 int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
+void sock_map_unhash(struct sock *sk);
+void sock_map_close(struct sock *sk, long timeout);
 #else
 static inline int sock_map_prog_update(struct bpf_map *map,
				       struct bpf_prog *prog, u32 which)
···
 {
	return -EINVAL;
 }
-#endif
+#endif /* CONFIG_BPF_STREAM_PARSER */

 #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
 void bpf_sk_reuseport_detach(struct sock *sk);
···
 extern const struct bpf_func_proto bpf_strtoul_proto;
 extern const struct bpf_func_proto bpf_tcp_sock_proto;
 extern const struct bpf_func_proto bpf_jiffies64_proto;
+extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto;

 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
+4 -11
include/linux/filter.h
···
		ret; })

 #define BPF_PROG_RUN(prog, ctx)					\
-	__BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc)
+	__BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func)

 /*
  * Use in preemptible and therefore migratable context to make sure that
···
	u32 ret;

	migrate_disable();
-	ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc);
+	ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nop_func);
	migrate_enable();
	return ret;
 }
···
	return res;
 }

-DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp)
+DECLARE_BPF_DISPATCHER(xdp)

 static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
					    struct xdp_buff *xdp)
···
	 * already takes rcu_read_lock() when fetching the program, so
	 * it's not necessary here anymore.
	 */
-	return __BPF_PROG_RUN(prog, xdp,
-			      BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp));
+	return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
 }

 void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
···

 void bpf_prog_kallsyms_add(struct bpf_prog *fp);
 void bpf_prog_kallsyms_del(struct bpf_prog *fp);
-void bpf_get_prog_name(const struct bpf_prog *prog, char *sym);

 #else /* CONFIG_BPF_JIT */

···

 static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
 {
-}
-
-static inline void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
-{
-	sym[0] = '\0';
 }

 #endif /* CONFIG_BPF_JIT */
+2
include/linux/proc_ns.h
···
 extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
			   void *private_data);

+extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino);
+
 extern int ns_get_name(char *buf, size_t size, struct task_struct *task,
			const struct proc_ns_operations *ns_ops);
 extern void nsfs_init(void);
+23 -33
include/linux/skmsg.h
···
 }

 struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
-#if defined(CONFIG_BPF_STREAM_PARSER)
-void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link);
-#else
-static inline void sk_psock_unlink(struct sock *sk,
-				   struct sk_psock_link *link)
-{
-}
-#endif

 void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
···
					 struct sk_psock *psock,
					 struct proto *ops)
 {
-	psock->saved_unhash = sk->sk_prot->unhash;
-	psock->saved_close = sk->sk_prot->close;
-	psock->saved_write_space = sk->sk_write_space;
+	/* Initialize saved callbacks and original proto only once, since this
+	 * function may be called multiple times for a psock, e.g. when
+	 * psock->progs.msg_parser is updated.
+	 *
+	 * Since we've not installed the new proto, psock is not yet in use and
+	 * we can initialize it without synchronization.
+	 */
+	if (!psock->sk_proto) {
+		struct proto *orig = READ_ONCE(sk->sk_prot);

-	psock->sk_proto = sk->sk_prot;
+		psock->saved_unhash = orig->unhash;
+		psock->saved_close = orig->close;
+		psock->saved_write_space = sk->sk_write_space;
+
+		psock->sk_proto = orig;
+	}
+
	/* Pairs with lockless read in sk_clone_lock() */
	WRITE_ONCE(sk->sk_prot, ops);
 }
···
						struct sk_psock *psock)
 {
	sk->sk_prot->unhash = psock->saved_unhash;
-	tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
+	if (inet_csk_has_ulp(sk)) {
+		tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
+	} else {
+		sk->sk_write_space = psock->saved_write_space;
+		/* Pairs with lockless read in sk_clone_lock() */
+		WRITE_ONCE(sk->sk_prot, psock->sk_proto);
+	}
 }

 static inline void sk_psock_set_state(struct sk_psock *psock,
···
				  enum sk_psock_state_bits bit)
 {
	return test_bit(bit, &psock->state);
-}
-
-static inline struct sk_psock *sk_psock_get_checked(struct sock *sk)
-{
-	struct sk_psock *psock;
-
-	rcu_read_lock();
-	psock = sk_psock(sk);
-	if (psock) {
-		if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) {
-			psock = ERR_PTR(-EBUSY);
-			goto out;
-		}
-
-		if (!refcount_inc_not_zero(&psock->refcnt))
-			psock = ERR_PTR(-EBUSY);
-	}
-out:
-	rcu_read_unlock();
-	return psock;
 }

 static inline struct sk_psock *sk_psock_get(struct sock *sk)
+6
include/net/inet_connection_sock.h
···
	if (icsk->icsk_ack.pingpong < U8_MAX)
		icsk->icsk_ack.pingpong++;
 }
+
+static inline bool inet_csk_has_ulp(struct sock *sk)
+{
+	return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
+}
+
 #endif /* _INET_CONNECTION_SOCK_H */
+11 -9
include/net/tcp.h
···
 struct sk_msg;
 struct sk_psock;

-int tcp_bpf_init(struct sock *sk);
-void tcp_bpf_reinit(struct sock *sk);
+#ifdef CONFIG_BPF_STREAM_PARSER
+struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+#else
+static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
+{
+}
+#endif /* CONFIG_BPF_STREAM_PARSER */
+
+#ifdef CONFIG_NET_SOCK_MSG
 int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
			  int flags);
 int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		    int nonblock, int flags, int *addr_len);
 int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
		      struct msghdr *msg, int len, int flags);
-#ifdef CONFIG_NET_SOCK_MSG
-void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
-#else
-static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
-{
-}
-#endif
+#endif /* CONFIG_NET_SOCK_MSG */

 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
+5
include/net/udp.h
···
	return segs;
 }

+#ifdef CONFIG_BPF_STREAM_PARSER
+struct sk_psock;
+struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
+#endif /* BPF_STREAM_PARSER */
+
 #endif	/* _UDP_H */
+11 -7
include/trace/bpf_probe.h
···
	check_trace_callback_type_##call(__bpf_trace_##template);	\
 }									\
 typedef void (*btf_trace_##call)(void *__data, proto);			\
-static struct bpf_raw_event_map __used					\
-	__attribute__((section("__bpf_raw_tp_map")))			\
-__bpf_trace_tp_map_##call = {						\
-	.tp		= &__tracepoint_##call,				\
-	.bpf_func	= (void *)(btf_trace_##call)__bpf_trace_##template, \
-	.num_args	= COUNT_ARGS(args),				\
-	.writable_size	= size,						\
+static union {								\
+	struct bpf_raw_event_map event;					\
+	btf_trace_##call handler;					\
+} __bpf_trace_tp_map_##call __used					\
+__attribute__((section("__bpf_raw_tp_map"))) = {			\
+	.event = {							\
+		.tp		= &__tracepoint_##call,			\
+		.bpf_func	= __bpf_trace_##template,		\
+		.num_args	= COUNT_ARGS(args),			\
+		.writable_size	= size,					\
+	},								\
 };

 #define FIRST(x, ...) x
+154 -67
include/uapi/linux/bpf.h
···
	BPF_TRACE_RAW_TP,
	BPF_TRACE_FENTRY,
	BPF_TRACE_FEXIT,
+	BPF_MODIFY_RETURN,
	__MAX_BPF_ATTACH_TYPE
 };
···
 #define BPF_PSEUDO_CALL		1

 /* flags for BPF_MAP_UPDATE_ELEM command */
-#define BPF_ANY		0 /* create new element or update existing */
-#define BPF_NOEXIST	1 /* create new element if it didn't exist */
-#define BPF_EXIST	2 /* update existing element */
-#define BPF_F_LOCK	4 /* spin_lock-ed map_lookup/map_update */
+enum {
+	BPF_ANY		= 0, /* create new element or update existing */
+	BPF_NOEXIST	= 1, /* create new element if it didn't exist */
+	BPF_EXIST	= 2, /* update existing element */
+	BPF_F_LOCK	= 4, /* spin_lock-ed map_lookup/map_update */
+};

 /* flags for BPF_MAP_CREATE command */
-#define BPF_F_NO_PREALLOC	(1U << 0)
+enum {
+	BPF_F_NO_PREALLOC	= (1U << 0),
 /* Instead of having one common LRU list in the
  * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
  * which can scale and perform better.
  * Note, the LRU nodes (including free nodes) cannot be moved
  * across different LRU lists.
  */
-#define BPF_F_NO_COMMON_LRU	(1U << 1)
+	BPF_F_NO_COMMON_LRU	= (1U << 1),
 /* Specify numa node during map creation */
-#define BPF_F_NUMA_NODE		(1U << 2)
-
-#define BPF_OBJ_NAME_LEN 16U
+	BPF_F_NUMA_NODE		= (1U << 2),

 /* Flags for accessing BPF object from syscall side. */
-#define BPF_F_RDONLY		(1U << 3)
-#define BPF_F_WRONLY		(1U << 4)
+	BPF_F_RDONLY		= (1U << 3),
+	BPF_F_WRONLY		= (1U << 4),

 /* Flag for stack_map, store build_id+offset instead of pointer */
-#define BPF_F_STACK_BUILD_ID	(1U << 5)
+	BPF_F_STACK_BUILD_ID	= (1U << 5),

 /* Zero-initialize hash function seed. This should only be used for testing. */
-#define BPF_F_ZERO_SEED		(1U << 6)
+	BPF_F_ZERO_SEED		= (1U << 6),

 /* Flags for accessing BPF object from program side. */
-#define BPF_F_RDONLY_PROG	(1U << 7)
-#define BPF_F_WRONLY_PROG	(1U << 8)
+	BPF_F_RDONLY_PROG	= (1U << 7),
+	BPF_F_WRONLY_PROG	= (1U << 8),

 /* Clone map from listener for newly accepted socket */
-#define BPF_F_CLONE		(1U << 9)
+	BPF_F_CLONE		= (1U << 9),

 /* Enable memory-mapping BPF map */
-#define BPF_F_MMAPABLE		(1U << 10)
+	BPF_F_MMAPABLE		= (1U << 10),
+};

 /* Flags for BPF_PROG_QUERY. */
···
		__u64 ip;
	};
 };
+
+#define BPF_OBJ_NAME_LEN 16U

 union bpf_attr {
	struct { /* anonymous struct used by BPF_MAP_CREATE command */
···
  *		of sizeof(struct perf_branch_entry).
  *
  *		**-ENOENT** if architecture does not support branch records.
+ *
+ * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
+ *	Description
+ *		Returns 0 on success, values for *pid* and *tgid* as seen from the current
+ *		*namespace* will be returned in *nsdata*.
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EINVAL** if dev and inum supplied don't match dev_t and inode number
+ *		with nsfs of current task, or if dev conversion to dev_t lost high bits.
+ *
+ *		**-ENOENT** if pidns does not exists for the current task.
+ *
+ * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ *	Description
+ *		Write raw *data* blob into a special BPF perf event held by
+ *		*map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ *		event must have the following attributes: **PERF_SAMPLE_RAW**
+ *		as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ *		**PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ *		The *flags* are used to indicate the index in *map* for which
+ *		the value must be put, masked with **BPF_F_INDEX_MASK**.
+ *		Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ *		to indicate that the index of the current CPU core should be
+ *		used.
+ *
+ *		The value to write, of *size*, is passed through eBPF stack and
+ *		pointed by *data*.
+ *
+ *		*ctx* is a pointer to in-kernel struct xdp_buff.
+ *
+ *		This helper is similar to **bpf_perf_eventoutput**\ () but
+ *		restricted to raw_tracepoint bpf programs.
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
	FN(unspec),			\
···
	FN(tcp_send_ack),		\
	FN(send_signal_thread),		\
	FN(jiffies64),			\
-	FN(read_branch_records),
+	FN(read_branch_records),	\
+	FN(get_ns_current_pid_tgid),	\
+	FN(xdp_output),

 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
···
 /* All flags used by eBPF helper functions, placed here. */

 /* BPF_FUNC_skb_store_bytes flags. */
-#define BPF_F_RECOMPUTE_CSUM		(1ULL << 0)
-#define BPF_F_INVALIDATE_HASH		(1ULL << 1)
+enum {
+	BPF_F_RECOMPUTE_CSUM		= (1ULL << 0),
+	BPF_F_INVALIDATE_HASH		= (1ULL << 1),
+};

 /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
  * First 4 bits are for passing the header field size.
  */
-#define BPF_F_HDR_FIELD_MASK		0xfULL
+enum {
+	BPF_F_HDR_FIELD_MASK		= 0xfULL,
+};

 /* BPF_FUNC_l4_csum_replace flags. */
-#define BPF_F_PSEUDO_HDR		(1ULL << 4)
-#define BPF_F_MARK_MANGLED_0		(1ULL << 5)
-#define BPF_F_MARK_ENFORCE		(1ULL << 6)
+enum {
+	BPF_F_PSEUDO_HDR		= (1ULL << 4),
+	BPF_F_MARK_MANGLED_0		= (1ULL << 5),
+	BPF_F_MARK_ENFORCE		= (1ULL << 6),
+};

 /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
-#define BPF_F_INGRESS			(1ULL << 0)
+enum {
+	BPF_F_INGRESS			= (1ULL << 0),
+};

 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
-#define BPF_F_TUNINFO_IPV6		(1ULL << 0)
+enum {
+	BPF_F_TUNINFO_IPV6		= (1ULL << 0),
+};

 /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
-#define BPF_F_SKIP_FIELD_MASK		0xffULL
-#define BPF_F_USER_STACK		(1ULL << 8)
+enum {
+	BPF_F_SKIP_FIELD_MASK		= 0xffULL,
+	BPF_F_USER_STACK		= (1ULL << 8),
 /* flags used by BPF_FUNC_get_stackid only. */
-#define BPF_F_FAST_STACK_CMP		(1ULL << 9)
-#define BPF_F_REUSE_STACKID		(1ULL << 10)
+	BPF_F_FAST_STACK_CMP		= (1ULL << 9),
+	BPF_F_REUSE_STACKID		= (1ULL << 10),
 /* flags used by BPF_FUNC_get_stack only. */
-#define BPF_F_USER_BUILD_ID		(1ULL << 11)
+	BPF_F_USER_BUILD_ID		= (1ULL << 11),
+};

 /* BPF_FUNC_skb_set_tunnel_key flags. */
-#define BPF_F_ZERO_CSUM_TX		(1ULL << 1)
-#define BPF_F_DONT_FRAGMENT		(1ULL << 2)
-#define BPF_F_SEQ_NUMBER		(1ULL << 3)
+enum {
+	BPF_F_ZERO_CSUM_TX		= (1ULL << 1),
+	BPF_F_DONT_FRAGMENT		= (1ULL << 2),
+	BPF_F_SEQ_NUMBER		= (1ULL << 3),
+};

 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
  */
-#define BPF_F_INDEX_MASK		0xffffffffULL
-#define BPF_F_CURRENT_CPU		BPF_F_INDEX_MASK
+enum {
+	BPF_F_INDEX_MASK		= 0xffffffffULL,
+	BPF_F_CURRENT_CPU		= BPF_F_INDEX_MASK,
 /* BPF_FUNC_perf_event_output for sk_buff input context. */
-#define BPF_F_CTXLEN_MASK		(0xfffffULL << 32)
+	BPF_F_CTXLEN_MASK		= (0xfffffULL << 32),
+};

 /* Current network namespace */
-#define BPF_F_CURRENT_NETNS		(-1L)
+enum {
+	BPF_F_CURRENT_NETNS		= (-1L),
+};

 /* BPF_FUNC_skb_adjust_room flags. */
-#define BPF_F_ADJ_ROOM_FIXED_GSO	(1ULL << 0)
+enum {
+	BPF_F_ADJ_ROOM_FIXED_GSO	= (1ULL << 0),
+	BPF_F_ADJ_ROOM_ENCAP_L3_IPV4	= (1ULL << 1),
+	BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	= (1ULL << 2),
+	BPF_F_ADJ_ROOM_ENCAP_L4_GRE	= (1ULL << 3),
+	BPF_F_ADJ_ROOM_ENCAP_L4_UDP	= (1ULL << 4),
+};

-#define BPF_ADJ_ROOM_ENCAP_L2_MASK	0xff
-#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT	56
+enum {
+	BPF_ADJ_ROOM_ENCAP_L2_MASK	= 0xff,
+	BPF_ADJ_ROOM_ENCAP_L2_SHIFT	= 56,
+};

-#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4	(1ULL << 1)
-#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	(1ULL << 2)
-#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE	(1ULL << 3)
-#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP	(1ULL << 4)
 #define BPF_F_ADJ_ROOM_ENCAP_L2(len)	(((__u64)len & \
					  BPF_ADJ_ROOM_ENCAP_L2_MASK) \
					 << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)

 /* BPF_FUNC_sysctl_get_name flags. */
-#define BPF_F_SYSCTL_BASE_NAME		(1ULL << 0)
+enum {
+	BPF_F_SYSCTL_BASE_NAME		= (1ULL << 0),
+};

 /* BPF_FUNC_sk_storage_get flags */
-#define BPF_SK_STORAGE_GET_F_CREATE	(1ULL << 0)
+enum {
+	BPF_SK_STORAGE_GET_F_CREATE	= (1ULL << 0),
+};

 /* BPF_FUNC_read_branch_records flags. */
-#define BPF_F_GET_BRANCH_RECORDS_SIZE	(1ULL << 0)
+enum {
+	BPF_F_GET_BRANCH_RECORDS_SIZE	= (1ULL << 0),
+};

 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
···
	__u32 wire_len;
	__u32 gso_segs;
	__bpf_md_ptr(struct bpf_sock *, sk);
+	__u32 gso_size;
 };

 struct bpf_tunnel_key {
···
 };

 /* Definitions for bpf_sock_ops_cb_flags */
-#define BPF_SOCK_OPS_RTO_CB_FLAG	(1<<0)
-#define BPF_SOCK_OPS_RETRANS_CB_FLAG	(1<<1)
-#define BPF_SOCK_OPS_STATE_CB_FLAG	(1<<2)
-#define BPF_SOCK_OPS_RTT_CB_FLAG	(1<<3)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS	0xF	/* Mask of all currently
						 * supported cb flags
						 */
+enum {
+	BPF_SOCK_OPS_RTO_CB_FLAG	= (1<<0),
+	BPF_SOCK_OPS_RETRANS_CB_FLAG	= (1<<1),
+	BPF_SOCK_OPS_STATE_CB_FLAG	= (1<<2),
+	BPF_SOCK_OPS_RTT_CB_FLAG	= (1<<3),
+	/* Mask of all currently supported cb flags */
+	BPF_SOCK_OPS_ALL_CB_FLAGS	= 0xF,
+};

 /* List of known BPF sock_ops operators.
  * New entries can only be added at the end
···
	BPF_TCP_MAX_STATES	/* Leave at the end! */
 };

-#define TCP_BPF_IW		1001	/* Set TCP initial congestion window */
-#define TCP_BPF_SNDCWND_CLAMP	1002	/* Set sndcwnd_clamp */
+enum {
+	TCP_BPF_IW		= 1001,	/* Set TCP initial congestion window */
+	TCP_BPF_SNDCWND_CLAMP	= 1002,	/* Set sndcwnd_clamp */
+};

 struct bpf_perf_event_value {
	__u64 counter;
···
	__u64 running;
 };

-#define BPF_DEVCG_ACC_MKNOD	(1ULL << 0)
-#define BPF_DEVCG_ACC_READ	(1ULL << 1)
-#define BPF_DEVCG_ACC_WRITE	(1ULL << 2)
+enum {
+	BPF_DEVCG_ACC_MKNOD	= (1ULL << 0),
+	BPF_DEVCG_ACC_READ	= (1ULL << 1),
+	BPF_DEVCG_ACC_WRITE	= (1ULL << 2),
+};

-#define BPF_DEVCG_DEV_BLOCK	(1ULL << 0)
-#define BPF_DEVCG_DEV_CHAR	(1ULL << 1)
+enum {
+	BPF_DEVCG_DEV_BLOCK	= (1ULL << 0),
+	BPF_DEVCG_DEV_CHAR	= (1ULL << 1),
+};

 struct bpf_cgroup_dev_ctx {
	/* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
···
 /* DIRECT:  Skip the FIB rules and go to FIB table associated with device
  * OUTPUT:  Do lookup from egress perspective; default is ingress
  */
-#define BPF_FIB_LOOKUP_DIRECT  (1U << 0)
-#define BPF_FIB_LOOKUP_OUTPUT  (1U << 1)
+enum {
+	BPF_FIB_LOOKUP_DIRECT  = (1U << 0),
+	BPF_FIB_LOOKUP_OUTPUT  = (1U << 1),
+};

 enum {
	BPF_FIB_LKUP_RET_SUCCESS,      /* lookup successful */
···
	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
 };

-#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG		(1U << 0)
-#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL		(1U << 1)
-#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP		(1U << 2)
+enum {
+	BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG		= (1U << 0),
+	BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL		= (1U << 1),
+	BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP		= (1U << 2),
+};

 struct bpf_flow_keys {
	__u16	nhoff;
···
	__s32 retval;
 };

+struct bpf_pidns_info {
+	__u32 pid;
+	__u32 tgid;
+};
 #endif /* _UAPI__LINUX_BPF_H__ */
+9 -1
kernel/bpf/bpf_struct_ops.c
···
	struct bpf_struct_ops_value *uvalue, *kvalue;
	const struct btf_member *member;
	const struct btf_type *t = st_ops->type;
+	struct bpf_tramp_progs *tprogs = NULL;
	void *udata, *kdata;
	int prog_fd, err = 0;
	void *image;
···

	if (uvalue->state || refcount_read(&uvalue->refcnt))
		return -EINVAL;
+
+	tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
+	if (!tprogs)
+		return -ENOMEM;

	uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
	kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;
···
			goto reset_unlock;
		}

+		tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
+		tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
		err = arch_prepare_bpf_trampoline(image,
						  st_map->image + PAGE_SIZE,
						  &st_ops->func_models[i], 0,
-						  &prog, 1, NULL, 0, NULL);
+						  tprogs, NULL);
		if (err < 0)
			goto reset_unlock;
···
	memset(uvalue, 0, map->value_size);
	memset(kvalue, 0, map->value_size);
 unlock:
+	kfree(tprogs);
	mutex_unlock(&st_map->lock);
	return err;
 }
+20 -7
kernel/bpf/btf.c
···
		nr_args--;
	}

-	if (prog->expected_attach_type == BPF_TRACE_FEXIT &&
-	    arg == nr_args) {
-		if (!t)
-			/* Default prog with 5 args. 6th arg is retval. */
-			return true;
-		/* function return type */
-		t = btf_type_by_id(btf, t->type);
+	if (arg == nr_args) {
+		if (prog->expected_attach_type == BPF_TRACE_FEXIT) {
+			if (!t)
+				return true;
+			t = btf_type_by_id(btf, t->type);
+		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
+			/* For now the BPF_MODIFY_RETURN can only be attached to
+			 * functions that return an int.
+			 */
+			if (!t)
+				return false;
+
+			t = btf_type_skip_modifiers(btf, t->type, NULL);
+			if (!btf_type_is_int(t)) {
+				bpf_log(log,
+					"ret type %s not allowed for fmod_ret\n",
+					btf_kind_str[BTF_INFO_KIND(t->info)]);
+				return false;
+			}
+		}
	} else if (arg >= nr_args) {
		bpf_log(log, "func '%s' doesn't have %d-th argument\n",
			tname, arg + 1);
+65 -56
kernel/bpf/core.c
···
	fp->aux->prog = fp;
	fp->jit_requested = ebpf_jit_enabled();

-	INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode);
+	INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);

	return fp;
 }
···
 int bpf_jit_harden   __read_mostly;
 long bpf_jit_limit   __read_mostly;

-static __always_inline void
-bpf_get_prog_addr_region(const struct bpf_prog *prog,
-			 unsigned long *symbol_start,
-			 unsigned long *symbol_end)
+static void
+bpf_prog_ksym_set_addr(struct bpf_prog *prog)
 {
	const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog);
	unsigned long addr = (unsigned long)hdr;

	WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog));

-	*symbol_start = addr;
-	*symbol_end   = addr + hdr->pages * PAGE_SIZE;
+	prog->aux->ksym.start = (unsigned long) prog->bpf_func;
+	prog->aux->ksym.end   = addr + hdr->pages * PAGE_SIZE;
 }

-void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
+static void
+bpf_prog_ksym_set_name(struct bpf_prog *prog)
 {
+	char *sym = prog->aux->ksym.name;
	const char *end = sym + KSYM_NAME_LEN;
	const struct btf_type *type;
	const char *func_name;
···
	*sym = 0;
 }

-static __always_inline unsigned long
-bpf_get_prog_addr_start(struct latch_tree_node *n)
+static unsigned long bpf_get_ksym_start(struct latch_tree_node *n)
 {
-	unsigned long symbol_start, symbol_end;
-	const struct bpf_prog_aux *aux;
-
-	aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
-	bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
-
-	return symbol_start;
+	return container_of(n, struct bpf_ksym, tnode)->start;
 }

 static __always_inline bool bpf_tree_less(struct latch_tree_node *a,
					  struct latch_tree_node *b)
 {
-	return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b);
+	return bpf_get_ksym_start(a) < bpf_get_ksym_start(b);
 }

 static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n)
 {
	unsigned long val = (unsigned long)key;
-	unsigned long symbol_start, symbol_end;
-	const struct bpf_prog_aux *aux;
+	const struct bpf_ksym *ksym;

-	aux = container_of(n, struct bpf_prog_aux, ksym_tnode);
-	bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end);
+	ksym = container_of(n, struct bpf_ksym, tnode);

-	if (val < symbol_start)
+	if (val < ksym->start)
		return -1;
-	if (val >= symbol_end)
+	if (val >= ksym->end)
		return  1;

	return 0;
···
 static LIST_HEAD(bpf_kallsyms);
 static struct latch_tree_root bpf_tree __cacheline_aligned;

-static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux)
+void bpf_ksym_add(struct bpf_ksym *ksym)
 {
-	WARN_ON_ONCE(!list_empty(&aux->ksym_lnode));
-	list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms);
-	latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
+	spin_lock_bh(&bpf_lock);
+	WARN_ON_ONCE(!list_empty(&ksym->lnode));
+	list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms);
+	latch_tree_insert(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
+	spin_unlock_bh(&bpf_lock);
 }

-static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux)
+static void __bpf_ksym_del(struct bpf_ksym *ksym)
 {
-	if (list_empty(&aux->ksym_lnode))
+	if (list_empty(&ksym->lnode))
		return;

-	latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops);
-	list_del_rcu(&aux->ksym_lnode);
+	latch_tree_erase(&ksym->tnode, &bpf_tree, &bpf_tree_ops);
+	list_del_rcu(&ksym->lnode);
+}
+
+void bpf_ksym_del(struct bpf_ksym *ksym)
+{
+	spin_lock_bh(&bpf_lock);
+	__bpf_ksym_del(ksym);
+	spin_unlock_bh(&bpf_lock);
 }

 static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp)
···

 static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
 {
-	return list_empty(&fp->aux->ksym_lnode) ||
-	       fp->aux->ksym_lnode.prev == LIST_POISON2;
+	return list_empty(&fp->aux->ksym.lnode) ||
+	       fp->aux->ksym.lnode.prev == LIST_POISON2;
 }

 void bpf_prog_kallsyms_add(struct bpf_prog *fp)
···
	    !capable(CAP_SYS_ADMIN))
		return;

-	spin_lock_bh(&bpf_lock);
-	bpf_prog_ksym_node_add(fp->aux);
-	spin_unlock_bh(&bpf_lock);
+	bpf_prog_ksym_set_addr(fp);
+	bpf_prog_ksym_set_name(fp);
+	fp->aux->ksym.prog = true;
+
+	bpf_ksym_add(&fp->aux->ksym);
 }

 void bpf_prog_kallsyms_del(struct bpf_prog *fp)
···
	if (!bpf_prog_kallsyms_candidate(fp))
		return;

-	spin_lock_bh(&bpf_lock);
-	bpf_prog_ksym_node_del(fp->aux);
-	spin_unlock_bh(&bpf_lock);
+	bpf_ksym_del(&fp->aux->ksym);
 }

-static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr)
+static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
 {
	struct latch_tree_node *n;

	n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops);
-	return n ?
-	       container_of(n, struct bpf_prog_aux, ksym_tnode)->prog :
-	       NULL;
+	return n ? container_of(n, struct bpf_ksym, tnode) : NULL;
 }

 const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
				 unsigned long *off, char *sym)
 {
-	unsigned long symbol_start, symbol_end;
-	struct bpf_prog *prog;
+	struct bpf_ksym *ksym;
	char *ret = NULL;

	rcu_read_lock();
-	prog = bpf_prog_kallsyms_find(addr);
-	if (prog) {
-		bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end);
-		bpf_get_prog_name(prog, sym);
+	ksym = bpf_ksym_find(addr);
+	if (ksym) {
+		unsigned long symbol_start = ksym->start;
+		unsigned long symbol_end = ksym->end;
+
+		strncpy(sym, ksym->name, KSYM_NAME_LEN);

		ret = sym;
		if (size)
···
	bool ret;

	rcu_read_lock();
-	ret = bpf_prog_kallsyms_find(addr) != NULL;
+	ret = bpf_ksym_find(addr) != NULL;
	rcu_read_unlock();

	return ret;
+}
+
+static struct bpf_prog *bpf_prog_ksym_find(unsigned long addr)
+{
+	struct bpf_ksym *ksym = bpf_ksym_find(addr);
+
+	return ksym && ksym->prog ?
+	       container_of(ksym, struct bpf_prog_aux, ksym)->prog :
+	       NULL;
 }

 const struct exception_table_entry *search_bpf_extables(unsigned long addr)
···
	struct bpf_prog *prog;

	rcu_read_lock();
-	prog = bpf_prog_kallsyms_find(addr);
+	prog = bpf_prog_ksym_find(addr);
	if (!prog)
		goto out;
	if (!prog->aux->num_exentries)
···
 int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
		    char *sym)
 {
-	struct bpf_prog_aux *aux;
+	struct bpf_ksym *ksym;
	unsigned int it = 0;
	int ret = -ERANGE;

···
		return ret;

	rcu_read_lock();
-	list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) {
+	list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) {
		if (it++ != symnum)
			continue;

-		bpf_get_prog_name(aux->prog, sym);
+		strncpy(sym, ksym->name, KSYM_NAME_LEN);

-		*value = (unsigned long)aux->prog->bpf_func;
+		*value = ksym->start;
		*type  = BPF_SYM_ELF_TYPE;

		ret = 0;
···
 const struct bpf_func_proto bpf_get_current_comm_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
+const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;

 const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 {
+3 -2
kernel/bpf/dispatcher.c
···
 		noff = 0;
 	} else {
 		old = d->image + d->image_off;
-		noff = d->image_off ^ (BPF_IMAGE_SIZE / 2);
+		noff = d->image_off ^ (PAGE_SIZE / 2);
 	}
 
 	new = d->num_progs ? d->image + noff : NULL;
···
 
 	mutex_lock(&d->mutex);
 	if (!d->image) {
-		d->image = bpf_image_alloc();
+		d->image = bpf_jit_alloc_exec_page();
 		if (!d->image)
 			goto out;
+		bpf_image_ksym_add(d->image, &d->ksym);
 	}
 
 	prev_num_progs = d->num_progs;
+45
kernel/bpf/helpers.c
···
 #include <linux/filter.h>
 #include <linux/ctype.h>
 #include <linux/jiffies.h>
+#include <linux/pid_namespace.h>
+#include <linux/proc_ns.h>
 
 #include "../../lib/kstrtox.h"
 
···
 	.arg4_type	= ARG_PTR_TO_LONG,
 };
 #endif
+
+BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino,
+	   struct bpf_pidns_info *, nsdata, u32, size)
+{
+	struct task_struct *task = current;
+	struct pid_namespace *pidns;
+	int err = -EINVAL;
+
+	if (unlikely(size != sizeof(struct bpf_pidns_info)))
+		goto clear;
+
+	if (unlikely((u64)(dev_t)dev != dev))
+		goto clear;
+
+	if (unlikely(!task))
+		goto clear;
+
+	pidns = task_active_pid_ns(task);
+	if (unlikely(!pidns)) {
+		err = -ENOENT;
+		goto clear;
+	}
+
+	if (!ns_match(&pidns->ns, (dev_t)dev, ino))
+		goto clear;
+
+	nsdata->pid = task_pid_nr_ns(task, pidns);
+	nsdata->tgid = task_tgid_nr_ns(task, pidns);
+	return 0;
+clear:
+	memset((void *)nsdata, 0, (size_t)size);
+	return err;
+}
+
+const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
+	.func		= bpf_get_ns_current_pid_tgid,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg4_type	= ARG_CONST_SIZE,
+};
+39 -5
kernel/bpf/inode.c
···
 	BPF_TYPE_UNSPEC	= 0,
 	BPF_TYPE_PROG,
 	BPF_TYPE_MAP,
+	BPF_TYPE_LINK,
 };
 
 static void *bpf_any_get(void *raw, enum bpf_type type)
···
 		break;
 	case BPF_TYPE_MAP:
 		bpf_map_inc_with_uref(raw);
+		break;
+	case BPF_TYPE_LINK:
+		bpf_link_inc(raw);
 		break;
 	default:
 		WARN_ON_ONCE(1);
···
 	case BPF_TYPE_MAP:
 		bpf_map_put_with_uref(raw);
 		break;
+	case BPF_TYPE_LINK:
+		bpf_link_put(raw);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		break;
···
 {
 	void *raw;
 
-	*type = BPF_TYPE_MAP;
 	raw = bpf_map_get_with_uref(ufd);
-	if (IS_ERR(raw)) {
-		*type = BPF_TYPE_PROG;
-		raw = bpf_prog_get(ufd);
+	if (!IS_ERR(raw)) {
+		*type = BPF_TYPE_MAP;
+		return raw;
 	}
 
-	return raw;
+	raw = bpf_prog_get(ufd);
+	if (!IS_ERR(raw)) {
+		*type = BPF_TYPE_PROG;
+		return raw;
+	}
+
+	raw = bpf_link_get_from_fd(ufd);
+	if (!IS_ERR(raw)) {
+		*type = BPF_TYPE_LINK;
+		return raw;
+	}
+
+	return ERR_PTR(-EINVAL);
 }
 
 static const struct inode_operations bpf_dir_iops;
 
 static const struct inode_operations bpf_prog_iops = { };
 static const struct inode_operations bpf_map_iops  = { };
+static const struct inode_operations bpf_link_iops = { };
 
 static struct inode *bpf_get_inode(struct super_block *sb,
 				   const struct inode *dir,
···
 		*type = BPF_TYPE_PROG;
 	else if (inode->i_op == &bpf_map_iops)
 		*type = BPF_TYPE_MAP;
+	else if (inode->i_op == &bpf_link_iops)
+		*type = BPF_TYPE_LINK;
 	else
 		return -EACCES;
 
···
 			     &bpffs_map_fops : &bpffs_obj_fops);
 }
 
+static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
+{
+	return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops,
+			     &bpffs_obj_fops);
+}
+
 static struct dentry *
 bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
 {
···
 		break;
 	case BPF_TYPE_MAP:
 		ret = vfs_mkobj(dentry, mode, bpf_mkmap, raw);
+		break;
+	case BPF_TYPE_LINK:
+		ret = vfs_mkobj(dentry, mode, bpf_mklink, raw);
 		break;
 	default:
 		ret = -EPERM;
···
 		ret = bpf_prog_new_fd(raw);
 	else if (type == BPF_TYPE_MAP)
 		ret = bpf_map_new_fd(raw, f_flags);
+	else if (type == BPF_TYPE_LINK)
+		ret = bpf_link_new_fd(raw);
 	else
 		return -ENOENT;
 
···
 		return ERR_PTR(ret);
 
 	if (inode->i_op == &bpf_map_iops)
+		return ERR_PTR(-EINVAL);
+	if (inode->i_op == &bpf_link_iops)
 		return ERR_PTR(-EINVAL);
 	if (inode->i_op != &bpf_prog_iops)
 		return ERR_PTR(-EACCES);
+257 -59
kernel/bpf/syscall.c
···
 					    attr->file_flags);
 }
 
-static int bpf_tracing_prog_release(struct inode *inode, struct file *filp)
-{
-	struct bpf_prog *prog = filp->private_data;
+struct bpf_link {
+	atomic64_t refcnt;
+	const struct bpf_link_ops *ops;
+	struct bpf_prog *prog;
+	struct work_struct work;
+};
 
-	WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
-	bpf_prog_put(prog);
+void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
+		   struct bpf_prog *prog)
+{
+	atomic64_set(&link->refcnt, 1);
+	link->ops = ops;
+	link->prog = prog;
+}
+
+/* Clean up bpf_link and corresponding anon_inode file and FD. After
+ * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
+ * anon_inode's release() call. This helper manages marking bpf_link as
+ * defunct, releases anon_inode file and puts reserved FD.
+ */
+static void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
+			     int link_fd)
+{
+	link->prog = NULL;
+	fput(link_file);
+	put_unused_fd(link_fd);
+}
+
+void bpf_link_inc(struct bpf_link *link)
+{
+	atomic64_inc(&link->refcnt);
+}
+
+/* bpf_link_free is guaranteed to be called from process context */
+static void bpf_link_free(struct bpf_link *link)
+{
+	if (link->prog) {
+		/* detach BPF program, clean up used resources */
+		link->ops->release(link);
+		bpf_prog_put(link->prog);
+	}
+	/* free bpf_link and its containing memory */
+	link->ops->dealloc(link);
+}
+
+static void bpf_link_put_deferred(struct work_struct *work)
+{
+	struct bpf_link *link = container_of(work, struct bpf_link, work);
+
+	bpf_link_free(link);
+}
+
+/* bpf_link_put can be called from atomic context, but ensures that resources
+ * are freed from process context
+ */
+void bpf_link_put(struct bpf_link *link)
+{
+	if (!atomic64_dec_and_test(&link->refcnt))
+		return;
+
+	if (in_atomic()) {
+		INIT_WORK(&link->work, bpf_link_put_deferred);
+		schedule_work(&link->work);
+	} else {
+		bpf_link_free(link);
+	}
+}
+
+static int bpf_link_release(struct inode *inode, struct file *filp)
+{
+	struct bpf_link *link = filp->private_data;
+
+	bpf_link_put(link);
 	return 0;
 }
 
-static const struct file_operations bpf_tracing_prog_fops = {
-	.release	= bpf_tracing_prog_release,
+#ifdef CONFIG_PROC_FS
+static const struct bpf_link_ops bpf_raw_tp_lops;
+static const struct bpf_link_ops bpf_tracing_link_lops;
+static const struct bpf_link_ops bpf_xdp_link_lops;
+
+static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
+{
+	const struct bpf_link *link = filp->private_data;
+	const struct bpf_prog *prog = link->prog;
+	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
+	const char *link_type;
+
+	if (link->ops == &bpf_raw_tp_lops)
+		link_type = "raw_tracepoint";
+	else if (link->ops == &bpf_tracing_link_lops)
+		link_type = "tracing";
+	else
+		link_type = "unknown";
+
+	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
+	seq_printf(m,
+		   "link_type:\t%s\n"
+		   "prog_tag:\t%s\n"
+		   "prog_id:\t%u\n",
+		   link_type,
+		   prog_tag,
+		   prog->aux->id);
+}
+#endif
+
+const struct file_operations bpf_link_fops = {
+#ifdef CONFIG_PROC_FS
+	.show_fdinfo	= bpf_link_show_fdinfo,
+#endif
+	.release	= bpf_link_release,
 	.read		= bpf_dummy_read,
 	.write		= bpf_dummy_write,
 };
 
+int bpf_link_new_fd(struct bpf_link *link)
+{
+	return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
+}
+
+/* Similar to bpf_link_new_fd, create anon_inode for given bpf_link, but
+ * instead of immediately installing fd in fdtable, just reserve it and
+ * return. Caller then need to either install it with fd_install(fd, file) or
+ * release with put_unused_fd(fd).
+ * This is useful for cases when bpf_link attachment/detachment are
+ * complicated and expensive operations and should be delayed until all the fd
+ * reservation and anon_inode creation succeeds.
+ */
+struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd)
+{
+	struct file *file;
+	int fd;
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0)
+		return ERR_PTR(fd);
+
+	file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
+	if (IS_ERR(file)) {
+		put_unused_fd(fd);
+		return file;
+	}
+
+	*reserved_fd = fd;
+	return file;
+}
+
+struct bpf_link *bpf_link_get_from_fd(u32 ufd)
+{
+	struct fd f = fdget(ufd);
+	struct bpf_link *link;
+
+	if (!f.file)
+		return ERR_PTR(-EBADF);
+	if (f.file->f_op != &bpf_link_fops) {
+		fdput(f);
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = f.file->private_data;
+	bpf_link_inc(link);
+	fdput(f);
+
+	return link;
+}
+
+struct bpf_tracing_link {
+	struct bpf_link link;
+};
+
+static void bpf_tracing_link_release(struct bpf_link *link)
+{
+	WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
+}
+
+static void bpf_tracing_link_dealloc(struct bpf_link *link)
+{
+	struct bpf_tracing_link *tr_link =
+		container_of(link, struct bpf_tracing_link, link);
+
+	kfree(tr_link);
+}
+
+static const struct bpf_link_ops bpf_tracing_link_lops = {
+	.release = bpf_tracing_link_release,
+	.dealloc = bpf_tracing_link_dealloc,
+};
+
 static int bpf_tracing_prog_attach(struct bpf_prog *prog)
 {
-	int tr_fd, err;
+	struct bpf_tracing_link *link;
+	struct file *link_file;
+	int link_fd, err;
 
 	if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
 	    prog->expected_attach_type != BPF_TRACE_FEXIT &&
+	    prog->expected_attach_type != BPF_MODIFY_RETURN &&
 	    prog->type != BPF_PROG_TYPE_EXT) {
 		err = -EINVAL;
 		goto out_put_prog;
 	}
 
-	err = bpf_trampoline_link_prog(prog);
-	if (err)
-		goto out_put_prog;
-
-	tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops,
-				 prog, O_CLOEXEC);
-	if (tr_fd < 0) {
-		WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog));
-		err = tr_fd;
+	link = kzalloc(sizeof(*link), GFP_USER);
+	if (!link) {
+		err = -ENOMEM;
 		goto out_put_prog;
 	}
-	return tr_fd;
+	bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
+
+	link_file = bpf_link_new_file(&link->link, &link_fd);
+	if (IS_ERR(link_file)) {
+		kfree(link);
+		err = PTR_ERR(link_file);
+		goto out_put_prog;
+	}
+
+	err = bpf_trampoline_link_prog(prog);
+	if (err) {
+		bpf_link_cleanup(&link->link, link_file, link_fd);
+		goto out_put_prog;
+	}
+
+	fd_install(link_fd, link_file);
+	return link_fd;
 
 out_put_prog:
 	bpf_prog_put(prog);
 	return err;
 }
 
-struct bpf_raw_tracepoint {
+struct bpf_raw_tp_link {
+	struct bpf_link link;
 	struct bpf_raw_event_map *btp;
-	struct bpf_prog *prog;
 };
 
-static int bpf_raw_tracepoint_release(struct inode *inode, struct file *filp)
+static void bpf_raw_tp_link_release(struct bpf_link *link)
 {
-	struct bpf_raw_tracepoint *raw_tp = filp->private_data;
+	struct bpf_raw_tp_link *raw_tp =
+		container_of(link, struct bpf_raw_tp_link, link);
 
-	if (raw_tp->prog) {
-		bpf_probe_unregister(raw_tp->btp, raw_tp->prog);
-		bpf_prog_put(raw_tp->prog);
-	}
+	bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
 	bpf_put_raw_tracepoint(raw_tp->btp);
-	kfree(raw_tp);
-	return 0;
 }
 
-static const struct file_operations bpf_raw_tp_fops = {
-	.release	= bpf_raw_tracepoint_release,
-	.read		= bpf_dummy_read,
-	.write		= bpf_dummy_write,
+static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
+{
+	struct bpf_raw_tp_link *raw_tp =
+		container_of(link, struct bpf_raw_tp_link, link);
+
+	kfree(raw_tp);
+}
+
+static const struct bpf_link_ops bpf_raw_tp_lops = {
+	.release = bpf_raw_tp_link_release,
+	.dealloc = bpf_raw_tp_link_dealloc,
 };
 
 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
 
 static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 {
-	struct bpf_raw_tracepoint *raw_tp;
+	struct bpf_raw_tp_link *link;
 	struct bpf_raw_event_map *btp;
+	struct file *link_file;
 	struct bpf_prog *prog;
 	const char *tp_name;
 	char buf[128];
-	int tp_fd, err;
+	int link_fd, err;
 
 	if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
 		return -EINVAL;
···
 		goto out_put_prog;
 	}
 
-	raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER);
-	if (!raw_tp) {
+	link = kzalloc(sizeof(*link), GFP_USER);
+	if (!link) {
 		err = -ENOMEM;
 		goto out_put_btp;
 	}
-	raw_tp->btp = btp;
-	raw_tp->prog = prog;
+	bpf_link_init(&link->link, &bpf_raw_tp_lops, prog);
+	link->btp = btp;
 
-	err = bpf_probe_register(raw_tp->btp, prog);
-	if (err)
-		goto out_free_tp;
-
-	tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp,
-				 O_CLOEXEC);
-	if (tp_fd < 0) {
-		bpf_probe_unregister(raw_tp->btp, prog);
-		err = tp_fd;
-		goto out_free_tp;
+	link_file = bpf_link_new_file(&link->link, &link_fd);
+	if (IS_ERR(link_file)) {
+		kfree(link);
+		err = PTR_ERR(link_file);
+		goto out_put_btp;
 	}
-	return tp_fd;
 
-out_free_tp:
-	kfree(raw_tp);
+	err = bpf_probe_register(link->btp, prog);
+	if (err) {
+		bpf_link_cleanup(&link->link, link_file, link_fd);
+		goto out_put_btp;
+	}
+
+	fd_install(link_fd, link_file);
+	return link_fd;
+
 out_put_btp:
 	bpf_put_raw_tracepoint(btp);
 out_put_prog:
···
 	if (err)
 		goto out;
 
-	if (file->f_op == &bpf_raw_tp_fops) {
-		struct bpf_raw_tracepoint *raw_tp = file->private_data;
-		struct bpf_raw_event_map *btp = raw_tp->btp;
+	if (file->f_op == &bpf_link_fops) {
+		struct bpf_link *link = file->private_data;
 
-		err = bpf_task_fd_query_copy(attr, uattr,
-					     raw_tp->prog->aux->id,
-					     BPF_FD_TYPE_RAW_TRACEPOINT,
-					     btp->tp->name, 0, 0);
-		goto put_file;
+		if (link->ops == &bpf_raw_tp_lops) {
+			struct bpf_raw_tp_link *raw_tp =
+				container_of(link, struct bpf_raw_tp_link, link);
+			struct bpf_raw_event_map *btp = raw_tp->btp;
+
+			err = bpf_task_fd_query_copy(attr, uattr,
+						     raw_tp->link.prog->aux->id,
+						     BPF_FD_TYPE_RAW_TRACEPOINT,
+						     btp->tp->name, 0, 0);
+			goto put_file;
+		}
+		goto out_not_supp;
 	}
 
 	event = perf_get_event(file);
···
 		goto put_file;
 	}
 
+out_not_supp:
 	err = -ENOTSUPP;
 put_file:
 	fput(file);
+65 -87
kernel/bpf/trampoline.c
···
 #include <linux/filter.h>
 #include <linux/ftrace.h>
 #include <linux/rbtree_latch.h>
+#include <linux/perf_event.h>
 
 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
···
 #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS)
 
 static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
-static struct latch_tree_root image_tree __cacheline_aligned;
 
-/* serializes access to trampoline_table and image_tree */
+/* serializes access to trampoline_table */
 static DEFINE_MUTEX(trampoline_mutex);
 
-static void *bpf_jit_alloc_exec_page(void)
+void *bpf_jit_alloc_exec_page(void)
 {
 	void *image;
 
···
 	return image;
 }
 
-static __always_inline bool image_tree_less(struct latch_tree_node *a,
-					    struct latch_tree_node *b)
+void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym)
 {
-	struct bpf_image *ia = container_of(a, struct bpf_image, tnode);
-	struct bpf_image *ib = container_of(b, struct bpf_image, tnode);
-
-	return ia < ib;
+	ksym->start = (unsigned long) data;
+	ksym->end = ksym->start + PAGE_SIZE;
+	bpf_ksym_add(ksym);
+	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
+			   PAGE_SIZE, false, ksym->name);
 }
 
-static __always_inline int image_tree_comp(void *addr, struct latch_tree_node *n)
+void bpf_image_ksym_del(struct bpf_ksym *ksym)
 {
-	void *image = container_of(n, struct bpf_image, tnode);
-
-	if (addr < image)
-		return -1;
-	if (addr >= image + PAGE_SIZE)
-		return 1;
-
-	return 0;
+	bpf_ksym_del(ksym);
+	perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF, ksym->start,
+			   PAGE_SIZE, true, ksym->name);
 }
 
-static const struct latch_tree_ops image_tree_ops = {
-	.less	= image_tree_less,
-	.comp	= image_tree_comp,
-};
-
-static void *__bpf_image_alloc(bool lock)
+static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
 {
-	struct bpf_image *image;
+	struct bpf_ksym *ksym = &tr->ksym;
 
-	image = bpf_jit_alloc_exec_page();
-	if (!image)
-		return NULL;
-
-	if (lock)
-		mutex_lock(&trampoline_mutex);
-	latch_tree_insert(&image->tnode, &image_tree, &image_tree_ops);
-	if (lock)
-		mutex_unlock(&trampoline_mutex);
-	return image->data;
-}
-
-void *bpf_image_alloc(void)
-{
-	return __bpf_image_alloc(true);
-}
-
-bool is_bpf_image_address(unsigned long addr)
-{
-	bool ret;
-
-	rcu_read_lock();
-	ret = latch_tree_find((void *) addr, &image_tree, &image_tree_ops) != NULL;
-	rcu_read_unlock();
-
-	return ret;
+	snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key);
+	bpf_image_ksym_add(tr->image, ksym);
 }
 
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
···
 		goto out;
 
 	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
-	image = __bpf_image_alloc(false);
+	image = bpf_jit_alloc_exec_page();
 	if (!image) {
 		kfree(tr);
 		tr = NULL;
···
 	for (i = 0; i < BPF_TRAMP_MAX; i++)
 		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
 	tr->image = image;
+	INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
+	bpf_trampoline_ksym_add(tr);
 out:
 	mutex_unlock(&trampoline_mutex);
 	return tr;
···
 	return ret;
 }
 
-/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
- * bytes on x86.  Pick a number to fit into BPF_IMAGE_SIZE / 2
- */
-#define BPF_MAX_TRAMP_PROGS 40
+static struct bpf_tramp_progs *
+bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
+{
+	const struct bpf_prog_aux *aux;
+	struct bpf_tramp_progs *tprogs;
+	struct bpf_prog **progs;
+	int kind;
+
+	*total = 0;
+	tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
+	if (!tprogs)
+		return ERR_PTR(-ENOMEM);
+
+	for (kind = 0; kind < BPF_TRAMP_MAX; kind++) {
+		tprogs[kind].nr_progs = tr->progs_cnt[kind];
+		*total += tr->progs_cnt[kind];
+		progs = tprogs[kind].progs;
+
+		hlist_for_each_entry(aux, &tr->progs_hlist[kind], tramp_hlist)
+			*progs++ = aux->prog;
+	}
+	return tprogs;
+}
 
 static int bpf_trampoline_update(struct bpf_trampoline *tr)
 {
-	void *old_image = tr->image + ((tr->selector + 1) & 1) * BPF_IMAGE_SIZE/2;
-	void *new_image = tr->image + (tr->selector & 1) * BPF_IMAGE_SIZE/2;
-	struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS];
-	int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY];
-	int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT];
-	struct bpf_prog **progs, **fentry, **fexit;
+	void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
+	void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+	struct bpf_tramp_progs *tprogs;
 	u32 flags = BPF_TRAMP_F_RESTORE_REGS;
-	struct bpf_prog_aux *aux;
-	int err;
+	int err, total;
 
-	if (fentry_cnt + fexit_cnt == 0) {
+	tprogs = bpf_trampoline_get_progs(tr, &total);
+	if (IS_ERR(tprogs))
+		return PTR_ERR(tprogs);
+
+	if (total == 0) {
 		err = unregister_fentry(tr, old_image);
 		tr->selector = 0;
 		goto out;
 	}
 
-	/* populate fentry progs */
-	fentry = progs = progs_to_run;
-	hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist)
-		*progs++ = aux->prog;
-
-	/* populate fexit progs */
-	fexit = progs;
-	hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist)
-		*progs++ = aux->prog;
-
-	if (fexit_cnt)
+	if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
+	    tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
 		flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
 
 	/* Though the second half of trampoline page is unused a task could be
···
 	 * preempted task. Hence wait for tasks to voluntarily schedule or go
 	 * to userspace.
 	 */
+
 	synchronize_rcu_tasks();
 
-	err = arch_prepare_bpf_trampoline(new_image, new_image + BPF_IMAGE_SIZE / 2,
-					  &tr->func.model, flags,
-					  fentry, fentry_cnt,
-					  fexit, fexit_cnt,
+	err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
+					  &tr->func.model, flags, tprogs,
 					  tr->func.addr);
 	if (err < 0)
 		goto out;
···
 		goto out;
 	tr->selector++;
 out:
+	kfree(tprogs);
 	return err;
 }
 
···
 	switch (t) {
 	case BPF_TRACE_FENTRY:
 		return BPF_TRAMP_FENTRY;
+	case BPF_MODIFY_RETURN:
+		return BPF_TRAMP_MODIFY_RETURN;
 	case BPF_TRACE_FEXIT:
 		return BPF_TRAMP_FEXIT;
 	default:
···
 
 void bpf_trampoline_put(struct bpf_trampoline *tr)
 {
-	struct bpf_image *image;
-
 	if (!tr)
 		return;
 	mutex_lock(&trampoline_mutex);
···
 		goto out;
 	if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
 		goto out;
-	image = container_of(tr->image, struct bpf_image, data);
-	latch_tree_erase(&image->tnode, &image_tree, &image_tree_ops);
+	bpf_image_ksym_del(&tr->ksym);
 	/* wait for tasks to get out of trampoline before freeing it */
 	synchronize_rcu_tasks();
-	bpf_jit_free_exec(image);
+	bpf_jit_free_exec(tr->image);
 	hlist_del(&tr->hlist);
 	kfree(tr);
 out:
···
 * call __bpf_prog_exit
 */
 u64 notrace __bpf_prog_enter(void)
+	__acquires(RCU)
 {
 	u64 start = 0;
 
···
 }
 
 void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
+	__releases(RCU)
 {
 	struct bpf_prog_stats *stats;
 
···
 int __weak
 arch_prepare_bpf_trampoline(void *image, void *image_end,
 			    const struct btf_func_model *m, u32 flags,
-			    struct bpf_prog **fentry_progs, int fentry_cnt,
-			    struct bpf_prog **fexit_progs, int fexit_cnt,
+			    struct bpf_tramp_progs *tprogs,
 			    void *orig_call)
 {
 	return -ENOTSUPP;
+28 -1
kernel/bpf/verifier.c
···
 #include <linux/sort.h>
 #include <linux/perf_event.h>
 #include <linux/ctype.h>
+#include <linux/error-injection.h>
 
 #include "disasm.h"
 
···
 		if (func_id != BPF_FUNC_perf_event_read &&
 		    func_id != BPF_FUNC_perf_event_output &&
 		    func_id != BPF_FUNC_skb_output &&
-		    func_id != BPF_FUNC_perf_event_read_value)
+		    func_id != BPF_FUNC_perf_event_read_value &&
+		    func_id != BPF_FUNC_xdp_output)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_STACK_TRACE:
···
 	case BPF_FUNC_perf_event_output:
 	case BPF_FUNC_perf_event_read_value:
 	case BPF_FUNC_skb_output:
+	case BPF_FUNC_xdp_output:
 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
 			goto error;
 		break;
···
 
 	return 0;
 }
+#define SECURITY_PREFIX "security_"
+
+static int check_attach_modify_return(struct bpf_verifier_env *env)
+{
+	struct bpf_prog *prog = env->prog;
+	unsigned long addr = (unsigned long) prog->aux->trampoline->func.addr;
+
+	/* This is expected to be cleaned up in the future with the KRSI effort
+	 * introducing the LSM_HOOK macro for cleaning up lsm_hooks.h.
+	 */
+	if (within_error_injection_list(addr) ||
+	    !strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
+		     sizeof(SECURITY_PREFIX) - 1))
+		return 0;
+
+	verbose(env, "fmod_ret attach_btf_id %u (%s) is not modifiable\n",
+		prog->aux->attach_btf_id, prog->aux->attach_func_name);
+
+	return -EINVAL;
+}
 
 static int check_attach_btf_id(struct bpf_verifier_env *env)
 {
···
 		if (!prog_extension)
 			return -EINVAL;
 		/* fallthrough */
+	case BPF_MODIFY_RETURN:
 	case BPF_TRACE_FENTRY:
 	case BPF_TRACE_FEXIT:
 		if (!btf_type_is_func(t)) {
···
 		}
 		tr->func.addr = (void *)addr;
 		prog->aux->trampoline = tr;
+
+		if (prog->expected_attach_type == BPF_MODIFY_RETURN)
+			ret = check_attach_modify_return(env);
 out:
 		mutex_unlock(&tr->mutex);
 		if (ret)
+4 -5
kernel/events/core.c
···
 			 enum perf_bpf_event_type type)
 {
 	bool unregister = type == PERF_BPF_EVENT_PROG_UNLOAD;
-	char sym[KSYM_NAME_LEN];
 	int i;
 
 	if (prog->aux->func_cnt == 0) {
-		bpf_get_prog_name(prog, sym);
 		perf_event_ksymbol(PERF_RECORD_KSYMBOL_TYPE_BPF,
 				   (u64)(unsigned long)prog->bpf_func,
-				   prog->jited_len, unregister, sym);
+				   prog->jited_len, unregister,
+				   prog->aux->ksym.name);
 	} else {
 		for (i = 0; i < prog->aux->func_cnt; i++) {
 			struct bpf_prog *subprog = prog->aux->func[i];
 
-			bpf_get_prog_name(subprog, sym);
 			perf_event_ksymbol(
 				PERF_RECORD_KSYMBOL_TYPE_BPF,
 				(u64)(unsigned long)subprog->bpf_func,
-				subprog->jited_len, unregister, sym);
+				subprog->jited_len, unregister,
+				prog->aux->ksym.name);
 		}
 	}
 }
-2
kernel/extable.c
···
 		goto out;
 	if (is_bpf_text_address(addr))
 		goto out;
-	if (is_bpf_image_address(addr))
-		goto out;
 	ret = 0;
 out:
 	if (no_rcu)
+13
kernel/trace/bpf_trace.c
···
 		return &bpf_send_signal_thread_proto;
 	case BPF_FUNC_perf_event_read_value:
 		return &bpf_perf_event_read_value_proto;
+	case BPF_FUNC_get_ns_current_pid_tgid:
+		return &bpf_get_ns_current_pid_tgid_proto;
 	default:
 		return NULL;
 	}
···
 };
 
 extern const struct bpf_func_proto bpf_skb_output_proto;
+extern const struct bpf_func_proto bpf_xdp_output_proto;
 
 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
 	   struct bpf_map *, map, u64, flags)
···
 #ifdef CONFIG_NET
 	case BPF_FUNC_skb_output:
 		return &bpf_skb_output_proto;
+	case BPF_FUNC_xdp_output:
+		return &bpf_xdp_output_proto;
 #endif
 	default:
 		return raw_tp_prog_func_proto(func_id, prog);
···
 	return btf_ctx_access(off, size, type, prog, info);
 }
 
+int __weak bpf_prog_test_run_tracing(struct bpf_prog *prog,
+				     const union bpf_attr *kattr,
+				     union bpf_attr __user *uattr)
+{
+	return -ENOTSUPP;
+}
+
 const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
 	.get_func_proto  = raw_tp_prog_func_proto,
 	.is_valid_access = raw_tp_prog_is_valid_access,
···
 };
 
 const struct bpf_prog_ops tracing_prog_ops = {
+	.test_run = bpf_prog_test_run_tracing,
 };
 
 static bool raw_tp_writable_prog_is_valid_access(int off, int size,
+55 -9
net/bpf/test_run.c
···
 #include <net/bpf_sk_storage.h>
 #include <net/sock.h>
 #include <net/tcp.h>
+#include <linux/error-injection.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/bpf_test_run.h>
···
     return a + (long)b + c + d + (long)e + f;
 }
 
+int noinline bpf_modify_return_test(int a, int *b)
+{
+    *b += 1;
+    return a + *b;
+}
+
+ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
+
 static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
                            u32 headroom, u32 tailroom)
 {
···
         kfree(data);
         return ERR_PTR(-EFAULT);
     }
-    if (bpf_fentry_test1(1) != 2 ||
-        bpf_fentry_test2(2, 3) != 5 ||
-        bpf_fentry_test3(4, 5, 6) != 15 ||
-        bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
-        bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
-        bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) {
-        kfree(data);
-        return ERR_PTR(-EFAULT);
-    }
+
     return data;
+}
+
+int bpf_prog_test_run_tracing(struct bpf_prog *prog,
+                              const union bpf_attr *kattr,
+                              union bpf_attr __user *uattr)
+{
+    u16 side_effect = 0, ret = 0;
+    int b = 2, err = -EFAULT;
+    u32 retval = 0;
+
+    switch (prog->expected_attach_type) {
+    case BPF_TRACE_FENTRY:
+    case BPF_TRACE_FEXIT:
+        if (bpf_fentry_test1(1) != 2 ||
+            bpf_fentry_test2(2, 3) != 5 ||
+            bpf_fentry_test3(4, 5, 6) != 15 ||
+            bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
+            bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
+            bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111)
+            goto out;
+        break;
+    case BPF_MODIFY_RETURN:
+        ret = bpf_modify_return_test(1, &b);
+        if (b != 2)
+            side_effect = 1;
+        break;
+    default:
+        goto out;
+    }
+
+    retval = ((u32)side_effect << 16) | ret;
+    if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
+        goto out;
+
+    err = 0;
+out:
+    trace_bpf_test_finish(&err);
+    return err;
 }
 
 static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
···
     /* gso_segs is allowed */
 
     if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs),
+                       offsetof(struct __sk_buff, gso_size)))
+        return -EINVAL;
+
+    /* gso_size is allowed */
+
+    if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size),
                        sizeof(struct __sk_buff)))
         return -EINVAL;
···
     if (__skb->gso_segs > GSO_MAX_SEGS)
         return -EINVAL;
     skb_shinfo(skb)->gso_segs = __skb->gso_segs;
+    skb_shinfo(skb)->gso_size = __skb->gso_size;
 
     return 0;
 }
+47 -18
net/core/filter.c
···
 
     if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
         return -EINVAL;
-    if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
+    if (unlikely(!xdp ||
+                 xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
         return -EFAULT;
 
     return bpf_event_output(map, flags, meta, meta_size, xdp->data,
···
     .arg3_type  = ARG_ANYTHING,
     .arg4_type  = ARG_PTR_TO_MEM,
     .arg5_type  = ARG_CONST_SIZE_OR_ZERO,
+};
+
+static int bpf_xdp_output_btf_ids[5];
+const struct bpf_func_proto bpf_xdp_output_proto = {
+    .func       = bpf_xdp_event_output,
+    .gpl_only   = true,
+    .ret_type   = RET_INTEGER,
+    .arg1_type  = ARG_PTR_TO_BTF_ID,
+    .arg2_type  = ARG_CONST_MAP_PTR,
+    .arg3_type  = ARG_ANYTHING,
+    .arg4_type  = ARG_PTR_TO_MEM,
+    .arg5_type  = ARG_CONST_SIZE_OR_ZERO,
+    .btf_id     = bpf_xdp_output_btf_ids,
 };
 
 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
···
     return insn - insn_buf;
 }
 
+static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
+                                                  struct bpf_insn *insn)
+{
+    /* si->dst_reg = skb_shinfo(SKB); */
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+    *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
+                          BPF_REG_AX, si->src_reg,
+                          offsetof(struct sk_buff, end));
+    *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
+                          si->dst_reg, si->src_reg,
+                          offsetof(struct sk_buff, head));
+    *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
+#else
+    *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
+                          si->dst_reg, si->src_reg,
+                          offsetof(struct sk_buff, end));
+#endif
+
+    return insn;
+}
+
 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
                                   const struct bpf_insn *si,
                                   struct bpf_insn *insn_buf,
···
         break;
 
     case offsetof(struct __sk_buff, gso_segs):
-        /* si->dst_reg = skb_shinfo(SKB); */
-#ifdef NET_SKBUFF_DATA_USES_OFFSET
-        *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
-                              BPF_REG_AX, si->src_reg,
-                              offsetof(struct sk_buff, end));
-        *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
-                              si->dst_reg, si->src_reg,
-                              offsetof(struct sk_buff, head));
-        *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
-#else
-        *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
-                              si->dst_reg, si->src_reg,
-                              offsetof(struct sk_buff, end));
-#endif
+        insn = bpf_convert_shinfo_access(si, insn);
         *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
                               si->dst_reg, si->dst_reg,
                               bpf_target_off(struct skb_shared_info,
                                              gso_segs, 2,
+                                             target_size));
+        break;
+    case offsetof(struct __sk_buff, gso_size):
+        insn = bpf_convert_shinfo_access(si, insn);
+        *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size),
+                              si->dst_reg, si->dst_reg,
+                              bpf_target_off(struct skb_shared_info,
+                                             gso_size, 2,
                                              target_size));
         break;
     case offsetof(struct __sk_buff, wire_len):
···
 };
 #endif /* CONFIG_INET */
 
-DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp)
+DEFINE_BPF_DISPATCHER(xdp)
 
 void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
 {
-    bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp),
-                               prev_prog, prog);
+    bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
 }
+131 -26
net/core/sock_map.c
···
 #include <linux/list.h>
 #include <linux/jhash.h>
 #include <linux/sock_diag.h>
+#include <net/udp.h>
 
 struct bpf_stab {
     struct bpf_map map;
···
     }
 }
 
+static int sock_map_init_proto(struct sock *sk, struct sk_psock *psock)
+{
+    struct proto *prot;
+
+    sock_owned_by_me(sk);
+
+    switch (sk->sk_type) {
+    case SOCK_STREAM:
+        prot = tcp_bpf_get_proto(sk, psock);
+        break;
+
+    case SOCK_DGRAM:
+        prot = udp_bpf_get_proto(sk, psock);
+        break;
+
+    default:
+        return -EINVAL;
+    }
+
+    if (IS_ERR(prot))
+        return PTR_ERR(prot);
+
+    sk_psock_update_proto(sk, psock, prot);
+    return 0;
+}
+
+static struct sk_psock *sock_map_psock_get_checked(struct sock *sk)
+{
+    struct sk_psock *psock;
+
+    rcu_read_lock();
+    psock = sk_psock(sk);
+    if (psock) {
+        if (sk->sk_prot->close != sock_map_close) {
+            psock = ERR_PTR(-EBUSY);
+            goto out;
+        }
+
+        if (!refcount_inc_not_zero(&psock->refcnt))
+            psock = ERR_PTR(-EBUSY);
+    }
+out:
+    rcu_read_unlock();
+    return psock;
+}
+
 static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
                          struct sock *sk)
 {
     struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
-    bool skb_progs, sk_psock_is_new = false;
     struct sk_psock *psock;
+    bool skb_progs;
     int ret;
 
     skb_verdict = READ_ONCE(progs->skb_verdict);
···
         }
     }
 
-    psock = sk_psock_get_checked(sk);
+    psock = sock_map_psock_get_checked(sk);
     if (IS_ERR(psock)) {
         ret = PTR_ERR(psock);
         goto out_progs;
···
             ret = -ENOMEM;
             goto out_progs;
         }
-        sk_psock_is_new = true;
     }
 
     if (msg_parser)
         psock_set_prog(&psock->progs.msg_parser, msg_parser);
-    if (sk_psock_is_new) {
-        ret = tcp_bpf_init(sk);
-        if (ret < 0)
-            goto out_drop;
-    } else {
-        tcp_bpf_reinit(sk);
-    }
+
+    ret = sock_map_init_proto(sk, psock);
+    if (ret < 0)
+        goto out_drop;
 
     write_lock_bh(&sk->sk_callback_lock);
     if (skb_progs && !psock->parser.enabled) {
···
     struct sk_psock *psock;
     int ret;
 
-    psock = sk_psock_get_checked(sk);
+    psock = sock_map_psock_get_checked(sk);
     if (IS_ERR(psock))
         return PTR_ERR(psock);
 
-    if (psock) {
-        tcp_bpf_reinit(sk);
-        return 0;
+    if (!psock) {
+        psock = sk_psock_init(sk, map->numa_node);
+        if (!psock)
+            return -ENOMEM;
     }
 
-    psock = sk_psock_init(sk, map->numa_node);
-    if (!psock)
-        return -ENOMEM;
-
-    ret = tcp_bpf_init(sk);
+    ret = sock_map_init_proto(sk, psock);
     if (ret < 0)
         sk_psock_put(sk, psock);
     return ret;
···
                           struct sock *sk, u64 flags)
 {
     struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-    struct inet_connection_sock *icsk = inet_csk(sk);
     struct sk_psock_link *link;
     struct sk_psock *psock;
     struct sock *osk;
···
         return -EINVAL;
     if (unlikely(idx >= map->max_entries))
         return -E2BIG;
-    if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data)))
+    if (inet_csk_has_ulp(sk))
         return -EINVAL;
 
     link = sk_psock_init_link();
···
            ops->op == BPF_SOCK_OPS_TCP_LISTEN_CB;
 }
 
-static bool sock_map_sk_is_suitable(const struct sock *sk)
+static bool sk_is_tcp(const struct sock *sk)
 {
     return sk->sk_type == SOCK_STREAM &&
            sk->sk_protocol == IPPROTO_TCP;
 }
 
+static bool sk_is_udp(const struct sock *sk)
+{
+    return sk->sk_type == SOCK_DGRAM &&
+           sk->sk_protocol == IPPROTO_UDP;
+}
+
+static bool sock_map_sk_is_suitable(const struct sock *sk)
+{
+    return sk_is_tcp(sk) || sk_is_udp(sk);
+}
+
 static bool sock_map_sk_state_allowed(const struct sock *sk)
 {
-    return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
+    if (sk_is_tcp(sk))
+        return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
+    else if (sk_is_udp(sk))
+        return sk_hashed(sk);
+
+    return false;
 }
 
 static int sock_map_update_elem(struct bpf_map *map, void *key,
···
                           struct sock *sk, u64 flags)
 {
     struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-    struct inet_connection_sock *icsk = inet_csk(sk);
     u32 key_size = map->key_size, hash;
     struct bpf_htab_elem *elem, *elem_new;
     struct bpf_htab_bucket *bucket;
···
     WARN_ON_ONCE(!rcu_read_lock_held());
     if (unlikely(flags > BPF_EXIST))
         return -EINVAL;
-    if (unlikely(icsk->icsk_ulp_data))
+    if (inet_csk_has_ulp(sk))
         return -EINVAL;
 
     link = sk_psock_init_link();
···
     return 0;
 }
 
-void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link)
+static void sock_map_unlink(struct sock *sk, struct sk_psock_link *link)
 {
     switch (link->map->map_type) {
     case BPF_MAP_TYPE_SOCKMAP:
···
     default:
         break;
     }
+}
+
+static void sock_map_remove_links(struct sock *sk, struct sk_psock *psock)
+{
+    struct sk_psock_link *link;
+
+    while ((link = sk_psock_link_pop(psock))) {
+        sock_map_unlink(sk, link);
+        sk_psock_free_link(link);
+    }
+}
+
+void sock_map_unhash(struct sock *sk)
+{
+    void (*saved_unhash)(struct sock *sk);
+    struct sk_psock *psock;
+
+    rcu_read_lock();
+    psock = sk_psock(sk);
+    if (unlikely(!psock)) {
+        rcu_read_unlock();
+        if (sk->sk_prot->unhash)
+            sk->sk_prot->unhash(sk);
+        return;
+    }
+
+    saved_unhash = psock->saved_unhash;
+    sock_map_remove_links(sk, psock);
+    rcu_read_unlock();
+    saved_unhash(sk);
+}
+
+void sock_map_close(struct sock *sk, long timeout)
+{
+    void (*saved_close)(struct sock *sk, long timeout);
+    struct sk_psock *psock;
+
+    lock_sock(sk);
+    rcu_read_lock();
+    psock = sk_psock(sk);
+    if (unlikely(!psock)) {
+        rcu_read_unlock();
+        release_sock(sk);
+        return sk->sk_prot->close(sk, timeout);
+    }
+
+    saved_close = psock->saved_close;
+    sock_map_remove_links(sk, psock);
+    rcu_read_unlock();
+    release_sock(sk);
+    saved_close(sk, timeout);
 }
+1
net/ipv4/Makefile
···
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
 obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
+obj-$(CONFIG_BPF_STREAM_PARSER) += udp_bpf.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
+14 -100
net/ipv4/tcp_bpf.c
···
     return copied ? copied : err;
 }
 
-static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock)
-{
-    struct sk_psock_link *link;
-
-    while ((link = sk_psock_link_pop(psock))) {
-        sk_psock_unlink(sk, link);
-        sk_psock_free_link(link);
-    }
-}
-
-static void tcp_bpf_unhash(struct sock *sk)
-{
-    void (*saved_unhash)(struct sock *sk);
-    struct sk_psock *psock;
-
-    rcu_read_lock();
-    psock = sk_psock(sk);
-    if (unlikely(!psock)) {
-        rcu_read_unlock();
-        if (sk->sk_prot->unhash)
-            sk->sk_prot->unhash(sk);
-        return;
-    }
-
-    saved_unhash = psock->saved_unhash;
-    tcp_bpf_remove(sk, psock);
-    rcu_read_unlock();
-    saved_unhash(sk);
-}
-
-static void tcp_bpf_close(struct sock *sk, long timeout)
-{
-    void (*saved_close)(struct sock *sk, long timeout);
-    struct sk_psock *psock;
-
-    lock_sock(sk);
-    rcu_read_lock();
-    psock = sk_psock(sk);
-    if (unlikely(!psock)) {
-        rcu_read_unlock();
-        release_sock(sk);
-        return sk->sk_prot->close(sk, timeout);
-    }
-
-    saved_close = psock->saved_close;
-    tcp_bpf_remove(sk, psock);
-    rcu_read_unlock();
-    release_sock(sk);
-    saved_close(sk, timeout);
-}
-
+#ifdef CONFIG_BPF_STREAM_PARSER
 enum {
     TCP_BPF_IPV4,
     TCP_BPF_IPV6,
···
                                    struct proto *base)
 {
     prot[TCP_BPF_BASE]                    = *base;
-    prot[TCP_BPF_BASE].unhash             = tcp_bpf_unhash;
-    prot[TCP_BPF_BASE].close              = tcp_bpf_close;
+    prot[TCP_BPF_BASE].unhash             = sock_map_unhash;
+    prot[TCP_BPF_BASE].close              = sock_map_close;
     prot[TCP_BPF_BASE].recvmsg            = tcp_bpf_recvmsg;
     prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read;
···
 }
 core_initcall(tcp_bpf_v4_build_proto);
 
-static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock)
-{
-    int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
-    int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
-
-    sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]);
-}
-
-static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
-{
-    int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
-    int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
-
-    /* Reinit occurs when program types change e.g. TCP_BPF_TX is removed
-     * or added requiring sk_prot hook updates. We keep original saved
-     * hooks in this case.
-     *
-     * Pairs with lockless read in sk_clone_lock().
-     */
-    WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]);
-}
-
 static int tcp_bpf_assert_proto_ops(struct proto *ops)
 {
     /* In order to avoid retpoline, we make assumptions when we call
···
            ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
 }
 
-void tcp_bpf_reinit(struct sock *sk)
+struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
 {
-    struct sk_psock *psock;
+    int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
+    int config = psock->progs.msg_parser ? TCP_BPF_TX : TCP_BPF_BASE;
 
-    sock_owned_by_me(sk);
+    if (!psock->sk_proto) {
+        struct proto *ops = READ_ONCE(sk->sk_prot);
 
-    rcu_read_lock();
-    psock = sk_psock(sk);
-    tcp_bpf_reinit_sk_prot(sk, psock);
-    rcu_read_unlock();
-}
+        if (tcp_bpf_assert_proto_ops(ops))
+            return ERR_PTR(-EINVAL);
 
-int tcp_bpf_init(struct sock *sk)
-{
-    struct proto *ops = READ_ONCE(sk->sk_prot);
-    struct sk_psock *psock;
-
-    sock_owned_by_me(sk);
-
-    rcu_read_lock();
-    psock = sk_psock(sk);
-    if (unlikely(!psock || psock->sk_proto ||
-                 tcp_bpf_assert_proto_ops(ops))) {
-        rcu_read_unlock();
-        return -EINVAL;
+        tcp_bpf_check_v6_needs_rebuild(sk, ops);
     }
-    tcp_bpf_check_v6_needs_rebuild(sk, ops);
-    tcp_bpf_update_sk_prot(sk, psock);
-    rcu_read_unlock();
-    return 0;
+
+    return &tcp_bpf_prots[family][config];
 }
 
 /* If a child got cloned from a listening socket that had tcp_bpf
···
     if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
         newsk->sk_prot = sk->sk_prot_creator;
 }
+#endif /* CONFIG_BPF_STREAM_PARSER */
-7
net/ipv4/tcp_ulp.c
···
 {
     struct inet_connection_sock *icsk = inet_csk(sk);
 
-    if (!icsk->icsk_ulp_ops) {
-        sk->sk_write_space = write_space;
-        /* Pairs with lockless read in sk_clone_lock() */
-        WRITE_ONCE(sk->sk_prot, proto);
-        return;
-    }
-
     if (icsk->icsk_ulp_ops->update)
         icsk->icsk_ulp_ops->update(sk, proto, write_space);
 }
+53
net/ipv4/udp_bpf.c
···
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Cloudflare Ltd https://cloudflare.com */
+
+#include <linux/skmsg.h>
+#include <net/sock.h>
+#include <net/udp.h>
+
+enum {
+    UDP_BPF_IPV4,
+    UDP_BPF_IPV6,
+    UDP_BPF_NUM_PROTS,
+};
+
+static struct proto *udpv6_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(udpv6_prot_lock);
+static struct proto udp_bpf_prots[UDP_BPF_NUM_PROTS];
+
+static void udp_bpf_rebuild_protos(struct proto *prot, const struct proto *base)
+{
+    *prot        = *base;
+    prot->unhash = sock_map_unhash;
+    prot->close  = sock_map_close;
+}
+
+static void udp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
+{
+    if (sk->sk_family == AF_INET6 &&
+        unlikely(ops != smp_load_acquire(&udpv6_prot_saved))) {
+        spin_lock_bh(&udpv6_prot_lock);
+        if (likely(ops != udpv6_prot_saved)) {
+            udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV6], ops);
+            smp_store_release(&udpv6_prot_saved, ops);
+        }
+        spin_unlock_bh(&udpv6_prot_lock);
+    }
+}
+
+static int __init udp_bpf_v4_build_proto(void)
+{
+    udp_bpf_rebuild_protos(&udp_bpf_prots[UDP_BPF_IPV4], &udp_prot);
+    return 0;
+}
+core_initcall(udp_bpf_v4_build_proto);
+
+struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
+{
+    int family = sk->sk_family == AF_INET ? UDP_BPF_IPV4 : UDP_BPF_IPV6;
+
+    if (!psock->sk_proto)
+        udp_bpf_check_v6_needs_rebuild(sk, READ_ONCE(sk->sk_prot));
+
+    return &udp_bpf_prots[family];
+}
+2
scripts/bpf_helpers_doc.py
···
             'struct bpf_fib_lookup',
             'struct bpf_perf_event_data',
             'struct bpf_perf_event_value',
+            'struct bpf_pidns_info',
             'struct bpf_sock',
             'struct bpf_sock_addr',
             'struct bpf_sock_ops',
···
             'struct bpf_fib_lookup',
             'struct bpf_perf_event_data',
             'struct bpf_perf_event_value',
+            'struct bpf_pidns_info',
             'struct bpf_sock',
             'struct bpf_sock_addr',
             'struct bpf_sock_ops',
+2
tools/bpf/bpftool/.gitignore
···
 *.d
+/_bpftool
 /bpftool
 bpftool*.8
 bpf-helpers.*
 FEATURE-DUMP.bpftool
 feature
 libbpf
+profiler.skel.h
+19
tools/bpf/bpftool/Documentation/bpftool-prog.rst
···
 |	**bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
 |	**bpftool** **prog tracelog**
 |	**bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*]
+|	**bpftool** **prog profile** *PROG* [**duration** *DURATION*] *METRICs*
 |	**bpftool** **prog help**
 |
 |	*MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
···
 |	}
 |	*ATTACH_TYPE* := {
 |		**msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
 |	}
+|	*METRIC* := {
+|		**cycles** | **instructions** | **l1d_loads** | **llc_misses**
+|	}
···
 	not all of them can take the **ctx_in**/**ctx_out**
 	arguments. bpftool does not perform checks on program types.
 
+**bpftool prog profile** *PROG* [**duration** *DURATION*] *METRICs*
+	Profile *METRICs* for bpf program *PROG* for *DURATION*
+	seconds or until user hits Ctrl-C. *DURATION* is optional.
+	If *DURATION* is not specified, the profiling will run up to
+	UINT_MAX seconds.
+
 **bpftool prog help**
 	Print short help message.
···
 	xlated 488B jited 336B memlock 4096B map_ids 7
 
 **# rm /sys/fs/bpf/xdp1**
+
+|
+| **# bpftool prog profile id 337 duration 10 cycles instructions llc_misses**
+
+::
+
+	51397 run_cnt
+	40176203 cycles        (83.05%)
+	42518139 instructions  # 1.06 insns per cycle                 (83.39%)
+	123 llc_misses         # 2.89 LLC misses per million insns    (83.15%)
 
 SEE ALSO
 ========
+32 -4
tools/bpf/bpftool/Makefile
···
 
 INSTALL ?= install
 RM ?= rm -f
+CLANG ?= clang
 
 FEATURE_USER = .bpftool
-FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib
-FEATURE_DISPLAY = libbfd disassembler-four-args zlib
+FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib \
+	clang-bpf-global-var
+FEATURE_DISPLAY = libbfd disassembler-four-args zlib clang-bpf-global-var
 
 check_feat := 1
 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
···
 endif
 
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
+_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o
+
+ifeq ($(feature-clang-bpf-global-var),1)
+	__OBJS = $(OBJS)
+else
+	__OBJS = $(_OBJS)
+endif
+
+$(OUTPUT)_prog.o: prog.c
+	$(QUIET_CC)$(COMPILE.c) -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $<
+
+$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS)
+
+skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF)
+	$(QUIET_CLANG)$(CLANG) \
+		-I$(srctree)/tools/include/uapi/ \
+		-I$(LIBBPF_PATH) -I$(srctree)/tools/lib \
+		-g -O2 -target bpf -c $< -o $@
+
+profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o
+	$(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@
+
+$(OUTPUT)prog.o: prog.c profiler.skel.h
+	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
 
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
 	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
 
 $(OUTPUT)feature.o: | zdep
 
-$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
+$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS)
 
 $(OUTPUT)%.o: %.c
 	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
···
 clean: $(LIBBPF)-clean
 	$(call QUIET_CLEAN, bpftool)
 	$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+	$(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o
 	$(Q)$(RM) -r -- $(OUTPUT)libbpf/
 	$(call QUIET_CLEAN, core-gen)
 	$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
+66 -10
tools/bpf/bpftool/bash-completion/bpftool
···
 
     local PROG_TYPE='id pinned tag name'
     local MAP_TYPE='id pinned name'
+    local METRIC_TYPE='cycles instructions l1d_loads llc_misses'
     case $command in
         show|list)
             [[ $prev != "$command" ]] && return 0
···
                     _bpftool_get_prog_ids
                     ;;
                 name)
-                    _bpftool_get_map_names
+                    _bpftool_get_prog_names
                     ;;
                 pinned)
                     _filedir
···
         tracelog)
             return 0
             ;;
+        profile)
+            case $cword in
+                3)
+                    COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
+                    return 0
+                    ;;
+                4)
+                    case $prev in
+                        id)
+                            _bpftool_get_prog_ids
+                            ;;
+                        name)
+                            _bpftool_get_prog_names
+                            ;;
+                        pinned)
+                            _filedir
+                            ;;
+                    esac
+                    return 0
+                    ;;
+                5)
+                    COMPREPLY=( $( compgen -W "$METRIC_TYPE duration" -- "$cur" ) )
+                    return 0
+                    ;;
+                6)
+                    case $prev in
+                        duration)
+                            return 0
+                            ;;
+                        *)
+                            COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) )
+                            return 0
+                            ;;
+                    esac
+                    return 0
+                    ;;
+                *)
+                    COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) )
+                    return 0
+                    ;;
+            esac
+            ;;
         run)
-            if [[ ${#words[@]} -lt 5 ]]; then
-                _filedir
+            if [[ ${#words[@]} -eq 4 ]]; then
+                COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
                 return 0
             fi
             case $prev in
                 id)
                     _bpftool_get_prog_ids
+                    return 0
+                    ;;
+                name)
+                    _bpftool_get_prog_names
                     return 0
                     ;;
                 data_in|data_out|ctx_in|ctx_out)
···
         *)
             [[ $prev == $object ]] && \
                 COMPREPLY=( $( compgen -W 'dump help pin attach detach \
-                    load loadall show list tracelog run' -- "$cur" ) )
+                    load loadall show list tracelog run profile' -- "$cur" ) )
             ;;
     esac
     ;;
···
             esac
             ;;
         pin)
-            if [[ $prev == "$command" ]]; then
-                COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
-            else
-                _filedir
-            fi
+            case $prev in
+                $command)
+                    COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+                    ;;
+                id)
+                    _bpftool_get_map_ids
+                    ;;
+                name)
+                    _bpftool_get_map_names
+                    ;;
+            esac
             return 0
             ;;
         event_pipe)
···
     case $command in
         skeleton)
             _filedir
-        ;;
+            ;;
         *)
             [[ $prev == $object ]] && \
                 COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) )
···
     case $prev in
         id)
             _bpftool_get_prog_ids
+            ;;
+        name)
+            _bpftool_get_prog_names
             ;;
         pinned)
             _filedir
+5
tools/bpf/bpftool/btf.c
···
     if (IS_ERR(d))
         return PTR_ERR(d);
 
+    printf("#ifndef __VMLINUX_H__\n");
+    printf("#define __VMLINUX_H__\n");
+    printf("\n");
     printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
     printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n");
     printf("#endif\n\n");
···
     printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
     printf("#pragma clang attribute pop\n");
     printf("#endif\n");
+    printf("\n");
+    printf("#endif /* __VMLINUX_H__ */\n");
 
 done:
     btf_dump__free(d);
+11 -29
tools/bpf/bpftool/common.c
···
     return err;
 }
 
-int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
+int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***))
 {
-    unsigned int id;
-    char *endptr;
     int err;
     int fd;
 
-    if (argc < 3) {
-        p_err("too few arguments, id ID and FILE path is required");
-        return -1;
-    } else if (argc > 3) {
-        p_err("too many arguments");
-        return -1;
-    }
-
-    if (!is_prefix(*argv, "id")) {
-        p_err("expected 'id' got %s", *argv);
-        return -1;
-    }
-    NEXT_ARG();
-
-    id = strtoul(*argv, &endptr, 0);
-    if (*endptr) {
-        p_err("can't parse %s as ID", *argv);
-        return -1;
-    }
-    NEXT_ARG();
-
-    fd = get_fd_by_id(id);
-    if (fd < 0) {
-        p_err("can't open object by id (%u): %s", id, strerror(errno));
-        return -1;
-    }
+    fd = get_fd(&argc, &argv);
+    if (fd < 0)
+        return fd;
 
     err = do_pin_fd(fd, *argv);
···
     NEXT_ARGP();
 
     return 0;
+}
+
+int __printf(2, 0)
+print_all_levels(__maybe_unused enum libbpf_print_level level,
+                 const char *format, va_list args)
+{
+    return vfprintf(stderr, format, args);
 }
-7
tools/bpf/bpftool/main.c
···
     return 0;
 }
 
-static int __printf(2, 0)
-print_all_levels(__maybe_unused enum libbpf_print_level level,
-                 const char *format, va_list args)
-{
-    return vfprintf(stderr, format, args);
-}
-
 int cmd_select(const struct cmd *cmds, int argc, char **argv,
                int (*help)(int argc, char **argv))
 {
+6 -1
tools/bpf/bpftool/main.h
···
 #include <linux/hashtable.h>
 #include <tools/libc_compat.h>
 
+#include <bpf/libbpf.h>
+
 #include "json_writer.h"
 
 #define ptr_to_u64(ptr)	((__u64)(unsigned long)(ptr))
···
 int open_obj_pinned(char *path, bool quiet);
 int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
 int mount_bpffs_for_pin(const char *name);
-int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
+int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
 int do_pin_fd(int fd, const char *name);
 
 int do_prog(int argc, char **arg);
···
 int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb);
 int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind,
                    const char *devname, int ifindex);
+
+int print_all_levels(__maybe_unused enum libbpf_print_level level,
+                     const char *format, va_list args);
 #endif
+1 -1
tools/bpf/bpftool/map.c
···
 {
     int err;
 
-    err = do_pin_any(argc, argv, bpf_map_get_fd_by_id);
+    err = do_pin_any(argc, argv, map_parse_fd);
     if (!err && json_output)
         jsonw_null(json_wtr);
     return err;
+448 -6
tools/bpf/bpftool/prog.c
···
 #define _GNU_SOURCE
 #include <errno.h>
 #include <fcntl.h>
+#include <signal.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
···
 #include <time.h>
 #include <unistd.h>
 #include <net/if.h>
+#include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/syscall.h>
 
 #include <linux/err.h>
+#include <linux/perf_event.h>
 #include <linux/sizes.h>
 
 #include <bpf/bpf.h>
···
 {
     int err;
 
-    err = do_pin_any(argc, argv, bpf_prog_get_fd_by_id);
+    err = do_pin_any(argc, argv, prog_parse_fd);
     if (!err && json_output)
         jsonw_null(json_wtr);
     return err;
···
     return err;
 }
 
+static int
+get_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+                      enum bpf_attach_type *expected_attach_type)
+{
+    libbpf_print_fn_t print_backup;
+    int ret;
+
+    ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type);
+    if (!ret)
+        return ret;
+
+    /* libbpf_prog_type_by_name() failed, let's re-run with debug level */
+    print_backup = libbpf_set_print(print_all_levels);
+    ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type);
+    libbpf_set_print(print_backup);
+
+    return ret;
+}
+
 static int load_with_options(int argc, char **argv, bool first_prog_only)
 {
     enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC;
···
         strcat(type, *argv);
         strcat(type, "/");
 
-        err = libbpf_prog_type_by_name(type, &common_prog_type,
-                                       &expected_attach_type);
+        err = get_prog_type_by_name(type, &common_prog_type,
+                                    &expected_attach_type);
         free(type);
         if (err < 0)
             goto err_free_reuse_maps;
···
         if (prog_type == BPF_PROG_TYPE_UNSPEC) {
             const char *sec_name = bpf_program__title(pos, false);
 
-            err = libbpf_prog_type_by_name(sec_name, &prog_type,
-                                           &expected_attach_type);
+            err = get_prog_type_by_name(sec_name, &prog_type,
+                                        &expected_attach_type);
             if (err < 0)
                 goto err_close_obj;
         }
···
     return load_with_options(argc, argv, false);
 }
 
+#ifdef BPFTOOL_WITHOUT_SKELETONS
+
+static int do_profile(int argc, char **argv)
+{
+    p_err("bpftool prog profile command is not supported. Please build bpftool with clang >= 10.0.0");
+    return 0;
+}
+
+#else /* BPFTOOL_WITHOUT_SKELETONS */
+
+#include "profiler.skel.h"
+
+struct profile_metric {
+    const char *name;
+    struct bpf_perf_event_value val;
+    struct perf_event_attr attr;
+    bool selected;
+
+    /* calculate ratios like instructions per cycle */
+    const int ratio_metric;	/* 0 for N/A, 1 for index 0 (cycles) */
+    const char *ratio_desc;
+    const float ratio_mul;
+} metrics[] = {
+    {
+        .name = "cycles",
+        .attr = {
+            .type = PERF_TYPE_HARDWARE,
+            .config = PERF_COUNT_HW_CPU_CYCLES,
+            .exclude_user = 1,
+        },
+    },
+    {
+        .name = "instructions",
+        .attr = {
+            .type = PERF_TYPE_HARDWARE,
+            .config = PERF_COUNT_HW_INSTRUCTIONS,
+            .exclude_user = 1,
+        },
+        .ratio_metric = 1,
+        .ratio_desc = "insns per cycle",
+        .ratio_mul = 1.0,
+    },
+    {
+        .name = "l1d_loads",
+        .attr = {
+            .type = PERF_TYPE_HW_CACHE,
+            .config =
+                PERF_COUNT_HW_CACHE_L1D |
+                (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+                (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
+            .exclude_user = 1,
+        },
+    },
+    {
+        .name = "llc_misses",
+        .attr = {
+            .type = PERF_TYPE_HW_CACHE,
+            .config =
+                PERF_COUNT_HW_CACHE_LL |
+                (PERF_COUNT_HW_CACHE_OP_READ <<
8) | 1600 + (PERF_COUNT_HW_CACHE_RESULT_MISS << 16), 1601 + .exclude_user = 1 1602 + }, 1603 + .ratio_metric = 2, 1604 + .ratio_desc = "LLC misses per million insns", 1605 + .ratio_mul = 1e6, 1606 + }, 1607 + }; 1608 + 1609 + static __u64 profile_total_count; 1610 + 1611 + #define MAX_NUM_PROFILE_METRICS 4 1612 + 1613 + static int profile_parse_metrics(int argc, char **argv) 1614 + { 1615 + unsigned int metric_cnt; 1616 + int selected_cnt = 0; 1617 + unsigned int i; 1618 + 1619 + metric_cnt = sizeof(metrics) / sizeof(struct profile_metric); 1620 + 1621 + while (argc > 0) { 1622 + for (i = 0; i < metric_cnt; i++) { 1623 + if (is_prefix(argv[0], metrics[i].name)) { 1624 + if (!metrics[i].selected) 1625 + selected_cnt++; 1626 + metrics[i].selected = true; 1627 + break; 1628 + } 1629 + } 1630 + if (i == metric_cnt) { 1631 + p_err("unknown metric %s", argv[0]); 1632 + return -1; 1633 + } 1634 + NEXT_ARG(); 1635 + } 1636 + if (selected_cnt > MAX_NUM_PROFILE_METRICS) { 1637 + p_err("too many (%d) metrics, please specify no more than %d metrics at at time", 1638 + selected_cnt, MAX_NUM_PROFILE_METRICS); 1639 + return -1; 1640 + } 1641 + return selected_cnt; 1642 + } 1643 + 1644 + static void profile_read_values(struct profiler_bpf *obj) 1645 + { 1646 + __u32 m, cpu, num_cpu = obj->rodata->num_cpu; 1647 + int reading_map_fd, count_map_fd; 1648 + __u64 counts[num_cpu]; 1649 + __u32 key = 0; 1650 + int err; 1651 + 1652 + reading_map_fd = bpf_map__fd(obj->maps.accum_readings); 1653 + count_map_fd = bpf_map__fd(obj->maps.counts); 1654 + if (reading_map_fd < 0 || count_map_fd < 0) { 1655 + p_err("failed to get fd for map"); 1656 + return; 1657 + } 1658 + 1659 + err = bpf_map_lookup_elem(count_map_fd, &key, counts); 1660 + if (err) { 1661 + p_err("failed to read count_map: %s", strerror(errno)); 1662 + return; 1663 + } 1664 + 1665 + profile_total_count = 0; 1666 + for (cpu = 0; cpu < num_cpu; cpu++) 1667 + profile_total_count += counts[cpu]; 1668 + 1669 + for (m = 0; m < 
ARRAY_SIZE(metrics); m++) { 1670 + struct bpf_perf_event_value values[num_cpu]; 1671 + 1672 + if (!metrics[m].selected) 1673 + continue; 1674 + 1675 + err = bpf_map_lookup_elem(reading_map_fd, &key, values); 1676 + if (err) { 1677 + p_err("failed to read reading_map: %s", 1678 + strerror(errno)); 1679 + return; 1680 + } 1681 + for (cpu = 0; cpu < num_cpu; cpu++) { 1682 + metrics[m].val.counter += values[cpu].counter; 1683 + metrics[m].val.enabled += values[cpu].enabled; 1684 + metrics[m].val.running += values[cpu].running; 1685 + } 1686 + key++; 1687 + } 1688 + } 1689 + 1690 + static void profile_print_readings_json(void) 1691 + { 1692 + __u32 m; 1693 + 1694 + jsonw_start_array(json_wtr); 1695 + for (m = 0; m < ARRAY_SIZE(metrics); m++) { 1696 + if (!metrics[m].selected) 1697 + continue; 1698 + jsonw_start_object(json_wtr); 1699 + jsonw_string_field(json_wtr, "metric", metrics[m].name); 1700 + jsonw_lluint_field(json_wtr, "run_cnt", profile_total_count); 1701 + jsonw_lluint_field(json_wtr, "value", metrics[m].val.counter); 1702 + jsonw_lluint_field(json_wtr, "enabled", metrics[m].val.enabled); 1703 + jsonw_lluint_field(json_wtr, "running", metrics[m].val.running); 1704 + 1705 + jsonw_end_object(json_wtr); 1706 + } 1707 + jsonw_end_array(json_wtr); 1708 + } 1709 + 1710 + static void profile_print_readings_plain(void) 1711 + { 1712 + __u32 m; 1713 + 1714 + printf("\n%18llu %-20s\n", profile_total_count, "run_cnt"); 1715 + for (m = 0; m < ARRAY_SIZE(metrics); m++) { 1716 + struct bpf_perf_event_value *val = &metrics[m].val; 1717 + int r; 1718 + 1719 + if (!metrics[m].selected) 1720 + continue; 1721 + printf("%18llu %-20s", val->counter, metrics[m].name); 1722 + 1723 + r = metrics[m].ratio_metric - 1; 1724 + if (r >= 0 && metrics[r].selected && 1725 + metrics[r].val.counter > 0) { 1726 + printf("# %8.2f %-30s", 1727 + val->counter * metrics[m].ratio_mul / 1728 + metrics[r].val.counter, 1729 + metrics[m].ratio_desc); 1730 + } else { 1731 + printf("%-41s", ""); 1732 + } 
1733 + 1734 + if (val->enabled > val->running) 1735 + printf("(%4.2f%%)", 1736 + val->running * 100.0 / val->enabled); 1737 + printf("\n"); 1738 + } 1739 + } 1740 + 1741 + static void profile_print_readings(void) 1742 + { 1743 + if (json_output) 1744 + profile_print_readings_json(); 1745 + else 1746 + profile_print_readings_plain(); 1747 + } 1748 + 1749 + static char *profile_target_name(int tgt_fd) 1750 + { 1751 + struct bpf_prog_info_linear *info_linear; 1752 + struct bpf_func_info *func_info; 1753 + const struct btf_type *t; 1754 + char *name = NULL; 1755 + struct btf *btf; 1756 + 1757 + info_linear = bpf_program__get_prog_info_linear( 1758 + tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); 1759 + if (IS_ERR_OR_NULL(info_linear)) { 1760 + p_err("failed to get info_linear for prog FD %d", tgt_fd); 1761 + return NULL; 1762 + } 1763 + 1764 + if (info_linear->info.btf_id == 0 || 1765 + btf__get_from_id(info_linear->info.btf_id, &btf)) { 1766 + p_err("prog FD %d doesn't have valid btf", tgt_fd); 1767 + goto out; 1768 + } 1769 + 1770 + func_info = (struct bpf_func_info *)(info_linear->info.func_info); 1771 + t = btf__type_by_id(btf, func_info[0].type_id); 1772 + if (!t) { 1773 + p_err("btf %d doesn't have type %d", 1774 + info_linear->info.btf_id, func_info[0].type_id); 1775 + goto out; 1776 + } 1777 + name = strdup(btf__name_by_offset(btf, t->name_off)); 1778 + out: 1779 + free(info_linear); 1780 + return name; 1781 + } 1782 + 1783 + static struct profiler_bpf *profile_obj; 1784 + static int profile_tgt_fd = -1; 1785 + static char *profile_tgt_name; 1786 + static int *profile_perf_events; 1787 + static int profile_perf_event_cnt; 1788 + 1789 + static void profile_close_perf_events(struct profiler_bpf *obj) 1790 + { 1791 + int i; 1792 + 1793 + for (i = profile_perf_event_cnt - 1; i >= 0; i--) 1794 + close(profile_perf_events[i]); 1795 + 1796 + free(profile_perf_events); 1797 + profile_perf_event_cnt = 0; 1798 + } 1799 + 1800 + static int profile_open_perf_events(struct 
profiler_bpf *obj) 1801 + { 1802 + unsigned int cpu, m; 1803 + int map_fd, pmu_fd; 1804 + 1805 + profile_perf_events = calloc( 1806 + sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); 1807 + if (!profile_perf_events) { 1808 + p_err("failed to allocate memory for perf_event array: %s", 1809 + strerror(errno)); 1810 + return -1; 1811 + } 1812 + map_fd = bpf_map__fd(obj->maps.events); 1813 + if (map_fd < 0) { 1814 + p_err("failed to get fd for events map"); 1815 + return -1; 1816 + } 1817 + 1818 + for (m = 0; m < ARRAY_SIZE(metrics); m++) { 1819 + if (!metrics[m].selected) 1820 + continue; 1821 + for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) { 1822 + pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr, 1823 + -1/*pid*/, cpu, -1/*group_fd*/, 0); 1824 + if (pmu_fd < 0 || 1825 + bpf_map_update_elem(map_fd, &profile_perf_event_cnt, 1826 + &pmu_fd, BPF_ANY) || 1827 + ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) { 1828 + p_err("failed to create event %s on cpu %d", 1829 + metrics[m].name, cpu); 1830 + return -1; 1831 + } 1832 + profile_perf_events[profile_perf_event_cnt++] = pmu_fd; 1833 + } 1834 + } 1835 + return 0; 1836 + } 1837 + 1838 + static void profile_print_and_cleanup(void) 1839 + { 1840 + profile_close_perf_events(profile_obj); 1841 + profile_read_values(profile_obj); 1842 + profile_print_readings(); 1843 + profiler_bpf__destroy(profile_obj); 1844 + 1845 + close(profile_tgt_fd); 1846 + free(profile_tgt_name); 1847 + } 1848 + 1849 + static void int_exit(int signo) 1850 + { 1851 + profile_print_and_cleanup(); 1852 + exit(0); 1853 + } 1854 + 1855 + static int do_profile(int argc, char **argv) 1856 + { 1857 + int num_metric, num_cpu, err = -1; 1858 + struct bpf_program *prog; 1859 + unsigned long duration; 1860 + char *endptr; 1861 + 1862 + /* we at least need two args for the prog and one metric */ 1863 + if (!REQ_ARGS(3)) 1864 + return -EINVAL; 1865 + 1866 + /* parse target fd */ 1867 + profile_tgt_fd = prog_parse_fd(&argc, &argv); 1868 + if 
(profile_tgt_fd < 0) { 1869 + p_err("failed to parse fd"); 1870 + return -1; 1871 + } 1872 + 1873 + /* parse profiling optional duration */ 1874 + if (argc > 2 && is_prefix(argv[0], "duration")) { 1875 + NEXT_ARG(); 1876 + duration = strtoul(*argv, &endptr, 0); 1877 + if (*endptr) 1878 + usage(); 1879 + NEXT_ARG(); 1880 + } else { 1881 + duration = UINT_MAX; 1882 + } 1883 + 1884 + num_metric = profile_parse_metrics(argc, argv); 1885 + if (num_metric <= 0) 1886 + goto out; 1887 + 1888 + num_cpu = libbpf_num_possible_cpus(); 1889 + if (num_cpu <= 0) { 1890 + p_err("failed to identify number of CPUs"); 1891 + goto out; 1892 + } 1893 + 1894 + profile_obj = profiler_bpf__open(); 1895 + if (!profile_obj) { 1896 + p_err("failed to open and/or load BPF object"); 1897 + goto out; 1898 + } 1899 + 1900 + profile_obj->rodata->num_cpu = num_cpu; 1901 + profile_obj->rodata->num_metric = num_metric; 1902 + 1903 + /* adjust map sizes */ 1904 + bpf_map__resize(profile_obj->maps.events, num_metric * num_cpu); 1905 + bpf_map__resize(profile_obj->maps.fentry_readings, num_metric); 1906 + bpf_map__resize(profile_obj->maps.accum_readings, num_metric); 1907 + bpf_map__resize(profile_obj->maps.counts, 1); 1908 + 1909 + /* change target name */ 1910 + profile_tgt_name = profile_target_name(profile_tgt_fd); 1911 + if (!profile_tgt_name) 1912 + goto out; 1913 + 1914 + bpf_object__for_each_program(prog, profile_obj->obj) { 1915 + err = bpf_program__set_attach_target(prog, profile_tgt_fd, 1916 + profile_tgt_name); 1917 + if (err) { 1918 + p_err("failed to set attach target\n"); 1919 + goto out; 1920 + } 1921 + } 1922 + 1923 + set_max_rlimit(); 1924 + err = profiler_bpf__load(profile_obj); 1925 + if (err) { 1926 + p_err("failed to load profile_obj"); 1927 + goto out; 1928 + } 1929 + 1930 + err = profile_open_perf_events(profile_obj); 1931 + if (err) 1932 + goto out; 1933 + 1934 + err = profiler_bpf__attach(profile_obj); 1935 + if (err) { 1936 + p_err("failed to attach profile_obj"); 1937 + goto 
out; 1938 + } 1939 + signal(SIGINT, int_exit); 1940 + 1941 + sleep(duration); 1942 + profile_print_and_cleanup(); 1943 + return 0; 1944 + 1945 + out: 1946 + profile_close_perf_events(profile_obj); 1947 + if (profile_obj) 1948 + profiler_bpf__destroy(profile_obj); 1949 + close(profile_tgt_fd); 1950 + free(profile_tgt_name); 1951 + return err; 1952 + } 1953 + 1954 + #endif /* BPFTOOL_WITHOUT_SKELETONS */ 1955 + 1563 1956 static int do_help(int argc, char **argv) 1564 1957 { 1565 1958 if (json_output) { ··· 1999 1560 " [data_out FILE [data_size_out L]] \\\n" 2000 1561 " [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n" 2001 1562 " [repeat N]\n" 1563 + " %s %s profile PROG [duration DURATION] METRICs\n" 2002 1564 " %s %s tracelog\n" 2003 1565 " %s %s help\n" 2004 1566 "\n" ··· 2017 1577 " struct_ops | fentry | fexit | freplace }\n" 2018 1578 " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" 2019 1579 " flow_dissector }\n" 1580 + " METRIC := { cycles | instructions | l1d_loads | llc_misses }\n" 2020 1581 " " HELP_SPEC_OPTIONS "\n" 2021 1582 "", 2022 1583 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], 2023 1584 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], 2024 1585 bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], 2025 - bin_name, argv[-2]); 1586 + bin_name, argv[-2], bin_name, argv[-2]); 2026 1587 2027 1588 return 0; 2028 1589 } ··· 2040 1599 { "detach", do_detach }, 2041 1600 { "tracelog", do_tracelog }, 2042 1601 { "run", do_run }, 1602 + { "profile", do_profile }, 2043 1603 { 0 } 2044 1604 }; 2045 1605
+119
tools/bpf/bpftool/skeleton/profiler.bpf.c
···
1 + // SPDX-License-Identifier: GPL-2.0
2 + // Copyright (c) 2020 Facebook
3 + #include "profiler.h"
4 + #include <linux/bpf.h>
5 + #include <bpf/bpf_helpers.h>
6 + #include <bpf/bpf_tracing.h>
7 +
8 + /* map of perf event fds, num_cpu * num_metric entries */
9 + struct {
10 + 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
11 + 	__uint(key_size, sizeof(u32));
12 + 	__uint(value_size, sizeof(int));
13 + } events SEC(".maps");
14 +
15 + /* readings at fentry */
16 + struct {
17 + 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
18 + 	__uint(key_size, sizeof(u32));
19 + 	__uint(value_size, sizeof(struct bpf_perf_event_value));
20 + } fentry_readings SEC(".maps");
21 +
22 + /* accumulated readings */
23 + struct {
24 + 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
25 + 	__uint(key_size, sizeof(u32));
26 + 	__uint(value_size, sizeof(struct bpf_perf_event_value));
27 + } accum_readings SEC(".maps");
28 +
29 + /* sample counts, one per cpu */
30 + struct {
31 + 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
32 + 	__uint(key_size, sizeof(u32));
33 + 	__uint(value_size, sizeof(u64));
34 + } counts SEC(".maps");
35 +
36 + const volatile __u32 num_cpu = 1;
37 + const volatile __u32 num_metric = 1;
38 + #define MAX_NUM_MATRICS 4
39 +
40 + SEC("fentry/XXX")
41 + int BPF_PROG(fentry_XXX)
42 + {
43 + 	struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS];
44 + 	u32 key = bpf_get_smp_processor_id();
45 + 	u32 i;
46 +
47 + 	/* look up before reading, to reduce error */
48 + 	for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
49 + 		u32 flag = i;
50 +
51 + 		ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag);
52 + 		if (!ptrs[i])
53 + 			return 0;
54 + 	}
55 +
56 + 	for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
57 + 		struct bpf_perf_event_value reading;
58 + 		int err;
59 +
60 + 		err = bpf_perf_event_read_value(&events, key, &reading,
61 + 						sizeof(reading));
62 + 		if (err)
63 + 			return 0;
64 + 		*(ptrs[i]) = reading;
65 + 		key += num_cpu;
66 + 	}
67 +
68 + 	return 0;
69 + }
70 +
71 + static inline void
72 + fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
73 + {
74 + 	struct bpf_perf_event_value *before, diff, *accum;
75 +
76 + 	before = bpf_map_lookup_elem(&fentry_readings, &id);
77 + 	/* only account samples with a valid fentry_reading */
78 + 	if (before && before->counter) {
79 + 		struct bpf_perf_event_value *accum;
80 +
81 + 		diff.counter = after->counter - before->counter;
82 + 		diff.enabled = after->enabled - before->enabled;
83 + 		diff.running = after->running - before->running;
84 +
85 + 		accum = bpf_map_lookup_elem(&accum_readings, &id);
86 + 		if (accum) {
87 + 			accum->counter += diff.counter;
88 + 			accum->enabled += diff.enabled;
89 + 			accum->running += diff.running;
90 + 		}
91 + 	}
92 + }
93 +
94 + SEC("fexit/XXX")
95 + int BPF_PROG(fexit_XXX)
96 + {
97 + 	struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
98 + 	u32 cpu = bpf_get_smp_processor_id();
99 + 	u32 i, one = 1, zero = 0;
100 + 	int err;
101 + 	u64 *count;
102 +
103 + 	/* read all events before updating the maps, to reduce error */
104 + 	for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
105 + 		err = bpf_perf_event_read_value(&events, cpu + i * num_cpu,
106 + 						readings + i, sizeof(*readings));
107 + 		if (err)
108 + 			return 0;
109 + 	}
110 + 	count = bpf_map_lookup_elem(&counts, &zero);
111 + 	if (count) {
112 + 		*count += 1;
113 + 		for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++)
114 + 			fexit_update_maps(i, &readings[i]);
115 + 	}
116 + 	return 0;
117 + }
118 +
119 + char LICENSE[] SEC("license") = "GPL";
+46
tools/bpf/bpftool/skeleton/profiler.h
···
1 + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
2 + #ifndef __PROFILER_H
3 + #define __PROFILER_H
4 +
5 + /* useful typedefs from vmlinux.h */
6 +
7 + typedef signed char __s8;
8 + typedef unsigned char __u8;
9 + typedef short int __s16;
10 + typedef short unsigned int __u16;
11 + typedef int __s32;
12 + typedef unsigned int __u32;
13 + typedef long long int __s64;
14 + typedef long long unsigned int __u64;
15 +
16 + typedef __s8 s8;
17 + typedef __u8 u8;
18 + typedef __s16 s16;
19 + typedef __u16 u16;
20 + typedef __s32 s32;
21 + typedef __u32 u32;
22 + typedef __s64 s64;
23 + typedef __u64 u64;
24 +
25 + enum {
26 + 	false = 0,
27 + 	true = 1,
28 + };
29 +
30 + #ifdef __CHECKER__
31 + #define __bitwise__ __attribute__((bitwise))
32 + #else
33 + #define __bitwise__
34 + #endif
35 +
36 + typedef __u16 __bitwise__ __le16;
37 + typedef __u16 __bitwise__ __be16;
38 + typedef __u32 __bitwise__ __le32;
39 + typedef __u32 __bitwise__ __be32;
40 + typedef __u64 __bitwise__ __le64;
41 + typedef __u64 __bitwise__ __be64;
42 +
43 + typedef __u16 __bitwise__ __sum16;
44 + typedef __u32 __bitwise__ __wsum;
45 +
46 + #endif /* __PROFILER_H */
+1 -3
tools/bpf/runqslower/runqslower.bpf.c
···
5 5 	#include "runqslower.h"
6 6
7 7 	#define TASK_RUNNING 0
8   -
9   - 	#define BPF_F_INDEX_MASK 0xffffffffULL
10  - 	#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
  8 + 	#define BPF_F_CURRENT_CPU 0xffffffffULL
11 9
12 10 	const volatile __u64 min_us = 0;
13 11 	const volatile pid_t targ_pid = 0;
+8 -1
tools/build/feature/Makefile
···
67 67 	test-llvm.bin \
68 68 	test-llvm-version.bin \
69 69 	test-libaio.bin \
70   - 	test-libzstd.bin
   70 + 	test-libzstd.bin \
   71 + 	test-clang-bpf-global-var.bin
71 72
72 73 	FILES := $(addprefix $(OUTPUT),$(FILES))
73 74
···
76 75 	CXX ?= $(CROSS_COMPILE)g++
77 76 	PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
78 77 	LLVM_CONFIG ?= llvm-config
   79 + 	CLANG ?= clang
80 80 	all: $(FILES)
81 81
···
322 320
323 321 	$(OUTPUT)test-libzstd.bin:
324 322 		$(BUILD) -lzstd
    323 +
    324 + 	$(OUTPUT)test-clang-bpf-global-var.bin:
    325 + 		$(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \
    326 + 			grep BTF_KIND_VAR
    327 +
325 328
326 329 	###############################
+4
tools/build/feature/test-clang-bpf-global-var.c
···
1 + // SPDX-License-Identifier: GPL-2.0
2 + // Copyright (c) 2020 Facebook
3 +
4 + volatile int global_value_for_test = 1;
+155 -68
tools/include/uapi/linux/bpf.h
··· 73 73 /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ 74 74 struct bpf_lpm_trie_key { 75 75 __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ 76 - __u8 data[0]; /* Arbitrary size */ 76 + __u8 data[]; /* Arbitrary size */ 77 77 }; 78 78 79 79 struct bpf_cgroup_storage_key { ··· 210 210 BPF_TRACE_RAW_TP, 211 211 BPF_TRACE_FENTRY, 212 212 BPF_TRACE_FEXIT, 213 + BPF_MODIFY_RETURN, 213 214 __MAX_BPF_ATTACH_TYPE 214 215 }; 215 216 ··· 326 325 #define BPF_PSEUDO_CALL 1 327 326 328 327 /* flags for BPF_MAP_UPDATE_ELEM command */ 329 - #define BPF_ANY 0 /* create new element or update existing */ 330 - #define BPF_NOEXIST 1 /* create new element if it didn't exist */ 331 - #define BPF_EXIST 2 /* update existing element */ 332 - #define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ 328 + enum { 329 + BPF_ANY = 0, /* create new element or update existing */ 330 + BPF_NOEXIST = 1, /* create new element if it didn't exist */ 331 + BPF_EXIST = 2, /* update existing element */ 332 + BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ 333 + }; 333 334 334 335 /* flags for BPF_MAP_CREATE command */ 335 - #define BPF_F_NO_PREALLOC (1U << 0) 336 + enum { 337 + BPF_F_NO_PREALLOC = (1U << 0), 336 338 /* Instead of having one common LRU list in the 337 339 * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list 338 340 * which can scale and perform better. 339 341 * Note, the LRU nodes (including free nodes) cannot be moved 340 342 * across different LRU lists. 341 343 */ 342 - #define BPF_F_NO_COMMON_LRU (1U << 1) 344 + BPF_F_NO_COMMON_LRU = (1U << 1), 343 345 /* Specify numa node during map creation */ 344 - #define BPF_F_NUMA_NODE (1U << 2) 345 - 346 - #define BPF_OBJ_NAME_LEN 16U 346 + BPF_F_NUMA_NODE = (1U << 2), 347 347 348 348 /* Flags for accessing BPF object from syscall side. 
*/ 349 - #define BPF_F_RDONLY (1U << 3) 350 - #define BPF_F_WRONLY (1U << 4) 349 + BPF_F_RDONLY = (1U << 3), 350 + BPF_F_WRONLY = (1U << 4), 351 351 352 352 /* Flag for stack_map, store build_id+offset instead of pointer */ 353 - #define BPF_F_STACK_BUILD_ID (1U << 5) 353 + BPF_F_STACK_BUILD_ID = (1U << 5), 354 354 355 355 /* Zero-initialize hash function seed. This should only be used for testing. */ 356 - #define BPF_F_ZERO_SEED (1U << 6) 356 + BPF_F_ZERO_SEED = (1U << 6), 357 357 358 358 /* Flags for accessing BPF object from program side. */ 359 - #define BPF_F_RDONLY_PROG (1U << 7) 360 - #define BPF_F_WRONLY_PROG (1U << 8) 359 + BPF_F_RDONLY_PROG = (1U << 7), 360 + BPF_F_WRONLY_PROG = (1U << 8), 361 361 362 362 /* Clone map from listener for newly accepted socket */ 363 - #define BPF_F_CLONE (1U << 9) 363 + BPF_F_CLONE = (1U << 9), 364 364 365 365 /* Enable memory-mapping BPF map */ 366 - #define BPF_F_MMAPABLE (1U << 10) 366 + BPF_F_MMAPABLE = (1U << 10), 367 + }; 367 368 368 369 /* Flags for BPF_PROG_QUERY. */ 369 370 ··· 393 390 __u64 ip; 394 391 }; 395 392 }; 393 + 394 + #define BPF_OBJ_NAME_LEN 16U 396 395 397 396 union bpf_attr { 398 397 struct { /* anonymous struct used by BPF_MAP_CREATE command */ ··· 2914 2909 * of sizeof(struct perf_branch_entry). 2915 2910 * 2916 2911 * **-ENOENT** if architecture does not support branch records. 2912 + * 2913 + * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) 2914 + * Description 2915 + * Returns 0 on success, values for *pid* and *tgid* as seen from the current 2916 + * *namespace* will be returned in *nsdata*. 2917 + * 2918 + * On failure, the returned value is one of the following: 2919 + * 2920 + * **-EINVAL** if dev and inum supplied don't match dev_t and inode number 2921 + * with nsfs of current task, or if dev conversion to dev_t lost high bits. 2922 + * 2923 + * **-ENOENT** if pidns does not exists for the current task. 
2924 + * 2925 + * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) 2926 + * Description 2927 + * Write raw *data* blob into a special BPF perf event held by 2928 + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf 2929 + * event must have the following attributes: **PERF_SAMPLE_RAW** 2930 + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and 2931 + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. 2932 + * 2933 + * The *flags* are used to indicate the index in *map* for which 2934 + * the value must be put, masked with **BPF_F_INDEX_MASK**. 2935 + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** 2936 + * to indicate that the index of the current CPU core should be 2937 + * used. 2938 + * 2939 + * The value to write, of *size*, is passed through eBPF stack and 2940 + * pointed by *data*. 2941 + * 2942 + * *ctx* is a pointer to in-kernel struct xdp_buff. 2943 + * 2944 + * This helper is similar to **bpf_perf_eventoutput**\ () but 2945 + * restricted to raw_tracepoint bpf programs. 2946 + * Return 2947 + * 0 on success, or a negative error in case of failure. 2917 2948 */ 2918 2949 #define __BPF_FUNC_MAPPER(FN) \ 2919 2950 FN(unspec), \ ··· 3071 3030 FN(tcp_send_ack), \ 3072 3031 FN(send_signal_thread), \ 3073 3032 FN(jiffies64), \ 3074 - FN(read_branch_records), 3033 + FN(read_branch_records), \ 3034 + FN(get_ns_current_pid_tgid), \ 3035 + FN(xdp_output), 3075 3036 3076 3037 /* integer value in 'imm' field of BPF_CALL instruction selects which helper 3077 3038 * function eBPF program intends to call ··· 3088 3045 /* All flags used by eBPF helper functions, placed here. */ 3089 3046 3090 3047 /* BPF_FUNC_skb_store_bytes flags. 
*/ 3091 - #define BPF_F_RECOMPUTE_CSUM (1ULL << 0) 3092 - #define BPF_F_INVALIDATE_HASH (1ULL << 1) 3048 + enum { 3049 + BPF_F_RECOMPUTE_CSUM = (1ULL << 0), 3050 + BPF_F_INVALIDATE_HASH = (1ULL << 1), 3051 + }; 3093 3052 3094 3053 /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. 3095 3054 * First 4 bits are for passing the header field size. 3096 3055 */ 3097 - #define BPF_F_HDR_FIELD_MASK 0xfULL 3056 + enum { 3057 + BPF_F_HDR_FIELD_MASK = 0xfULL, 3058 + }; 3098 3059 3099 3060 /* BPF_FUNC_l4_csum_replace flags. */ 3100 - #define BPF_F_PSEUDO_HDR (1ULL << 4) 3101 - #define BPF_F_MARK_MANGLED_0 (1ULL << 5) 3102 - #define BPF_F_MARK_ENFORCE (1ULL << 6) 3061 + enum { 3062 + BPF_F_PSEUDO_HDR = (1ULL << 4), 3063 + BPF_F_MARK_MANGLED_0 = (1ULL << 5), 3064 + BPF_F_MARK_ENFORCE = (1ULL << 6), 3065 + }; 3103 3066 3104 3067 /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ 3105 - #define BPF_F_INGRESS (1ULL << 0) 3068 + enum { 3069 + BPF_F_INGRESS = (1ULL << 0), 3070 + }; 3106 3071 3107 3072 /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ 3108 - #define BPF_F_TUNINFO_IPV6 (1ULL << 0) 3073 + enum { 3074 + BPF_F_TUNINFO_IPV6 = (1ULL << 0), 3075 + }; 3109 3076 3110 3077 /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ 3111 - #define BPF_F_SKIP_FIELD_MASK 0xffULL 3112 - #define BPF_F_USER_STACK (1ULL << 8) 3078 + enum { 3079 + BPF_F_SKIP_FIELD_MASK = 0xffULL, 3080 + BPF_F_USER_STACK = (1ULL << 8), 3113 3081 /* flags used by BPF_FUNC_get_stackid only. */ 3114 - #define BPF_F_FAST_STACK_CMP (1ULL << 9) 3115 - #define BPF_F_REUSE_STACKID (1ULL << 10) 3082 + BPF_F_FAST_STACK_CMP = (1ULL << 9), 3083 + BPF_F_REUSE_STACKID = (1ULL << 10), 3116 3084 /* flags used by BPF_FUNC_get_stack only. */ 3117 - #define BPF_F_USER_BUILD_ID (1ULL << 11) 3085 + BPF_F_USER_BUILD_ID = (1ULL << 11), 3086 + }; 3118 3087 3119 3088 /* BPF_FUNC_skb_set_tunnel_key flags. 
*/ 3120 - #define BPF_F_ZERO_CSUM_TX (1ULL << 1) 3121 - #define BPF_F_DONT_FRAGMENT (1ULL << 2) 3122 - #define BPF_F_SEQ_NUMBER (1ULL << 3) 3089 + enum { 3090 + BPF_F_ZERO_CSUM_TX = (1ULL << 1), 3091 + BPF_F_DONT_FRAGMENT = (1ULL << 2), 3092 + BPF_F_SEQ_NUMBER = (1ULL << 3), 3093 + }; 3123 3094 3124 3095 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and 3125 3096 * BPF_FUNC_perf_event_read_value flags. 3126 3097 */ 3127 - #define BPF_F_INDEX_MASK 0xffffffffULL 3128 - #define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK 3098 + enum { 3099 + BPF_F_INDEX_MASK = 0xffffffffULL, 3100 + BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, 3129 3101 /* BPF_FUNC_perf_event_output for sk_buff input context. */ 3130 - #define BPF_F_CTXLEN_MASK (0xfffffULL << 32) 3102 + BPF_F_CTXLEN_MASK = (0xfffffULL << 32), 3103 + }; 3131 3104 3132 3105 /* Current network namespace */ 3133 - #define BPF_F_CURRENT_NETNS (-1L) 3106 + enum { 3107 + BPF_F_CURRENT_NETNS = (-1L), 3108 + }; 3134 3109 3135 3110 /* BPF_FUNC_skb_adjust_room flags. */ 3136 - #define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) 3111 + enum { 3112 + BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), 3113 + BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1), 3114 + BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), 3115 + BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), 3116 + BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), 3117 + }; 3137 3118 3138 - #define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff 3139 - #define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 3119 + enum { 3120 + BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff, 3121 + BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56, 3122 + }; 3140 3123 3141 - #define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) 3142 - #define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) 3143 - #define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) 3144 - #define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) 3145 3124 #define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ 3146 3125 BPF_ADJ_ROOM_ENCAP_L2_MASK) \ 3147 3126 << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) 3148 3127 3149 3128 /* BPF_FUNC_sysctl_get_name flags. 
*/ 3150 - #define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) 3129 + enum { 3130 + BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), 3131 + }; 3151 3132 3152 3133 /* BPF_FUNC_sk_storage_get flags */ 3153 - #define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) 3134 + enum { 3135 + BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), 3136 + }; 3154 3137 3155 3138 /* BPF_FUNC_read_branch_records flags. */ 3156 - #define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0) 3139 + enum { 3140 + BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0), 3141 + }; 3157 3142 3158 3143 /* Mode for BPF_FUNC_skb_adjust_room helper. */ 3159 3144 enum bpf_adj_room_mode { ··· 3247 3176 __u32 wire_len; 3248 3177 __u32 gso_segs; 3249 3178 __bpf_md_ptr(struct bpf_sock *, sk); 3179 + __u32 gso_size; 3250 3180 }; 3251 3181 3252 3182 struct bpf_tunnel_key { ··· 3600 3528 }; 3601 3529 3602 3530 /* Definitions for bpf_sock_ops_cb_flags */ 3603 - #define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) 3604 - #define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) 3605 - #define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) 3606 - #define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) 3607 - #define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently 3608 - * supported cb flags 3609 - */ 3531 + enum { 3532 + BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0), 3533 + BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), 3534 + BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), 3535 + BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), 3536 + /* Mask of all currently supported cb flags */ 3537 + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, 3538 + }; 3610 3539 3611 3540 /* List of known BPF sock_ops operators. 3612 3541 * New entries can only be added at the end ··· 3686 3613 BPF_TCP_MAX_STATES /* Leave at the end! 
*/ 3687 3614 }; 3688 3615 3689 - #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ 3690 - #define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ 3616 + enum { 3617 + TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ 3618 + TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ 3619 + }; 3691 3620 3692 3621 struct bpf_perf_event_value { 3693 3622 __u64 counter; ··· 3697 3622 __u64 running; 3698 3623 }; 3699 3624 3700 - #define BPF_DEVCG_ACC_MKNOD (1ULL << 0) 3701 - #define BPF_DEVCG_ACC_READ (1ULL << 1) 3702 - #define BPF_DEVCG_ACC_WRITE (1ULL << 2) 3625 + enum { 3626 + BPF_DEVCG_ACC_MKNOD = (1ULL << 0), 3627 + BPF_DEVCG_ACC_READ = (1ULL << 1), 3628 + BPF_DEVCG_ACC_WRITE = (1ULL << 2), 3629 + }; 3703 3630 3704 - #define BPF_DEVCG_DEV_BLOCK (1ULL << 0) 3705 - #define BPF_DEVCG_DEV_CHAR (1ULL << 1) 3631 + enum { 3632 + BPF_DEVCG_DEV_BLOCK = (1ULL << 0), 3633 + BPF_DEVCG_DEV_CHAR = (1ULL << 1), 3634 + }; 3706 3635 3707 3636 struct bpf_cgroup_dev_ctx { 3708 3637 /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ ··· 3722 3643 /* DIRECT: Skip the FIB rules and go to FIB table associated with device 3723 3644 * OUTPUT: Do lookup from egress perspective; default is ingress 3724 3645 */ 3725 - #define BPF_FIB_LOOKUP_DIRECT (1U << 0) 3726 - #define BPF_FIB_LOOKUP_OUTPUT (1U << 1) 3646 + enum { 3647 + BPF_FIB_LOOKUP_DIRECT = (1U << 0), 3648 + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), 3649 + }; 3727 3650 3728 3651 enum { 3729 3652 BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ ··· 3797 3716 BPF_FD_TYPE_URETPROBE, /* filename + offset */ 3798 3717 }; 3799 3718 3800 - #define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) 3801 - #define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) 3802 - #define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) 3719 + enum { 3720 + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0), 3721 + BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1), 3722 + BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2), 3723 + }; 
3803 3724 3804 3725 struct bpf_flow_keys { 3805 3726 __u16 nhoff; ··· 3867 3784 __s32 retval; 3868 3785 }; 3869 3786 3787 + struct bpf_pidns_info { 3788 + __u32 pid; 3789 + __u32 tgid; 3790 + }; 3870 3791 #endif /* _UAPI__LINUX_BPF_H__ */
+222 -1
tools/lib/bpf/bpf_tracing.h
··· 49 49 50 50 #if defined(bpf_target_x86) 51 51 52 - #ifdef __KERNEL__ 52 + #if defined(__KERNEL__) || defined(__VMLINUX_H__) 53 + 53 54 #define PT_REGS_PARM1(x) ((x)->di) 54 55 #define PT_REGS_PARM2(x) ((x)->si) 55 56 #define PT_REGS_PARM3(x) ((x)->dx) ··· 61 60 #define PT_REGS_RC(x) ((x)->ax) 62 61 #define PT_REGS_SP(x) ((x)->sp) 63 62 #define PT_REGS_IP(x) ((x)->ip) 63 + 64 + #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di) 65 + #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si) 66 + #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx) 67 + #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx) 68 + #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) 69 + #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp) 70 + #define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp) 71 + #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax) 72 + #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) 73 + #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip) 74 + 64 75 #else 76 + 65 77 #ifdef __i386__ 66 78 /* i386 kernel is built with -mregparm=3 */ 67 79 #define PT_REGS_PARM1(x) ((x)->eax) ··· 87 73 #define PT_REGS_RC(x) ((x)->eax) 88 74 #define PT_REGS_SP(x) ((x)->esp) 89 75 #define PT_REGS_IP(x) ((x)->eip) 76 + 77 + #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax) 78 + #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx) 79 + #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx) 80 + #define PT_REGS_PARM4_CORE(x) 0 81 + #define PT_REGS_PARM5_CORE(x) 0 82 + #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp) 83 + #define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp) 84 + #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax) 85 + #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp) 86 + #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip) 87 + 90 88 #else 89 + 91 90 #define PT_REGS_PARM1(x) ((x)->rdi) 92 91 #define PT_REGS_PARM2(x) ((x)->rsi) 93 92 #define PT_REGS_PARM3(x) ((x)->rdx) ··· 111 84 #define PT_REGS_RC(x) ((x)->rax) 112 85 #define PT_REGS_SP(x) ((x)->rsp) 113 86 #define PT_REGS_IP(x) 
((x)->rip) 87 + 88 + #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi) 89 + #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi) 90 + #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx) 91 + #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx) 92 + #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) 93 + #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp) 94 + #define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp) 95 + #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax) 96 + #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp) 97 + #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip) 98 + 114 99 #endif 115 100 #endif 116 101 ··· 143 104 #define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) 144 105 #define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) 145 106 107 + #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) 108 + #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3]) 109 + #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4]) 110 + #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5]) 111 + #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6]) 112 + #define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14]) 113 + #define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11]) 114 + #define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) 115 + #define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15]) 116 + #define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr) 117 + 146 118 #elif defined(bpf_target_arm) 147 119 148 120 #define PT_REGS_PARM1(x) ((x)->uregs[0]) ··· 166 116 #define PT_REGS_RC(x) ((x)->uregs[0]) 167 117 #define PT_REGS_SP(x) ((x)->uregs[13]) 168 118 #define PT_REGS_IP(x) ((x)->uregs[12]) 119 + 120 + #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0]) 121 + #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1]) 122 + #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x),
uregs[2]) 123 + #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3]) 124 + #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4]) 125 + #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14]) 126 + #define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11]) 127 + #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0]) 128 + #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13]) 129 + #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12]) 169 130 170 131 #elif defined(bpf_target_arm64) 171 132 ··· 195 134 #define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) 196 135 #define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) 197 136 137 + #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) 138 + #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1]) 139 + #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2]) 140 + #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3]) 141 + #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4]) 142 + #define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30]) 143 + #define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29]) 144 + #define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) 145 + #define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp) 146 + #define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc) 147 + 198 148 #elif defined(bpf_target_mips) 199 149 200 150 #define PT_REGS_PARM1(x) ((x)->regs[4]) ··· 219 147 #define PT_REGS_SP(x) ((x)->regs[29]) 220 148 #define PT_REGS_IP(x) ((x)->cp0_epc) 221 149 150 + #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4]) 151 + #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5]) 152 + #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6]) 153 + #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7]) 154 + #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8]) 155 + #define PT_REGS_RET_CORE(x) 
BPF_CORE_READ((x), regs[31]) 156 + #define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30]) 157 + #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1]) 158 + #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29]) 159 + #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc) 160 + 222 161 #elif defined(bpf_target_powerpc) 223 162 224 163 #define PT_REGS_PARM1(x) ((x)->gpr[3]) ··· 240 157 #define PT_REGS_RC(x) ((x)->gpr[3]) 241 158 #define PT_REGS_SP(x) ((x)->sp) 242 159 #define PT_REGS_IP(x) ((x)->nip) 160 + 161 + #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3]) 162 + #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4]) 163 + #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5]) 164 + #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6]) 165 + #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7]) 166 + #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3]) 167 + #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) 168 + #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip) 243 169 244 170 #elif defined(bpf_target_sparc) 245 171 ··· 261 169 #define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) 262 170 #define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) 263 171 172 + #define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) 173 + #define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1]) 174 + #define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2]) 175 + #define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3]) 176 + #define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4]) 177 + #define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7]) 178 + #define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) 179 + #define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP]) 180 + 264 181 /* Should this also be a bpf_target check for the sparc case? 
*/ 265 182 #if defined(__arch64__) 266 183 #define PT_REGS_IP(x) ((x)->tpc) 184 + #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc) 267 185 #else 268 186 #define PT_REGS_IP(x) ((x)->pc) 187 + #define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) 269 188 #endif 270 189 271 190 #endif ··· 294 191 ({ bpf_probe_read(&(ip), sizeof(ip), \ 295 192 (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) 296 193 #endif 194 + 195 + #define ___bpf_concat(a, b) a ## b 196 + #define ___bpf_apply(fn, n) ___bpf_concat(fn, n) 197 + #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N 198 + #define ___bpf_narg(...) \ 199 + ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) 200 + #define ___bpf_empty(...) \ 201 + ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) 202 + 203 + #define ___bpf_ctx_cast0() ctx 204 + #define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] 205 + #define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] 206 + #define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] 207 + #define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] 208 + #define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] 209 + #define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] 210 + #define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] 211 + #define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] 212 + #define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] 213 + #define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] 214 + #define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] 215 + #define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] 216 + #define ___bpf_ctx_cast(args...) 
\ 217 + ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) 218 + 219 + /* 220 + * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and 221 + * similar kinds of BPF programs, that accept input arguments as a single 222 + * pointer to untyped u64 array, where each u64 can actually be a typed 223 + * pointer or integer of different size. Instead of requiring user to write 224 + * manual casts and work with array elements by index, BPF_PROG macro 225 + * allows user to declare a list of named and typed input arguments in the 226 + * same syntax as for normal C function. All the casting is hidden and 227 + * performed transparently, while user code can just assume working with 228 + * function arguments of specified type and name. 229 + * 230 + * Original raw context argument is preserved as well as 'ctx' argument. 231 + * This is useful when using BPF helpers that expect original context 232 + * as one of the parameters (e.g., for bpf_perf_event_output()). 233 + */ 234 + #define BPF_PROG(name, args...) \ 235 + name(unsigned long long *ctx); \ 236 + static __attribute__((always_inline)) typeof(name(0)) \ 237 + ____##name(unsigned long long *ctx, ##args); \ 238 + typeof(name(0)) name(unsigned long long *ctx) \ 239 + { \ 240 + _Pragma("GCC diagnostic push") \ 241 + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 242 + return ____##name(___bpf_ctx_cast(args)); \ 243 + _Pragma("GCC diagnostic pop") \ 244 + } \ 245 + static __attribute__((always_inline)) typeof(name(0)) \ 246 + ____##name(unsigned long long *ctx, ##args) 247 + 248 + struct pt_regs; 249 + 250 + #define ___bpf_kprobe_args0() ctx 251 + #define ___bpf_kprobe_args1(x) \ 252 + ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) 253 + #define ___bpf_kprobe_args2(x, args...) \ 254 + ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) 255 + #define ___bpf_kprobe_args3(x, args...) \ 256 + ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) 257 + #define ___bpf_kprobe_args4(x, args...)
\ 258 + ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) 259 + #define ___bpf_kprobe_args5(x, args...) \ 260 + ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) 261 + #define ___bpf_kprobe_args(args...) \ 262 + ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) 263 + 264 + /* 265 + * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for 266 + * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific 267 + * low-level way of getting kprobe input arguments from struct pt_regs, and 268 + * provides a familiar typed and named function arguments syntax and 269 + * semantics of accessing kprobe input parameters. 270 + * 271 + * Original struct pt_regs* context is preserved as 'ctx' argument. This might 272 + * be necessary when using BPF helpers like bpf_perf_event_output(). 273 + */ 274 + #define BPF_KPROBE(name, args...) \ 275 + name(struct pt_regs *ctx); \ 276 + static __attribute__((always_inline)) typeof(name(0)) \ 277 + ____##name(struct pt_regs *ctx, ##args); \ 278 + typeof(name(0)) name(struct pt_regs *ctx) \ 279 + { \ 280 + _Pragma("GCC diagnostic push") \ 281 + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 282 + return ____##name(___bpf_kprobe_args(args)); \ 283 + _Pragma("GCC diagnostic pop") \ 284 + } \ 285 + static __attribute__((always_inline)) typeof(name(0)) \ 286 + ____##name(struct pt_regs *ctx, ##args) 287 + 288 + #define ___bpf_kretprobe_args0() ctx 289 + #define ___bpf_kretprobe_args1(x) \ 290 + ___bpf_kretprobe_args0(), (void *)PT_REGS_RET(ctx) 291 + #define ___bpf_kretprobe_args(args...) \ 292 + ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) 293 + 294 + /* 295 + * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional 296 + * return value (in addition to `struct pt_regs *ctx`), but no input 297 + * arguments, because they will be clobbered by the time probed function 298 + * returns. 299 + */ 300 + #define BPF_KRETPROBE(name, args...)
\ 301 + name(struct pt_regs *ctx); \ 302 + static __attribute__((always_inline)) typeof(name(0)) \ 303 + ____##name(struct pt_regs *ctx, ##args); \ 304 + typeof(name(0)) name(struct pt_regs *ctx) \ 305 + { \ 306 + _Pragma("GCC diagnostic push") \ 307 + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 308 + return ____##name(___bpf_kretprobe_args(args)); \ 309 + _Pragma("GCC diagnostic pop") \ 310 + } \ 311 + static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) 297 312 298 313 #endif
+5 -5
tools/lib/bpf/btf_dump.c
··· 916 916 /* enumerators share namespace with typedef idents */ 917 917 dup_cnt = btf_dump_name_dups(d, d->ident_names, name); 918 918 if (dup_cnt > 1) { 919 - btf_dump_printf(d, "\n%s%s___%zu = %d,", 919 + btf_dump_printf(d, "\n%s%s___%zu = %u,", 920 920 pfx(lvl + 1), name, dup_cnt, 921 - (__s32)v->val); 921 + (__u32)v->val); 922 922 } else { 923 - btf_dump_printf(d, "\n%s%s = %d,", 923 + btf_dump_printf(d, "\n%s%s = %u,", 924 924 pfx(lvl + 1), name, 925 - (__s32)v->val); 925 + (__u32)v->val); 926 926 } 927 927 } 928 928 btf_dump_printf(d, "\n%s}", pfx(lvl)); ··· 1030 1030 if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts)) 1031 1031 return -EINVAL; 1032 1032 1033 - fname = OPTS_GET(opts, field_name, NULL); 1033 + fname = OPTS_GET(opts, field_name, ""); 1034 1034 lvl = OPTS_GET(opts, indent_level, 0); 1035 1035 btf_dump_emit_type_decl(d, id, fname, lvl); 1036 1036 return 0;
+124 -32
tools/lib/bpf/libbpf.c
··· 2284 2284 } 2285 2285 } 2286 2286 2287 - static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) 2287 + static bool libbpf_needs_btf(const struct bpf_object *obj) 2288 2288 { 2289 - return obj->efile.st_ops_shndx >= 0 || obj->nr_extern > 0; 2289 + return obj->efile.btf_maps_shndx >= 0 || 2290 + obj->efile.st_ops_shndx >= 0 || 2291 + obj->nr_extern > 0; 2292 + } 2293 + 2294 + static bool kernel_needs_btf(const struct bpf_object *obj) 2295 + { 2296 + return obj->efile.st_ops_shndx >= 0; 2290 2297 } 2291 2298 2292 2299 static int bpf_object__init_btf(struct bpf_object *obj, ··· 2329 2322 } 2330 2323 } 2331 2324 out: 2332 - if (err && bpf_object__is_btf_mandatory(obj)) { 2325 + if (err && libbpf_needs_btf(obj)) { 2333 2326 pr_warn("BTF is required, but is missing or corrupted.\n"); 2334 2327 return err; 2335 2328 } ··· 2353 2346 btf_ext__free(obj->btf_ext); 2354 2347 obj->btf_ext = NULL; 2355 2348 2356 - if (bpf_object__is_btf_mandatory(obj)) { 2349 + if (libbpf_needs_btf(obj)) { 2357 2350 pr_warn("BTF is required, but is missing or corrupted.\n"); 2358 2351 return -ENOENT; 2359 2352 } ··· 2417 2410 obj->btf_ext = NULL; 2418 2411 } 2419 2412 2420 - if (bpf_object__is_btf_mandatory(obj)) 2413 + if (kernel_needs_btf(obj)) 2421 2414 return err; 2422 2415 } 2423 2416 return 0; ··· 3871 3864 t = btf__type_by_id(targ_btf, i); 3872 3865 targ_name = btf__name_by_offset(targ_btf, t->name_off); 3873 3866 if (str_is_empty(targ_name)) 3867 + continue; 3868 + 3869 + t = skip_mods_and_typedefs(targ_btf, i, NULL); 3870 + if (!btf_is_composite(t) && !btf_is_array(t)) 3874 3871 continue; 3875 3872 3876 3873 targ_essent_len = bpf_core_essential_name_len(targ_name); ··· 6299 6288 .expected_attach_type = BPF_TRACE_FENTRY, 6300 6289 .is_attach_btf = true, 6301 6290 .attach_fn = attach_trace), 6291 + SEC_DEF("fmod_ret/", TRACING, 6292 + .expected_attach_type = BPF_MODIFY_RETURN, 6293 + .is_attach_btf = true, 6294 + .attach_fn = attach_trace), 6302 6295 SEC_DEF("fexit/", 
TRACING, 6303 6296 .expected_attach_type = BPF_TRACE_FEXIT, 6304 6297 .is_attach_btf = true, ··· 6946 6931 struct bpf_link { 6947 6932 int (*detach)(struct bpf_link *link); 6948 6933 int (*destroy)(struct bpf_link *link); 6934 + char *pin_path; /* NULL, if not pinned */ 6935 + int fd; /* hook FD, -1 if not applicable */ 6949 6936 bool disconnected; 6950 6937 }; 6951 6938 ··· 6977 6960 err = link->detach(link); 6978 6961 if (link->destroy) 6979 6962 link->destroy(link); 6963 + if (link->pin_path) 6964 + free(link->pin_path); 6980 6965 free(link); 6981 6966 6982 6967 return err; 6983 6968 } 6984 6969 6985 - struct bpf_link_fd { 6986 - struct bpf_link link; /* has to be at the top of struct */ 6987 - int fd; /* hook FD */ 6988 - }; 6970 + int bpf_link__fd(const struct bpf_link *link) 6971 + { 6972 + return link->fd; 6973 + } 6974 + 6975 + const char *bpf_link__pin_path(const struct bpf_link *link) 6976 + { 6977 + return link->pin_path; 6978 + } 6979 + 6980 + static int bpf_link__detach_fd(struct bpf_link *link) 6981 + { 6982 + return close(link->fd); 6983 + } 6984 + 6985 + struct bpf_link *bpf_link__open(const char *path) 6986 + { 6987 + struct bpf_link *link; 6988 + int fd; 6989 + 6990 + fd = bpf_obj_get(path); 6991 + if (fd < 0) { 6992 + fd = -errno; 6993 + pr_warn("failed to open link at %s: %d\n", path, fd); 6994 + return ERR_PTR(fd); 6995 + } 6996 + 6997 + link = calloc(1, sizeof(*link)); 6998 + if (!link) { 6999 + close(fd); 7000 + return ERR_PTR(-ENOMEM); 7001 + } 7002 + link->detach = &bpf_link__detach_fd; 7003 + link->fd = fd; 7004 + 7005 + link->pin_path = strdup(path); 7006 + if (!link->pin_path) { 7007 + bpf_link__destroy(link); 7008 + return ERR_PTR(-ENOMEM); 7009 + } 7010 + 7011 + return link; 7012 + } 7013 + 7014 + int bpf_link__pin(struct bpf_link *link, const char *path) 7015 + { 7016 + int err; 7017 + 7018 + if (link->pin_path) 7019 + return -EBUSY; 7020 + err = make_parent_dir(path); 7021 + if (err) 7022 + return err; 7023 + err = check_path(path); 
7024 + if (err) 7025 + return err; 7026 + 7027 + link->pin_path = strdup(path); 7028 + if (!link->pin_path) 7029 + return -ENOMEM; 7030 + 7031 + if (bpf_obj_pin(link->fd, link->pin_path)) { 7032 + err = -errno; 7033 + zfree(&link->pin_path); 7034 + return err; 7035 + } 7036 + 7037 + pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path); 7038 + return 0; 7039 + } 7040 + 7041 + int bpf_link__unpin(struct bpf_link *link) 7042 + { 7043 + int err; 7044 + 7045 + if (!link->pin_path) 7046 + return -EINVAL; 7047 + 7048 + err = unlink(link->pin_path); 7049 + if (err != 0) 7050 + return -errno; 7051 + 7052 + pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path); 7053 + zfree(&link->pin_path); 7054 + return 0; 7055 + } 6989 7056 6990 7057 static int bpf_link__detach_perf_event(struct bpf_link *link) 6991 7058 { 6992 - struct bpf_link_fd *l = (void *)link; 6993 7059 int err; 6994 7060 6995 - err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0); 7061 + err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0); 6996 7062 if (err) 6997 7063 err = -errno; 6998 7064 6999 - close(l->fd); 7065 + close(link->fd); 7000 7066 return err; 7001 7067 } 7002 7068 ··· 7087 6987 int pfd) 7088 6988 { 7089 6989 char errmsg[STRERR_BUFSIZE]; 7090 - struct bpf_link_fd *link; 6990 + struct bpf_link *link; 7091 6991 int prog_fd, err; 7092 6992 7093 6993 if (pfd < 0) { ··· 7105 7005 link = calloc(1, sizeof(*link)); 7106 7006 if (!link) 7107 7007 return ERR_PTR(-ENOMEM); 7108 - link->link.detach = &bpf_link__detach_perf_event; 7008 + link->detach = &bpf_link__detach_perf_event; 7109 7009 link->fd = pfd; 7110 7010 7111 7011 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { ··· 7124 7024 libbpf_strerror_r(err, errmsg, sizeof(errmsg))); 7125 7025 return ERR_PTR(err); 7126 7026 } 7127 - return (struct bpf_link *)link; 7027 + return link; 7128 7028 } 7129 7029 7130 7030 /* ··· 7412 7312 return link; 7413 7313 } 7414 7314 7415 - static int bpf_link__detach_fd(struct bpf_link *link) 7416 - { 
7417 - struct bpf_link_fd *l = (void *)link; 7418 - 7419 - return close(l->fd); 7420 - } 7421 - 7422 7315 struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, 7423 7316 const char *tp_name) 7424 7317 { 7425 7318 char errmsg[STRERR_BUFSIZE]; 7426 - struct bpf_link_fd *link; 7319 + struct bpf_link *link; 7427 7320 int prog_fd, pfd; 7428 7321 7429 7322 prog_fd = bpf_program__fd(prog); ··· 7429 7336 link = calloc(1, sizeof(*link)); 7430 7337 if (!link) 7431 7338 return ERR_PTR(-ENOMEM); 7432 - link->link.detach = &bpf_link__detach_fd; 7339 + link->detach = &bpf_link__detach_fd; 7433 7340 7434 7341 pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); 7435 7342 if (pfd < 0) { ··· 7441 7348 return ERR_PTR(pfd); 7442 7349 } 7443 7350 link->fd = pfd; 7444 - return (struct bpf_link *)link; 7351 + return link; 7445 7352 } 7446 7353 7447 7354 static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, ··· 7455 7362 struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) 7456 7363 { 7457 7364 char errmsg[STRERR_BUFSIZE]; 7458 - struct bpf_link_fd *link; 7365 + struct bpf_link *link; 7459 7366 int prog_fd, pfd; 7460 7367 7461 7368 prog_fd = bpf_program__fd(prog); ··· 7468 7375 link = calloc(1, sizeof(*link)); 7469 7376 if (!link) 7470 7377 return ERR_PTR(-ENOMEM); 7471 - link->link.detach = &bpf_link__detach_fd; 7378 + link->detach = &bpf_link__detach_fd; 7472 7379 7473 7380 pfd = bpf_raw_tracepoint_open(NULL, prog_fd); 7474 7381 if (pfd < 0) { ··· 7502 7409 7503 7410 static int bpf_link__detach_struct_ops(struct bpf_link *link) 7504 7411 { 7505 - struct bpf_link_fd *l = (void *)link; 7506 7412 __u32 zero = 0; 7507 7413 7508 - if (bpf_map_delete_elem(l->fd, &zero)) 7414 + if (bpf_map_delete_elem(link->fd, &zero)) 7509 7415 return -errno; 7510 7416 7511 7417 return 0; ··· 7513 7421 struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map) 7514 7422 { 7515 7423 struct bpf_struct_ops *st_ops; 7516 - struct bpf_link_fd *link; 7424 + 
struct bpf_link *link; 7517 7425 __u32 i, zero = 0; 7518 7426 int err; 7519 7427 ··· 7545 7453 return ERR_PTR(err); 7546 7454 } 7547 7455 7548 - link->link.detach = bpf_link__detach_struct_ops; 7456 + link->detach = bpf_link__detach_struct_ops; 7549 7457 link->fd = map->fd; 7550 7458 7551 - return (struct bpf_link *)link; 7459 + return link; 7552 7460 } 7553 7461 7554 7462 enum bpf_perf_event_ret
+5
tools/lib/bpf/libbpf.h
··· 219 219 220 220 struct bpf_link; 221 221 222 + LIBBPF_API struct bpf_link *bpf_link__open(const char *path); 223 + LIBBPF_API int bpf_link__fd(const struct bpf_link *link); 224 + LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link); 225 + LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path); 226 + LIBBPF_API int bpf_link__unpin(struct bpf_link *link); 222 227 LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); 223 228 LIBBPF_API int bpf_link__destroy(struct bpf_link *link); 224 229
+5
tools/lib/bpf/libbpf.map
··· 238 238 239 239 LIBBPF_0.0.8 { 240 240 global: 241 + bpf_link__fd; 242 + bpf_link__open; 243 + bpf_link__pin; 244 + bpf_link__pin_path; 245 + bpf_link__unpin; 241 246 bpf_program__set_attach_target; 242 247 } LIBBPF_0.0.7;
+1
tools/scripts/Makefile.include
··· 106 106 ifneq ($(V),1) 107 107 QUIET_CC = @echo ' CC '$@; 108 108 QUIET_CC_FPIC = @echo ' CC FPIC '$@; 109 + QUIET_CLANG = @echo ' CLANG '$@; 109 110 QUIET_AR = @echo ' AR '$@; 110 111 QUIET_LINK = @echo ' LINK '$@; 111 112 QUIET_MKDIR = @echo ' MKDIR '$@;
+1
tools/testing/selftests/bpf/.gitignore
··· 31 31 test_sysctl 32 32 test_hashmap 33 33 test_btf_dump 34 + test_current_pid_tgid_new_ns 34 35 xdping 35 36 test_cpp 36 37 *.skel.h
+18 -8
tools/testing/selftests/bpf/Makefile
··· 20 20 LLC ?= llc 21 21 LLVM_OBJCOPY ?= llvm-objcopy 22 22 BPF_GCC ?= $(shell command -v bpf-gcc;) 23 - CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \ 23 + CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR) \ 24 24 -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \ 25 + -I$(APIDIR) \ 25 26 -Dbpf_prog_load=bpf_prog_test_load \ 26 27 -Dbpf_load_program=bpf_test_load_program 27 28 LDLIBS += -lcap -lelf -lz -lrt -lpthread ··· 33 32 test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ 34 33 test_cgroup_storage \ 35 34 test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ 36 - test_progs-no_alu32 35 + test_progs-no_alu32 \ 36 + test_current_pid_tgid_new_ns 37 37 38 38 # Also test bpf-gcc, if present 39 39 ifneq ($(BPF_GCC),) ··· 131 129 $(call msg,CC,,$@) 132 130 $(CC) -c $(CFLAGS) -o $@ $< 133 131 134 - VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux) \ 135 - /sys/kernel/btf/vmlinux \ 136 - /boot/vmlinux-$(shell uname -r) 137 - VMLINUX_BTF:= $(firstword $(wildcard $(VMLINUX_BTF_PATHS))) 132 + VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ 133 + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ 134 + ../../../../vmlinux \ 135 + /sys/kernel/btf/vmlinux \ 136 + /boot/vmlinux-$(shell uname -r) 137 + VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) 138 + 138 139 $(OUTPUT)/runqslower: $(BPFOBJ) 139 140 $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ 140 141 OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \ ··· 177 172 $(call msg,MKDIR,,$@) 178 173 mkdir -p $@ 179 174 175 + $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) 176 + $(call msg,GEN,,$@) 177 + $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ 178 + 180 179 # Get Clang's default includes on this system, as opposed to those seen by 181 180 # '-target bpf'. 
This fixes "missing" files on some architectures/distros, 182 181 # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. ··· 199 190 200 191 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) 201 192 BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \ 202 - -I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi \ 203 - -I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include) 193 + -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \ 194 + -I$(abspath $(OUTPUT)/../usr/include) 204 195 205 196 CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ 206 197 -Wno-compare-distinct-pointer-types ··· 289 280 $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ 290 281 $(TRUNNER_BPF_PROGS_DIR)/%.c \ 291 282 $(TRUNNER_BPF_PROGS_DIR)/*.h \ 283 + $$(INCLUDE_DIR)/vmlinux.h \ 292 284 $$(BPFOBJ) | $(TRUNNER_OUTPUT) 293 285 $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ 294 286 $(TRUNNER_BPF_CFLAGS), \
+1 -1
tools/testing/selftests/bpf/bpf_tcp_helpers.h
··· 6 6 #include <linux/types.h> 7 7 #include <bpf/bpf_helpers.h> 8 8 #include <bpf/bpf_core_read.h> 9 - #include "bpf_trace_helpers.h" 9 + #include <bpf/bpf_tracing.h> 10 10 11 11 #define BPF_STRUCT_OPS(name, args...) \ 12 12 SEC("struct_ops/"#name) \
-120
tools/testing/selftests/bpf/bpf_trace_helpers.h
··· 1 - /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 - #ifndef __BPF_TRACE_HELPERS_H 3 - #define __BPF_TRACE_HELPERS_H 4 - 5 - #include <bpf/bpf_helpers.h> 6 - 7 - #define ___bpf_concat(a, b) a ## b 8 - #define ___bpf_apply(fn, n) ___bpf_concat(fn, n) 9 - #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N 10 - #define ___bpf_narg(...) \ 11 - ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) 12 - #define ___bpf_empty(...) \ 13 - ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) 14 - 15 - #define ___bpf_ctx_cast0() ctx 16 - #define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] 17 - #define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] 18 - #define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] 19 - #define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] 20 - #define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] 21 - #define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] 22 - #define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] 23 - #define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] 24 - #define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] 25 - #define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9] 26 - #define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10] 27 - #define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] 28 - #define ___bpf_ctx_cast(args...) \ 29 - ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) 30 - 31 - /* 32 - * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and 33 - * similar kinds of BPF programs, that accept input arguments as a single 34 - * pointer to untyped u64 array, where each u64 can actually be a typed 35 - * pointer or integer of different size. 
Instead of requring user to write 36 - * manual casts and work with array elements by index, BPF_PROG macro 37 - * allows user to declare a list of named and typed input arguments in the 38 - * same syntax as for normal C function. All the casting is hidden and 39 - * performed transparently, while user code can just assume working with 40 - * function arguments of specified type and name. 41 - * 42 - * Original raw context argument is preserved as well as 'ctx' argument. 43 - * This is useful when using BPF helpers that expect original context 44 - * as one of the parameters (e.g., for bpf_perf_event_output()). 45 - */ 46 - #define BPF_PROG(name, args...) \ 47 - name(unsigned long long *ctx); \ 48 - static __always_inline typeof(name(0)) \ 49 - ____##name(unsigned long long *ctx, ##args); \ 50 - typeof(name(0)) name(unsigned long long *ctx) \ 51 - { \ 52 - _Pragma("GCC diagnostic push") \ 53 - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 54 - return ____##name(___bpf_ctx_cast(args)); \ 55 - _Pragma("GCC diagnostic pop") \ 56 - } \ 57 - static __always_inline typeof(name(0)) \ 58 - ____##name(unsigned long long *ctx, ##args) 59 - 60 - struct pt_regs; 61 - 62 - #define ___bpf_kprobe_args0() ctx 63 - #define ___bpf_kprobe_args1(x) \ 64 - ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) 65 - #define ___bpf_kprobe_args2(x, args...) \ 66 - ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) 67 - #define ___bpf_kprobe_args3(x, args...) \ 68 - ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) 69 - #define ___bpf_kprobe_args4(x, args...) \ 70 - ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) 71 - #define ___bpf_kprobe_args5(x, args...) \ 72 - ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) 73 - #define ___bpf_kprobe_args(args...) \ 74 - ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) 75 - 76 - /* 77 - * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for 78 - * tp_btf/fentry/fexit BPF programs. 
It hides the underlying platform-specific 79 - * low-level way of getting kprobe input arguments from struct pt_regs, and 80 - * provides a familiar typed and named function arguments syntax and 81 - * semantics of accessing kprobe input parameters. 82 - * 83 - * Original struct pt_regs* context is preserved as 'ctx' argument. This might 84 - * be necessary when using BPF helpers like bpf_perf_event_output(). 85 - */ 86 - #define BPF_KPROBE(name, args...) \ 87 - name(struct pt_regs *ctx); \ 88 - static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ 89 - typeof(name(0)) name(struct pt_regs *ctx) \ 90 - { \ 91 - _Pragma("GCC diagnostic push") \ 92 - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 93 - return ____##name(___bpf_kprobe_args(args)); \ 94 - _Pragma("GCC diagnostic pop") \ 95 - } \ 96 - static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) 97 - 98 - #define ___bpf_kretprobe_args0() ctx 99 - #define ___bpf_kretprobe_argsN(x, args...) \ 100 - ___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx) 101 - #define ___bpf_kretprobe_args(args...) \ 102 - ___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args) 103 - 104 - /* 105 - * BPF_KRETPROBE is similar to BPF_KPROBE, except, in addition to listing all 106 - * input kprobe arguments, one last extra argument has to be specified, which 107 - * captures kprobe return value. 108 - */ 109 - #define BPF_KRETPROBE(name, args...) \ 110 - name(struct pt_regs *ctx); \ 111 - static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\ 112 - typeof(name(0)) name(struct pt_regs *ctx) \ 113 - { \ 114 - _Pragma("GCC diagnostic push") \ 115 - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ 116 - return ____##name(___bpf_kretprobe_args(args)); \ 117 - _Pragma("GCC diagnostic pop") \ 118 - } \ 119 - static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args) 120 - #endif
tools/testing/selftests/bpf/include/uapi/linux/types.h tools/include/uapi/linux/types.h
+1 -1
tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
··· 6 6 7 7 #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" 8 8 9 - char bpf_log_buf[BPF_LOG_BUF_SIZE]; 9 + static char bpf_log_buf[BPF_LOG_BUF_SIZE]; 10 10 11 11 static int prog_load(void) 12 12 {
+1 -1
tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
··· 6 6 7 7 #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" 8 8 9 - char bpf_log_buf[BPF_LOG_BUF_SIZE]; 9 + static char bpf_log_buf[BPF_LOG_BUF_SIZE]; 10 10 11 11 static int map_fd = -1; 12 12
+1 -1
tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
··· 8 8 #define BAR "/foo/bar/" 9 9 #define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" 10 10 11 - char bpf_log_buf[BPF_LOG_BUF_SIZE]; 11 + static char bpf_log_buf[BPF_LOG_BUF_SIZE]; 12 12 13 13 static int prog_load(int verdict) 14 14 {
+3 -9
tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2019 Facebook */ 3 3 #include <test_progs.h> 4 - #include "test_pkt_access.skel.h" 5 4 #include "fentry_test.skel.h" 6 5 #include "fexit_test.skel.h" 7 6 8 7 void test_fentry_fexit(void) 9 8 { 10 - struct test_pkt_access *pkt_skel = NULL; 11 9 struct fentry_test *fentry_skel = NULL; 12 10 struct fexit_test *fexit_skel = NULL; 13 11 __u64 *fentry_res, *fexit_res; 14 12 __u32 duration = 0, retval; 15 - int err, pkt_fd, i; 13 + int err, prog_fd, i; 16 14 17 - pkt_skel = test_pkt_access__open_and_load(); 18 - if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) 19 - return; 20 15 fentry_skel = fentry_test__open_and_load(); 21 16 if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) 22 17 goto close_prog; ··· 26 31 if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) 27 32 goto close_prog; 28 33 29 - pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); 30 - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), 34 + prog_fd = bpf_program__fd(fexit_skel->progs.test1); 35 + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, 31 36 NULL, NULL, &retval, &duration); 32 37 CHECK(err || retval, "ipv6", 33 38 "err %d errno %d retval %d duration %d\n", ··· 44 49 } 45 50 46 51 close_prog: 47 - test_pkt_access__destroy(pkt_skel); 48 52 fentry_test__destroy(fentry_skel); 49 53 fexit_test__destroy(fexit_skel); 50 54 }
+4 -10
tools/testing/selftests/bpf/prog_tests/fentry_test.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2019 Facebook */ 3 3 #include <test_progs.h> 4 - #include "test_pkt_access.skel.h" 5 4 #include "fentry_test.skel.h" 6 5 7 6 void test_fentry_test(void) 8 7 { 9 - struct test_pkt_access *pkt_skel = NULL; 10 8 struct fentry_test *fentry_skel = NULL; 11 - int err, pkt_fd, i; 9 + int err, prog_fd, i; 12 10 __u32 duration = 0, retval; 13 11 __u64 *result; 14 12 15 - pkt_skel = test_pkt_access__open_and_load(); 16 - if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) 17 - return; 18 13 fentry_skel = fentry_test__open_and_load(); 19 14 if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) 20 15 goto cleanup; ··· 18 23 if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) 19 24 goto cleanup; 20 25 21 - pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); 22 - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), 26 + prog_fd = bpf_program__fd(fentry_skel->progs.test1); 27 + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, 23 28 NULL, NULL, &retval, &duration); 24 - CHECK(err || retval, "ipv6", 29 + CHECK(err || retval, "test_run", 25 30 "err %d errno %d retval %d duration %d\n", 26 31 err, errno, retval, duration); 27 32 ··· 34 39 35 40 cleanup: 36 41 fentry_test__destroy(fentry_skel); 37 - test_pkt_access__destroy(pkt_skel); 38 42 }
+21 -48
tools/testing/selftests/bpf/prog_tests/fexit_test.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 /* Copyright (c) 2019 Facebook */ 3 3 #include <test_progs.h> 4 + #include "fexit_test.skel.h" 4 5 5 6 void test_fexit_test(void) 6 7 { 7 - struct bpf_prog_load_attr attr = { 8 - .file = "./fexit_test.o", 9 - }; 10 - 11 - char prog_name[] = "fexit/bpf_fentry_testX"; 12 - struct bpf_object *obj = NULL, *pkt_obj; 13 - int err, pkt_fd, kfree_skb_fd, i; 14 - struct bpf_link *link[6] = {}; 15 - struct bpf_program *prog[6]; 8 + struct fexit_test *fexit_skel = NULL; 9 + int err, prog_fd, i; 16 10 __u32 duration = 0, retval; 17 - struct bpf_map *data_map; 18 - const int zero = 0; 19 - u64 result[6]; 11 + __u64 *result; 20 12 21 - err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, 22 - &pkt_obj, &pkt_fd); 23 - if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) 24 - return; 25 - err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd); 26 - if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) 27 - goto close_prog; 13 + fexit_skel = fexit_test__open_and_load(); 14 + if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) 15 + goto cleanup; 28 16 29 - for (i = 0; i < 6; i++) { 30 - prog_name[sizeof(prog_name) - 2] = '1' + i; 31 - prog[i] = bpf_object__find_program_by_title(obj, prog_name); 32 - if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name)) 33 - goto close_prog; 34 - link[i] = bpf_program__attach_trace(prog[i]); 35 - if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) 36 - goto close_prog; 37 - } 38 - data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss"); 39 - if (CHECK(!data_map, "find_data_map", "data map not found\n")) 40 - goto close_prog; 17 + err = fexit_test__attach(fexit_skel); 18 + if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) 19 + goto cleanup; 41 20 42 - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), 21 + prog_fd = bpf_program__fd(fexit_skel->progs.test1); 22 + err = 
bpf_prog_test_run(prog_fd, 1, NULL, 0, 43 23 NULL, NULL, &retval, &duration); 44 - CHECK(err || retval, "ipv6", 24 + CHECK(err || retval, "test_run", 45 25 "err %d errno %d retval %d duration %d\n", 46 26 err, errno, retval, duration); 47 27 48 - err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result); 49 - if (CHECK(err, "get_result", 50 - "failed to get output data: %d\n", err)) 51 - goto close_prog; 28 + result = (__u64 *)fexit_skel->bss; 29 + for (i = 0; i < 6; i++) { 30 + if (CHECK(result[i] != 1, "result", 31 + "fexit_test%d failed err %lld\n", i + 1, result[i])) 32 + goto cleanup; 33 + } 52 34 53 - for (i = 0; i < 6; i++) 54 - if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", 55 - i + 1, result[i])) 56 - goto close_prog; 57 - 58 - close_prog: 59 - for (i = 0; i < 6; i++) 60 - if (!IS_ERR_OR_NULL(link[i])) 61 - bpf_link__destroy(link[i]); 62 - bpf_object__close(obj); 63 - bpf_object__close(pkt_obj); 35 + cleanup: 36 + fexit_test__destroy(fexit_skel); 64 37 }
+65
tools/testing/selftests/bpf/prog_tests/modify_return.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + /* 4 + * Copyright 2020 Google LLC. 5 + */ 6 + 7 + #include <test_progs.h> 8 + #include "modify_return.skel.h" 9 + 10 + #define LOWER(x) ((x) & 0xffff) 11 + #define UPPER(x) ((x) >> 16) 12 + 13 + 14 + static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) 15 + { 16 + struct modify_return *skel = NULL; 17 + int err, prog_fd; 18 + __u32 duration = 0, retval; 19 + __u16 side_effect; 20 + __s16 ret; 21 + 22 + skel = modify_return__open_and_load(); 23 + if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n")) 24 + goto cleanup; 25 + 26 + err = modify_return__attach(skel); 27 + if (CHECK(err, "modify_return", "attach failed: %d\n", err)) 28 + goto cleanup; 29 + 30 + skel->bss->input_retval = input_retval; 31 + prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); 32 + err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0, 33 + &retval, &duration); 34 + 35 + CHECK(err, "test_run", "err %d errno %d\n", err, errno); 36 + 37 + side_effect = UPPER(retval); 38 + ret = LOWER(retval); 39 + 40 + CHECK(ret != want_ret, "test_run", 41 + "unexpected ret: %d, expected: %d\n", ret, want_ret); 42 + CHECK(side_effect != want_side_effect, "modify_return", 43 + "unexpected side_effect: %d\n", side_effect); 44 + 45 + CHECK(skel->bss->fentry_result != 1, "modify_return", 46 + "fentry failed\n"); 47 + CHECK(skel->bss->fexit_result != 1, "modify_return", 48 + "fexit failed\n"); 49 + CHECK(skel->bss->fmod_ret_result != 1, "modify_return", 50 + "fmod_ret failed\n"); 51 + 52 + cleanup: 53 + modify_return__destroy(skel); 54 + } 55 + 56 + void test_modify_return(void) 57 + { 58 + run_test(0 /* input_retval */, 59 + 1 /* want_side_effect */, 60 + 4 /* want_ret */); 61 + run_test(-EINVAL /* input_retval */, 62 + 0 /* want_side_effect */, 63 + -EINVAL /* want_ret */); 64 + } 65 +
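The retval decoding in run_test() above (side-effect counter in the upper 16 bits, signed return code in the lower 16) can be sketched in isolation. The `pack()` helper below is a hypothetical inverse we added for illustration; only the LOWER/UPPER macros come from the selftest.

```c
#include <assert.h>

/* Same unpacking macros as the modify_return selftest above. */
#define LOWER(x) ((x) & 0xffff)
#define UPPER(x) ((x) >> 16)

/* Hypothetical inverse of the test's decoding: a 16-bit side-effect
 * counter and a signed 16-bit return code share one 32-bit retval. */
unsigned int pack(unsigned short side_effect, short ret)
{
	return (unsigned int)side_effect << 16 | (unsigned short)ret;
}
```

Narrowing LOWER() back through a signed 16-bit type (`__s16` in the test, plain `short` here) is what lets a negative code such as -EINVAL survive the round trip.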
+88
tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */ 3 + #include <test_progs.h> 4 + #include <sys/stat.h> 5 + #include <sys/types.h> 6 + #include <unistd.h> 7 + #include <sys/syscall.h> 8 + 9 + struct bss { 10 + __u64 dev; 11 + __u64 ino; 12 + __u64 pid_tgid; 13 + __u64 user_pid_tgid; 14 + }; 15 + 16 + void test_ns_current_pid_tgid(void) 17 + { 18 + const char *probe_name = "raw_tracepoint/sys_enter"; 19 + const char *file = "test_ns_current_pid_tgid.o"; 20 + int err, key = 0, duration = 0; 21 + struct bpf_link *link = NULL; 22 + struct bpf_program *prog; 23 + struct bpf_map *bss_map; 24 + struct bpf_object *obj; 25 + struct bss bss; 26 + struct stat st; 27 + __u64 id; 28 + 29 + obj = bpf_object__open_file(file, NULL); 30 + if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) 31 + return; 32 + 33 + err = bpf_object__load(obj); 34 + if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) 35 + goto cleanup; 36 + 37 + bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss"); 38 + if (CHECK(!bss_map, "find_bss_map", "failed\n")) 39 + goto cleanup; 40 + 41 + prog = bpf_object__find_program_by_title(obj, probe_name); 42 + if (CHECK(!prog, "find_prog", "prog '%s' not found\n", 43 + probe_name)) 44 + goto cleanup; 45 + 46 + memset(&bss, 0, sizeof(bss)); 47 + pid_t tid = syscall(SYS_gettid); 48 + pid_t pid = getpid(); 49 + 50 + id = (__u64) tid << 32 | pid; 51 + bss.user_pid_tgid = id; 52 + 53 + if (CHECK_FAIL(stat("/proc/self/ns/pid", &st))) { 54 + perror("Failed to stat /proc/self/ns/pid"); 55 + goto cleanup; 56 + } 57 + 58 + bss.dev = st.st_dev; 59 + bss.ino = st.st_ino; 60 + 61 + err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0); 62 + if (CHECK(err, "setting_bss", "failed to set bss : %d\n", err)) 63 + goto cleanup; 64 + 65 + link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); 66 + if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", 67 + PTR_ERR(link))) { 68 + link = 
NULL; 69 + goto cleanup; 70 + } 71 + 72 + /* trigger some syscalls */ 73 + usleep(1); 74 + 75 + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss); 76 + if (CHECK(err, "set_bss", "failed to get bss : %d\n", err)) 77 + goto cleanup; 78 + 79 + if (CHECK(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid", 80 + "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid)) 81 + goto cleanup; 82 + cleanup: 83 + if (link) { 84 + bpf_link__destroy(link); 85 + link = NULL; 86 + } 87 + bpf_object__close(obj); 88 + }
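The test derives its expected value from two userspace facts: the pid namespace identity (device and inode of /proc/self/ns/pid) and the tid/pid pair packed into one u64. A small sketch of both steps follows; the helper names are ours, not the selftest's, and the stat() part is Linux-only since it reads /proc.

```c
#include <sys/stat.h>

/* Identify the calling process's pid namespace the same way the test
 * does: by the device and inode of /proc/self/ns/pid. Linux-only. */
int get_pidns_id(unsigned long long *dev, unsigned long long *ino)
{
	struct stat st;

	if (stat("/proc/self/ns/pid", &st))
		return -1;
	*dev = st.st_dev;
	*ino = st.st_ino;
	return 0;
}

/* The comparison value: thread id in the upper 32 bits, process id in
 * the lower 32, matching `id = (__u64)tid << 32 | pid;` above. */
unsigned long long pack_pid_tgid(unsigned int tid, unsigned int pid)
{
	return (unsigned long long)tid << 32 | pid;
}
```

The BPF side fills `bss.pid_tgid` from the new helper using the dev/ino pair, so a mismatch with the packed userspace value fails the test.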
-6
tools/testing/selftests/bpf/prog_tests/select_reuseport.c
··· 805 805 char s[MAX_TEST_NAME]; 806 806 const struct test *t; 807 807 808 - /* SOCKMAP/SOCKHASH don't support UDP yet */ 809 - if (sotype == SOCK_DGRAM && 810 - (inner_map_type == BPF_MAP_TYPE_SOCKMAP || 811 - inner_map_type == BPF_MAP_TYPE_SOCKHASH)) 812 - return; 813 - 814 808 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 815 809 if (t->need_sotype && t->need_sotype != sotype) 816 810 continue; /* test not compatible with socket type */
+1
tools/testing/selftests/bpf/prog_tests/skb_ctx.c
··· 14 14 .wire_len = 100, 15 15 .gso_segs = 8, 16 16 .mark = 9, 17 + .gso_size = 10, 17 18 }; 18 19 struct bpf_prog_test_run_attr tattr = { 19 20 .data_in = &pkt_v4,
+199 -60
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
··· 16 16 #include <pthread.h> 17 17 #include <stdlib.h> 18 18 #include <string.h> 19 + #include <sys/select.h> 19 20 #include <unistd.h> 20 21 21 22 #include <bpf/bpf.h> ··· 26 25 #include "test_progs.h" 27 26 #include "test_sockmap_listen.skel.h" 28 27 28 + #define IO_TIMEOUT_SEC 30 29 29 #define MAX_STRERR_LEN 256 30 30 #define MAX_TEST_NAME 80 31 31 ··· 46 44 47 45 /* Wrappers that fail the test on error and report it. */ 48 46 49 - #define xaccept(fd, addr, len) \ 47 + #define xaccept_nonblock(fd, addr, len) \ 50 48 ({ \ 51 - int __ret = accept((fd), (addr), (len)); \ 49 + int __ret = \ 50 + accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \ 52 51 if (__ret == -1) \ 53 52 FAIL_ERRNO("accept"); \ 54 53 __ret; \ ··· 108 105 int __ret = setsockopt((fd), (level), (name), (val), (len)); \ 109 106 if (__ret == -1) \ 110 107 FAIL_ERRNO("setsockopt(" #name ")"); \ 108 + __ret; \ 109 + }) 110 + 111 + #define xsend(fd, buf, len, flags) \ 112 + ({ \ 113 + ssize_t __ret = send((fd), (buf), (len), (flags)); \ 114 + if (__ret == -1) \ 115 + FAIL_ERRNO("send"); \ 116 + __ret; \ 117 + }) 118 + 119 + #define xrecv_nonblock(fd, buf, len, flags) \ 120 + ({ \ 121 + ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \ 122 + IO_TIMEOUT_SEC); \ 123 + if (__ret == -1) \ 124 + FAIL_ERRNO("recv"); \ 111 125 __ret; \ 112 126 }) 113 127 ··· 195 175 __ret; \ 196 176 }) 197 177 178 + static int poll_read(int fd, unsigned int timeout_sec) 179 + { 180 + struct timeval timeout = { .tv_sec = timeout_sec }; 181 + fd_set rfds; 182 + int r; 183 + 184 + FD_ZERO(&rfds); 185 + FD_SET(fd, &rfds); 186 + 187 + r = select(fd + 1, &rfds, NULL, NULL, &timeout); 188 + if (r == 0) 189 + errno = ETIME; 190 + 191 + return r == 1 ? 
0 : -1; 192 + } 193 + 194 + static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len, 195 + unsigned int timeout_sec) 196 + { 197 + if (poll_read(fd, timeout_sec)) 198 + return -1; 199 + 200 + return accept(fd, addr, len); 201 + } 202 + 203 + static int recv_timeout(int fd, void *buf, size_t len, int flags, 204 + unsigned int timeout_sec) 205 + { 206 + if (poll_read(fd, timeout_sec)) 207 + return -1; 208 + 209 + return recv(fd, buf, len, flags); 210 + } 211 + 198 212 static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len) 199 213 { 200 214 struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss)); ··· 284 230 return 0; 285 231 } 286 232 287 - static int listen_loopback_reuseport(int family, int sotype, int progfd) 233 + static int socket_loopback_reuseport(int family, int sotype, int progfd) 288 234 { 289 235 struct sockaddr_storage addr; 290 236 socklen_t len; ··· 303 249 if (err) 304 250 goto close; 305 251 252 + if (sotype & SOCK_DGRAM) 253 + return s; 254 + 306 255 err = xlisten(s, SOMAXCONN); 307 256 if (err) 308 257 goto close; ··· 316 259 return -1; 317 260 } 318 261 319 - static int listen_loopback(int family, int sotype) 262 + static int socket_loopback(int family, int sotype) 320 263 { 321 - return listen_loopback_reuseport(family, sotype, -1); 264 + return socket_loopback_reuseport(family, sotype, -1); 322 265 } 323 266 324 267 static void test_insert_invalid(int family, int sotype, int mapfd) ··· 384 327 xclose(s); 385 328 } 386 329 387 - static void test_insert_listening(int family, int sotype, int mapfd) 330 + static void test_insert(int family, int sotype, int mapfd) 388 331 { 389 332 u64 value; 390 333 u32 key; 391 334 int s; 392 335 393 - s = listen_loopback(family, sotype); 336 + s = socket_loopback(family, sotype); 394 337 if (s < 0) 395 338 return; 396 339 ··· 406 349 u32 key; 407 350 int s; 408 351 409 - s = listen_loopback(family, sotype); 352 + s = socket_loopback(family, sotype); 410 353 if (s < 0) 411 354 
return; 412 355 ··· 423 366 u64 value; 424 367 u32 key; 425 368 426 - s = listen_loopback(family, sotype); 369 + s = socket_loopback(family, sotype); 427 370 if (s < 0) 428 371 return; 429 372 ··· 447 390 u32 key; 448 391 int s; 449 392 450 - s = listen_loopback(family, sotype); 393 + s = socket_loopback(family, sotype); 451 394 if (s < 0) 452 395 return; 453 396 ··· 474 417 u64 value; 475 418 u32 key; 476 419 477 - s = listen_loopback(family, sotype); 420 + s = socket_loopback(family, sotype); 478 421 if (s < 0) 479 422 return; 480 423 ··· 496 439 u32 key, value32; 497 440 int err, s; 498 441 499 - s = listen_loopback(family, sotype); 442 + s = socket_loopback(family, sotype); 500 443 if (s < 0) 501 444 return; 502 445 ··· 521 464 xclose(s); 522 465 } 523 466 524 - static void test_update_listening(int family, int sotype, int mapfd) 467 + static void test_update_existing(int family, int sotype, int mapfd) 525 468 { 526 469 int s1, s2; 527 470 u64 value; 528 471 u32 key; 529 472 530 - s1 = listen_loopback(family, sotype); 473 + s1 = socket_loopback(family, sotype); 531 474 if (s1 < 0) 532 475 return; 533 476 534 - s2 = listen_loopback(family, sotype); 477 + s2 = socket_loopback(family, sotype); 535 478 if (s2 < 0) 536 479 goto close_s1; 537 480 ··· 557 500 u64 value; 558 501 u32 key; 559 502 560 - s = listen_loopback(family, sotype); 503 + s = socket_loopback(family, sotype); 561 504 if (s < 0) 562 505 return; 563 506 ··· 591 534 u64 value; 592 535 u32 key; 593 536 594 - s = listen_loopback(family, sotype); 537 + s = socket_loopback(family, sotype); 595 538 if (s < 0) 596 539 return; 597 540 ··· 627 570 socklen_t len; 628 571 u64 value; 629 572 630 - s = listen_loopback(family, sotype); 573 + s = socket_loopback(family, sotype | SOCK_NONBLOCK); 631 574 if (s == -1) 632 575 return; 633 576 ··· 655 598 if (err) 656 599 goto close_cli; 657 600 658 - p = xaccept(s, NULL, NULL); 601 + p = xaccept_nonblock(s, NULL, NULL); 659 602 if (p == -1) 660 603 goto close_cli; 661 
604 ··· 681 624 socklen_t len; 682 625 u64 value; 683 626 684 - s = listen_loopback(family, sotype); 627 + s = socket_loopback(family, sotype | SOCK_NONBLOCK); 685 628 if (s == -1) 686 629 return; 687 630 ··· 704 647 if (err) 705 648 goto close_cli; 706 649 707 - p = xaccept(s, NULL, NULL); 650 + p = xaccept_nonblock(s, NULL, NULL); 708 651 if (p == -1) 709 652 goto close_cli; 710 653 ··· 768 711 break; 769 712 } 770 713 771 - p = xaccept(s, NULL, NULL); 714 + p = xaccept_nonblock(s, NULL, NULL); 772 715 if (p < 0) { 773 716 xclose(c); 774 717 break; ··· 792 735 int err, s; 793 736 u64 value; 794 737 795 - s = listen_loopback(family, sotype | SOCK_NONBLOCK); 738 + s = socket_loopback(family, sotype | SOCK_NONBLOCK); 796 739 if (s < 0) 797 740 return; 798 741 ··· 934 877 935 878 zero_verdict_count(verd_mapfd); 936 879 937 - s = listen_loopback(family, sotype | SOCK_NONBLOCK); 880 + s = socket_loopback(family, sotype | SOCK_NONBLOCK); 938 881 if (s < 0) 939 882 return; 940 883 ··· 950 893 if (err) 951 894 goto close_cli0; 952 895 953 - p0 = xaccept(s, NULL, NULL); 896 + p0 = xaccept_nonblock(s, NULL, NULL); 954 897 if (p0 < 0) 955 898 goto close_cli0; 956 899 ··· 961 904 if (err) 962 905 goto close_cli1; 963 906 964 - p1 = xaccept(s, NULL, NULL); 907 + p1 = xaccept_nonblock(s, NULL, NULL); 965 908 if (p1 < 0) 966 909 goto close_cli1; 967 910 ··· 1066 1009 1067 1010 zero_verdict_count(verd_mapfd); 1068 1011 1069 - s = listen_loopback(family, sotype | SOCK_NONBLOCK); 1012 + s = socket_loopback(family, sotype | SOCK_NONBLOCK); 1070 1013 if (s < 0) 1071 1014 return; 1072 1015 ··· 1082 1025 if (err) 1083 1026 goto close_cli; 1084 1027 1085 - p = xaccept(s, NULL, NULL); 1028 + p = xaccept_nonblock(s, NULL, NULL); 1086 1029 if (p < 0) 1087 1030 goto close_cli; 1088 1031 ··· 1170 1113 { 1171 1114 struct sockaddr_storage addr; 1172 1115 unsigned int pass; 1173 - int s, c, p, err; 1116 + int s, c, err; 1174 1117 socklen_t len; 1175 1118 u64 value; 1176 1119 u32 key; 1177 1120 
1178 1121 zero_verdict_count(verd_map); 1179 1122 1180 - s = listen_loopback_reuseport(family, sotype, reuseport_prog); 1123 + s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK, 1124 + reuseport_prog); 1181 1125 if (s < 0) 1182 1126 return; 1183 1127 ··· 1200 1142 if (err) 1201 1143 goto close_cli; 1202 1144 1203 - p = xaccept(s, NULL, NULL); 1204 - if (p < 0) 1205 - goto close_cli; 1145 + if (sotype == SOCK_STREAM) { 1146 + int p; 1147 + 1148 + p = xaccept_nonblock(s, NULL, NULL); 1149 + if (p < 0) 1150 + goto close_cli; 1151 + xclose(p); 1152 + } else { 1153 + char b = 'a'; 1154 + ssize_t n; 1155 + 1156 + n = xsend(c, &b, sizeof(b), 0); 1157 + if (n == -1) 1158 + goto close_cli; 1159 + 1160 + n = xrecv_nonblock(s, &b, sizeof(b), 0); 1161 + if (n == -1) 1162 + goto close_cli; 1163 + } 1206 1164 1207 1165 key = SK_PASS; 1208 1166 err = xbpf_map_lookup_elem(verd_map, &key, &pass); 1209 1167 if (err) 1210 - goto close_peer; 1168 + goto close_cli; 1211 1169 if (pass != 1) 1212 1170 FAIL("want pass count 1, have %d", pass); 1213 1171 1214 - close_peer: 1215 - xclose(p); 1216 1172 close_cli: 1217 1173 xclose(c); 1218 1174 close_srv: ··· 1246 1174 1247 1175 zero_verdict_count(verd_map); 1248 1176 1249 - s = listen_loopback_reuseport(family, sotype, reuseport_prog); 1177 + s = socket_loopback_reuseport(family, sotype, reuseport_prog); 1250 1178 if (s < 0) 1251 1179 return; 1252 1180 ··· 1270 1198 if (err) 1271 1199 goto close_cli0; 1272 1200 1273 - p0 = xaccept(s, NULL, NULL); 1274 - if (err) 1275 - goto close_cli0; 1201 + if (sotype == SOCK_STREAM) { 1202 + p0 = xaccept_nonblock(s, NULL, NULL); 1203 + if (p0 < 0) 1204 + goto close_cli0; 1205 + } else { 1206 + p0 = xsocket(family, sotype, 0); 1207 + if (p0 < 0) 1208 + goto close_cli0; 1209 + 1210 + len = sizeof(addr); 1211 + err = xgetsockname(c0, sockaddr(&addr), &len); 1212 + if (err) 1213 + goto close_cli0; 1214 + 1215 + err = xconnect(p0, sockaddr(&addr), len); 1216 + if (err) 1217 + goto close_cli0; 1218 + 
} 1276 1219 1277 1220 /* Update sock_map[0] to redirect to a connected socket */ 1278 1221 key = 0; ··· 1300 1213 if (c1 < 0) 1301 1214 goto close_peer0; 1302 1215 1216 + len = sizeof(addr); 1217 + err = xgetsockname(s, sockaddr(&addr), &len); 1218 + if (err) 1219 + goto close_srv; 1220 + 1303 1221 errno = 0; 1304 1222 err = connect(c1, sockaddr(&addr), len); 1223 + if (sotype == SOCK_DGRAM) { 1224 + char b = 'a'; 1225 + ssize_t n; 1226 + 1227 + n = xsend(c1, &b, sizeof(b), 0); 1228 + if (n == -1) 1229 + goto close_cli1; 1230 + 1231 + n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC); 1232 + err = n == -1; 1233 + } 1305 1234 if (!err || errno != ECONNREFUSED) 1306 1235 FAIL_ERRNO("connect: expected ECONNREFUSED"); 1307 1236 ··· 1352 1249 zero_verdict_count(verd_map); 1353 1250 1354 1251 /* Create two listeners, each in its own reuseport group */ 1355 - s1 = listen_loopback_reuseport(family, sotype, reuseport_prog); 1252 + s1 = socket_loopback_reuseport(family, sotype, reuseport_prog); 1356 1253 if (s1 < 0) 1357 1254 return; 1358 1255 1359 - s2 = listen_loopback_reuseport(family, sotype, reuseport_prog); 1256 + s2 = socket_loopback_reuseport(family, sotype, reuseport_prog); 1360 1257 if (s2 < 0) 1361 1258 goto close_srv1; 1362 1259 ··· 1381 1278 goto close_srv2; 1382 1279 1383 1280 err = connect(c, sockaddr(&addr), len); 1384 - if (err && errno != ECONNREFUSED) { 1281 + if (sotype == SOCK_DGRAM) { 1282 + char b = 'a'; 1283 + ssize_t n; 1284 + 1285 + n = xsend(c, &b, sizeof(b), 0); 1286 + if (n == -1) 1287 + goto close_cli; 1288 + 1289 + n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC); 1290 + err = n == -1; 1291 + } 1292 + if (!err || errno != ECONNREFUSED) { 1385 1293 FAIL_ERRNO("connect: expected ECONNREFUSED"); 1386 1294 goto close_cli; 1387 1295 } ··· 1413 1299 xclose(s1); 1414 1300 } 1415 1301 1416 - #define TEST(fn) \ 1302 + #define TEST(fn, ...) 
\ 1417 1303 { \ 1418 - fn, #fn \ 1304 + fn, #fn, __VA_ARGS__ \ 1419 1305 } 1420 1306 1421 1307 static void test_ops_cleanup(const struct bpf_map *map) ··· 1464 1350 } 1465 1351 } 1466 1352 1353 + static const char *sotype_str(int sotype) 1354 + { 1355 + switch (sotype) { 1356 + case SOCK_DGRAM: 1357 + return "UDP"; 1358 + case SOCK_STREAM: 1359 + return "TCP"; 1360 + default: 1361 + return "unknown"; 1362 + } 1363 + } 1364 + 1467 1365 static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map, 1468 1366 int family, int sotype) 1469 1367 { 1470 1368 const struct op_test { 1471 1369 void (*fn)(int family, int sotype, int mapfd); 1472 1370 const char *name; 1371 + int sotype; 1473 1372 } tests[] = { 1474 1373 /* insert */ 1475 1374 TEST(test_insert_invalid), 1476 1375 TEST(test_insert_opened), 1477 - TEST(test_insert_bound), 1478 - TEST(test_insert_listening), 1376 + TEST(test_insert_bound, SOCK_STREAM), 1377 + TEST(test_insert), 1479 1378 /* delete */ 1480 1379 TEST(test_delete_after_insert), 1481 1380 TEST(test_delete_after_close), ··· 1497 1370 TEST(test_lookup_after_delete), 1498 1371 TEST(test_lookup_32_bit_value), 1499 1372 /* update */ 1500 - TEST(test_update_listening), 1373 + TEST(test_update_existing), 1501 1374 /* races with insert/delete */ 1502 - TEST(test_destroy_orphan_child), 1503 - TEST(test_syn_recv_insert_delete), 1504 - TEST(test_race_insert_listen), 1375 + TEST(test_destroy_orphan_child, SOCK_STREAM), 1376 + TEST(test_syn_recv_insert_delete, SOCK_STREAM), 1377 + TEST(test_race_insert_listen, SOCK_STREAM), 1505 1378 /* child clone */ 1506 - TEST(test_clone_after_delete), 1507 - TEST(test_accept_after_delete), 1508 - TEST(test_accept_before_delete), 1379 + TEST(test_clone_after_delete, SOCK_STREAM), 1380 + TEST(test_accept_after_delete, SOCK_STREAM), 1381 + TEST(test_accept_before_delete, SOCK_STREAM), 1509 1382 }; 1510 - const char *family_name, *map_name; 1383 + const char *family_name, *map_name, *sotype_name; 1511 1384 const 
struct op_test *t; 1512 1385 char s[MAX_TEST_NAME]; 1513 1386 int map_fd; 1514 1387 1515 1388 family_name = family_str(family); 1516 1389 map_name = map_type_str(map); 1390 + sotype_name = sotype_str(sotype); 1517 1391 map_fd = bpf_map__fd(map); 1518 1392 1519 1393 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 1520 - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, 1521 - t->name); 1394 + snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name, 1395 + sotype_name, t->name); 1396 + 1397 + if (t->sotype != 0 && t->sotype != sotype) 1398 + continue; 1522 1399 1523 1400 if (!test__start_subtest(s)) 1524 1401 continue; ··· 1555 1424 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 1556 1425 snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, 1557 1426 t->name); 1427 + 1558 1428 if (!test__start_subtest(s)) 1559 1429 continue; 1560 1430 ··· 1570 1438 void (*fn)(int family, int sotype, int socket_map, 1571 1439 int verdict_map, int reuseport_prog); 1572 1440 const char *name; 1441 + int sotype; 1573 1442 } tests[] = { 1574 1443 TEST(test_reuseport_select_listening), 1575 1444 TEST(test_reuseport_select_connected), 1576 1445 TEST(test_reuseport_mixed_groups), 1577 1446 }; 1578 1447 int socket_map, verdict_map, reuseport_prog; 1579 - const char *family_name, *map_name; 1448 + const char *family_name, *map_name, *sotype_name; 1580 1449 const struct reuseport_test *t; 1581 1450 char s[MAX_TEST_NAME]; 1582 1451 1583 1452 family_name = family_str(family); 1584 1453 map_name = map_type_str(map); 1454 + sotype_name = sotype_str(sotype); 1585 1455 1586 1456 socket_map = bpf_map__fd(map); 1587 1457 verdict_map = bpf_map__fd(skel->maps.verdict_map); 1588 1458 reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport); 1589 1459 1590 1460 for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { 1591 - snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, 1592 - t->name); 1461 + snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name, 1462 + 
sotype_name, t->name); 1463 + 1464 + if (t->sotype != 0 && t->sotype != sotype) 1465 + continue; 1593 1466 1594 1467 if (!test__start_subtest(s)) 1595 1468 continue; ··· 1607 1470 int family) 1608 1471 { 1609 1472 test_ops(skel, map, family, SOCK_STREAM); 1473 + test_ops(skel, map, family, SOCK_DGRAM); 1610 1474 test_redir(skel, map, family, SOCK_STREAM); 1611 1475 test_reuseport(skel, map, family, SOCK_STREAM); 1476 + test_reuseport(skel, map, family, SOCK_DGRAM); 1612 1477 } 1613 1478 1614 1479 void test_sockmap_listen(void)
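The UDP paths added above cannot rely on a blocking accept(), so the patch routes reads through a select()-based timeout. The helper below reproduces the shape of that poll_read(), and the assertions drive it with a pipe instead of a socket, which select() handles the same way.

```c
#include <assert.h>
#include <errno.h>
#include <sys/select.h>
#include <sys/time.h>
#include <unistd.h>

/* Same shape as the patch's poll_read(): wait until fd is readable or
 * the timeout expires; a timeout surfaces as -1 with errno = ETIME,
 * which lets xaccept_nonblock()/xrecv_nonblock() fail loudly instead
 * of hanging a selftest forever. */
static int poll_read(int fd, unsigned int timeout_sec)
{
	struct timeval timeout = { .tv_sec = timeout_sec };
	fd_set rfds;
	int r;

	FD_ZERO(&rfds);
	FD_SET(fd, &rfds);

	r = select(fd + 1, &rfds, NULL, NULL, &timeout);
	if (r == 0)
		errno = ETIME;

	return r == 1 ? 0 : -1;
}
```

A zero-second timeout on an empty descriptor returns immediately with ETIME; once a byte is queued, the same call reports readable without blocking.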
+20 -12
tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
··· 188 188 }; 189 189 int fd; 190 190 191 - fd = socket(AF_INET, SOCK_STREAM, 0); 191 + fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); 192 192 if (fd < 0) { 193 193 log_err("Failed to create server socket"); 194 194 return -1; ··· 205 205 206 206 static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER; 207 207 static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER; 208 + static volatile bool server_done = false; 208 209 209 210 static void *server_thread(void *arg) 210 211 { ··· 223 222 224 223 if (CHECK_FAIL(err < 0)) { 225 224 perror("Failed to listen on socket"); 226 - return NULL; 225 + return ERR_PTR(err); 227 226 } 228 227 229 - client_fd = accept(fd, (struct sockaddr *)&addr, &len); 228 + while (!server_done) { 229 + client_fd = accept(fd, (struct sockaddr *)&addr, &len); 230 + if (client_fd == -1 && errno == EAGAIN) { 231 + usleep(50); 232 + continue; 233 + } 234 + break; 235 + } 230 236 if (CHECK_FAIL(client_fd < 0)) { 231 237 perror("Failed to accept client"); 232 - return NULL; 238 + return ERR_PTR(err); 233 239 } 234 240 235 - /* Wait for the next connection (that never arrives) 236 - * to keep this thread alive to prevent calling 237 - * close() on client_fd. 238 - */ 239 - if (CHECK_FAIL(accept(fd, (struct sockaddr *)&addr, &len) >= 0)) { 240 - perror("Unexpected success in second accept"); 241 - return NULL; 242 - } 241 + while (!server_done) 242 + usleep(50); 243 243 244 244 close(client_fd); 245 245 ··· 251 249 { 252 250 int server_fd, cgroup_fd; 253 251 pthread_t tid; 252 + void *server_res; 254 253 255 254 cgroup_fd = test__join_cgroup("/tcp_rtt"); 256 255 if (CHECK_FAIL(cgroup_fd < 0)) ··· 270 267 pthread_mutex_unlock(&server_started_mtx); 271 268 272 269 CHECK_FAIL(run_test(cgroup_fd, server_fd)); 270 + 271 + server_done = true; 272 + pthread_join(tid, &server_res); 273 + CHECK_FAIL(IS_ERR(server_res)); 274 + 273 275 close_server_fd: 274 276 close(server_fd); 275 277 close_cgroup_fd:
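The shutdown handshake this hunk introduces (a volatile done flag polled by the server thread, then pthread_join() to collect its status) can be sketched without sockets. `run_server_once()` below is our stand-in, not selftest code; the tick counter stands in for the accept() poll loop.

```c
#include <pthread.h>
#include <stdbool.h>
#include <unistd.h>

/* Shutdown pattern as in the patch: the worker polls a volatile flag
 * instead of blocking forever, the main thread flips the flag and
 * joins the worker to collect its return value. */
static volatile bool server_done = false;
static volatile int ticks = 0;

static void *server_thread(void *arg)
{
	while (!server_done) {
		ticks++;	/* stand-in for the nonblocking accept() loop */
		usleep(50);
	}
	return arg;	/* echoed back through pthread_join() */
}

int run_server_once(void)
{
	pthread_t tid;
	int token = 42;
	void *res;

	if (pthread_create(&tid, NULL, server_thread, &token))
		return -1;
	while (ticks == 0)	/* wait until the thread has started */
		usleep(50);
	server_done = true;	/* request shutdown */
	if (pthread_join(tid, &res))
		return -1;
	return *(int *)res;
}
```

Compared with the old trick of parking the thread in a second accept(), this lets the main thread end the test deterministically and check the worker's result.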
+43
tools/testing/selftests/bpf/prog_tests/vmlinux.c
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_vmlinux.skel.h"
+
+#define MY_TV_NSEC 1337
+
+static void nsleep()
+{
+	struct timespec ts = { .tv_nsec = MY_TV_NSEC };
+
+	(void)nanosleep(&ts, NULL);
+}
+
+void test_vmlinux(void)
+{
+	int duration = 0, err;
+	struct test_vmlinux* skel;
+	struct test_vmlinux__bss *bss;
+
+	skel = test_vmlinux__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+	bss = skel->bss;
+
+	err = test_vmlinux__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* trigger everything */
+	nsleep();
+
+	CHECK(!bss->tp_called, "tp", "not called\n");
+	CHECK(!bss->raw_tp_called, "raw_tp", "not called\n");
+	CHECK(!bss->tp_btf_called, "tp_btf", "not called\n");
+	CHECK(!bss->kprobe_called, "kprobe", "not called\n");
+	CHECK(!bss->fentry_called, "fentry", "not called\n");
+
+cleanup:
+	test_vmlinux__destroy(skel);
+}
+53
tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
···
 #include "test_xdp.skel.h"
 #include "test_xdp_bpf2bpf.skel.h"
 
+struct meta {
+	int ifindex;
+	int pkt_len;
+};
+
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+	int duration = 0;
+	struct meta *meta = (struct meta *)data;
+	struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+
+	if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
+		  "check_size", "size %u < %zu\n",
+		  size, sizeof(pkt_v4) + sizeof(*meta)))
+		return;
+
+	if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
+		  "meta->ifindex = %d\n", meta->ifindex))
+		return;
+
+	if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
+		  "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+		return;
+
+	if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
+		  "check_packet_content", "content not the same\n"))
+		return;
+
+	*(bool *)ctx = true;
+}
+
 void test_xdp_bpf2bpf(void)
 {
 	__u32 duration = 0, retval, size;
 	char buf[128];
 	int err, pkt_fd, map_fd;
+	bool passed = false;
 	struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
 	struct iptnl_info value4 = {.family = AF_INET};
 	struct test_xdp *pkt_skel = NULL;
 	struct test_xdp_bpf2bpf *ftrace_skel = NULL;
 	struct vip key4 = {.protocol = 6, .family = AF_INET};
 	struct bpf_program *prog;
+	struct perf_buffer *pb = NULL;
+	struct perf_buffer_opts pb_opts = {};
 
 	/* Load XDP program to introspect */
 	pkt_skel = test_xdp__open_and_load();
···
 	if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
 		goto out;
 
+	/* Set up perf buffer */
+	pb_opts.sample_cb = on_sample;
+	pb_opts.ctx = &passed;
+	pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
+			      1, &pb_opts);
+	if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+		goto out;
+
 	/* Run test program */
 	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
···
 		  "err %d errno %d retval %d size %d\n",
 		  err, errno, retval, size))
 		goto out;
+
+	/* Make sure bpf_xdp_output() was triggered and it sent the expected
+	 * data to the perf ring buffer.
+	 */
+	err = perf_buffer__poll(pb, 100);
+	if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+		goto out;
+
+	CHECK_FAIL(!passed);
 
 	/* Verify test results */
 	if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
···
 		  "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
 
 out:
+	if (pb)
+		perf_buffer__free(pb);
 	test_xdp__destroy(pkt_skel);
 	test_xdp_bpf2bpf__destroy(ftrace_skel);
 }
+1 -1
tools/testing/selftests/bpf/progs/bpf_dctcp.c
···
 #include <linux/bpf.h>
 #include <linux/types.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 #include "bpf_tcp_helpers.h"
 
 char _license[] SEC("license") = "GPL";
+1 -1
tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
···
 
 enum e2 {
 	C = 100,
-	D = -100,
+	D = 4294967295,
 	E = 0,
 };
 
+1 -1
tools/testing/selftests/bpf/progs/fentry_test.c
···
 /* Copyright (c) 2019 Facebook */
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
+1 -1
tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
···
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 struct sk_buff {
 	unsigned int len;
+1 -1
tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
···
 /* Copyright (c) 2019 Facebook */
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 struct sk_buff {
 	unsigned int len;
+1 -1
tools/testing/selftests/bpf/progs/fexit_test.c
···
 /* Copyright (c) 2019 Facebook */
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
+1 -1
tools/testing/selftests/bpf/progs/kfree_skb.c
···
 #include <stdbool.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 struct {
+49
tools/testing/selftests/bpf/progs/modify_return.c
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int sequence = 0;
+__s32 input_retval = 0;
+
+__u64 fentry_result = 0;
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(fentry_test, int a, __u64 b)
+{
+	sequence++;
+	fentry_result = (sequence == 1);
+	return 0;
+}
+
+__u64 fmod_ret_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
+{
+	sequence++;
+	/* This is the first fmod_ret program, the ret passed should be 0 */
+	fmod_ret_result = (sequence == 2 && ret == 0);
+	return input_retval;
+}
+
+__u64 fexit_result = 0;
+SEC("fexit/bpf_modify_return_test")
+int BPF_PROG(fexit_test, int a, __u64 b, int ret)
+{
+	sequence++;
+	/* If the input_reval is non-zero a successful modification should have
+	 * occurred.
+	 */
+	if (input_retval)
+		fexit_result = (sequence == 3 && ret == input_retval);
+	else
+		fexit_result = (sequence == 3 && ret == 4);
+
+	return 0;
+}
+2 -1
tools/testing/selftests/bpf/progs/test_attach_probe.c
···
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 int kprobe_res = 0;
 int kretprobe_res = 0;
···
 }
 
 SEC("kretprobe/sys_nanosleep")
-int handle_kretprobe(struct pt_regs *ctx)
+int BPF_KRETPROBE(handle_kretprobe)
 {
 	kretprobe_res = 2;
 	return 0;
+37
tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Carlos Neira cneirabustos@gmail.com */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+static volatile struct {
+	__u64 dev;
+	__u64 ino;
+	__u64 pid_tgid;
+	__u64 user_pid_tgid;
+} res;
+
+SEC("raw_tracepoint/sys_enter")
+int trace(void *ctx)
+{
+	__u64 ns_pid_tgid, expected_pid;
+	struct bpf_pidns_info nsdata;
+	__u32 key = 0;
+
+	if (bpf_get_ns_current_pid_tgid(res.dev, res.ino, &nsdata,
+					sizeof(struct bpf_pidns_info)))
+		return 0;
+
+	ns_pid_tgid = (__u64)nsdata.tgid << 32 | nsdata.pid;
+	expected_pid = res.user_pid_tgid;
+
+	if (expected_pid != ns_pid_tgid)
+		return 0;
+
+	res.pid_tgid = ns_pid_tgid;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+2 -5
tools/testing/selftests/bpf/progs/test_overhead.c
···
 #include <linux/ptrace.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "bpf_trace_helpers.h"
 
 struct task_struct;
 
···
 }
 
 SEC("kretprobe/__set_task_comm")
-int BPF_KRETPROBE(prog2,
-		  struct task_struct *tsk, const char *buf, bool exec,
-		  int ret)
+int BPF_KRETPROBE(prog2, int ret)
 {
-	return !PT_REGS_PARM1(ctx) && ret;
+	return ret;
 }
 
 SEC("raw_tp/task_rename")
+1 -1
tools/testing/selftests/bpf/progs/test_perf_branches.c
···
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 int valid = 0;
 int required_size_out = 0;
+1 -1
tools/testing/selftests/bpf/progs/test_perf_buffer.c
···
 #include <linux/ptrace.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
 
 struct {
 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
-1
tools/testing/selftests/bpf/progs/test_probe_user.c
···
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
-#include "bpf_trace_helpers.h"
 
 static struct sockaddr_in old;
 
+2
tools/testing/selftests/bpf/progs/test_skb_ctx.c
···
 		return 1;
 	if (skb->gso_segs != 8)
 		return 1;
+	if (skb->gso_size != 10)
+		return 1;
 
 	return 0;
 }
+2 -1
tools/testing/selftests/bpf/progs/test_trampoline_count.c
···
 #include <stdbool.h>
 #include <stddef.h>
 #include <linux/bpf.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 struct task_struct;
 
+84
tools/testing/selftests/bpf/progs/test_vmlinux.c
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <asm/unistd.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MY_TV_NSEC 1337
+
+bool tp_called = false;
+bool raw_tp_called = false;
+bool tp_btf_called = false;
+bool kprobe_called = false;
+bool fentry_called = false;
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle__tp(struct trace_event_raw_sys_enter *args)
+{
+	struct __kernel_timespec *ts;
+
+	if (args->id != __NR_nanosleep)
+		return 0;
+
+	ts = (void *)args->args[0];
+	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+		return 0;
+
+	tp_called = true;
+	return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
+{
+	struct __kernel_timespec *ts;
+
+	if (id != __NR_nanosleep)
+		return 0;
+
+	ts = (void *)PT_REGS_PARM1_CORE(regs);
+	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+		return 0;
+
+	raw_tp_called = true;
+	return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
+{
+	struct __kernel_timespec *ts;
+
+	if (id != __NR_nanosleep)
+		return 0;
+
+	ts = (void *)PT_REGS_PARM1_CORE(regs);
+	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+		return 0;
+
+	tp_btf_called = true;
+	return 0;
+}
+
+SEC("kprobe/hrtimer_nanosleep")
+int BPF_KPROBE(handle__kprobe,
+	       ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+{
+	if (rqtp == MY_TV_NSEC)
+		kprobe_called = true;
+	return 0;
+}
+
+SEC("fentry/hrtimer_nanosleep")
+int BPF_PROG(handle__fentry,
+	     ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+{
+	if (rqtp == MY_TV_NSEC)
+		fentry_called = true;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+25 -1
tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
···
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf.h>
+#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+
+char _license[] SEC("license") = "GPL";
 
 struct net_device {
 	/* Structure does not need to contain all entries,
···
 	struct xdp_rxq_info *rxq;
 } __attribute__((preserve_access_index));
 
+struct meta {
+	int ifindex;
+	int pkt_len;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
 __u64 test_result_fentry = 0;
 SEC("fentry/FUNC")
 int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
 {
+	struct meta meta;
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+
+	meta.ifindex = xdp->rxq->dev->ifindex;
+	meta.pkt_len = data_end - data;
+	bpf_xdp_output(xdp, &perf_buf_map,
+		       ((__u64) meta.pkt_len << 32) |
+		       BPF_F_CURRENT_CPU,
+		       &meta, sizeof(meta));
+
 	test_result_fentry = xdp->rxq->dev->ifindex;
 	return 0;
 }
+159
tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */
+#define _GNU_SOURCE
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sched.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include "test_progs.h"
+
+#define CHECK_NEWNS(condition, tag, format...) ({	\
+	int __ret = !!(condition);			\
+	if (__ret) {					\
+		printf("%s:FAIL:%s ", __func__, tag);	\
+		printf(format);				\
+	} else {					\
+		printf("%s:PASS:%s\n", __func__, tag);	\
+	}						\
+	__ret;						\
+})
+
+struct bss {
+	__u64 dev;
+	__u64 ino;
+	__u64 pid_tgid;
+	__u64 user_pid_tgid;
+};
+
+int main(int argc, char **argv)
+{
+	pid_t pid;
+	int exit_code = 1;
+	struct stat st;
+
+	printf("Testing bpf_get_ns_current_pid_tgid helper in new ns\n");
+
+	if (stat("/proc/self/ns/pid", &st)) {
+		perror("stat failed on /proc/self/ns/pid ns\n");
+		printf("%s:FAILED\n", argv[0]);
+		return exit_code;
+	}
+
+	if (CHECK_NEWNS(unshare(CLONE_NEWPID | CLONE_NEWNS),
+			"unshare CLONE_NEWPID | CLONE_NEWNS", "error errno=%d\n", errno))
+		return exit_code;
+
+	pid = fork();
+	if (pid == -1) {
+		perror("Fork() failed\n");
+		printf("%s:FAILED\n", argv[0]);
+		return exit_code;
+	}
+
+	if (pid > 0) {
+		int status;
+
+		usleep(5);
+		waitpid(pid, &status, 0);
+		return 0;
+	} else {
+
+		pid = fork();
+		if (pid == -1) {
+			perror("Fork() failed\n");
+			printf("%s:FAILED\n", argv[0]);
+			return exit_code;
+		}
+
+		if (pid > 0) {
+			int status;
+			waitpid(pid, &status, 0);
+			return 0;
+		} else {
+			if (CHECK_NEWNS(mount("none", "/proc", NULL, MS_PRIVATE|MS_REC, NULL),
+					"Unmounting proc", "Cannot umount proc! errno=%d\n", errno))
+				return exit_code;
+
+			if (CHECK_NEWNS(mount("proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL),
+					"Mounting proc", "Cannot mount proc! errno=%d\n", errno))
+				return exit_code;
+
+			const char *probe_name = "raw_tracepoint/sys_enter";
+			const char *file = "test_ns_current_pid_tgid.o";
+			struct bpf_link *link = NULL;
+			struct bpf_program *prog;
+			struct bpf_map *bss_map;
+			struct bpf_object *obj;
+			int exit_code = 1;
+			int err, key = 0;
+			struct bss bss;
+			struct stat st;
+			__u64 id;
+
+			obj = bpf_object__open_file(file, NULL);
+			if (CHECK_NEWNS(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+				return exit_code;
+
+			err = bpf_object__load(obj);
+			if (CHECK_NEWNS(err, "obj_load", "err %d errno %d\n", err, errno))
+				goto cleanup;
+
+			bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss");
+			if (CHECK_NEWNS(!bss_map, "find_bss_map", "failed\n"))
+				goto cleanup;
+
+			prog = bpf_object__find_program_by_title(obj, probe_name);
+			if (CHECK_NEWNS(!prog, "find_prog", "prog '%s' not found\n",
+					probe_name))
+				goto cleanup;
+
+			memset(&bss, 0, sizeof(bss));
+			pid_t tid = syscall(SYS_gettid);
+			pid_t pid = getpid();
+
+			id = (__u64) tid << 32 | pid;
+			bss.user_pid_tgid = id;
+
+			if (CHECK_NEWNS(stat("/proc/self/ns/pid", &st),
+					"stat new ns", "Failed to stat /proc/self/ns/pid errno=%d\n", errno))
+				goto cleanup;
+
+			bss.dev = st.st_dev;
+			bss.ino = st.st_ino;
+
+			err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0);
+			if (CHECK_NEWNS(err, "setting_bss", "failed to set bss : %d\n", err))
+				goto cleanup;
+
+			link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+			if (CHECK_NEWNS(IS_ERR(link), "attach_raw_tp", "err %ld\n",
+					PTR_ERR(link))) {
+				link = NULL;
+				goto cleanup;
+			}
+
+			/* trigger some syscalls */
+			usleep(1);
+
+			err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss);
+			if (CHECK_NEWNS(err, "set_bss", "failed to get bss : %d\n", err))
+				goto cleanup;
+
+			if (CHECK_NEWNS(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid",
+					"User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
+				goto cleanup;
+
+			exit_code = 0;
+			printf("%s:PASS\n", argv[0]);
+cleanup:
+			if (!link) {
+				bpf_link__destroy(link);
+				link = NULL;
+			}
+			bpf_object__close(obj);
+		}
+	}
+}
+23 -5
tools/testing/selftests/bpf/test_progs.c
···
 	int old_error_cnt;
 };
 
+/* Override C runtime library's usleep() implementation to ensure nanosleep()
+ * is always called. Usleep is frequently used in selftests as a way to
+ * trigger kprobe and tracepoints.
+ */
+int usleep(useconds_t usec)
+{
+	struct timespec ts;
+
+	if (usec > 999999) {
+		ts.tv_sec = usec / 1000000;
+		ts.tv_nsec = usec % 1000000;
+	} else {
+		ts.tv_sec = 0;
+		ts.tv_nsec = usec;
+	}
+	return nanosleep(&ts, NULL);
+}
+
 static bool should_run(struct test_selector *sel, int num, const char *name)
 {
 	int i;
···
 
 	map = bpf_object__find_map_by_name(obj, name);
 	if (!map) {
-		printf("%s:FAIL:map '%s' not found\n", test, name);
+		fprintf(stdout, "%s:FAIL:map '%s' not found\n", test, name);
 		test__fail();
 		return -1;
 	}
···
 {
 	if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG)
 		return 0;
-	vprintf(format, args);
+	vfprintf(stdout, format, args);
 	return 0;
 }
···
 	if (!flavor)
 		return 0;
 	flavor++;
-	printf("Switching to flavor '%s' subdirectory...\n", flavor);
+	fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
 	return chdir(flavor);
 }
···
 		cleanup_cgroup_environment();
 	}
 	stdio_restore();
-	printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
-	       env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
 
 	free(env.test_selector.blacklist.strs);
 	free(env.test_selector.whitelist.strs);
+4 -4
tools/testing/selftests/bpf/test_progs.h
···
 	int __save_errno = errno;				\
 	if (__ret) {						\
 		test__fail();					\
-		printf("%s:FAIL:%s ", __func__, tag);		\
-		printf(format);					\
+		fprintf(stdout, "%s:FAIL:%s ", __func__, tag);	\
+		fprintf(stdout, ##format);			\
 	} else {						\
-		printf("%s:PASS:%s %d nsec\n",			\
+		fprintf(stdout, "%s:PASS:%s %d nsec\n",		\
 		       __func__, tag, duration);		\
 	}							\
 	errno = __save_errno;					\
···
 	int __save_errno = errno;				\
 	if (__ret) {						\
 		test__fail();					\
-		printf("%s:FAIL:%d\n", __func__, __LINE__);	\
+		fprintf(stdout, "%s:FAIL:%d\n", __func__, __LINE__); \
 	}							\
 	errno = __save_errno;					\
 	__ret;							\
+47
tools/testing/selftests/bpf/verifier/ctx_skb.c
···
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 },
 {
+	"read gso_size from CGROUP_SKB",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, gso_size)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"read gso_size from CGROUP_SKB",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+		    offsetof(struct __sk_buff, gso_size)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"write gso_size from CGROUP_SKB",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+		    offsetof(struct __sk_buff, gso_size)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.result_unpriv = REJECT,
+	.errstr = "invalid bpf_context access off=176 size=4",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"read gso_size from CLS",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, gso_size)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
 	"check wire_len is not readable by sockets",
 	.insns = {
 	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,